Add utf8_iseol() to test for end-of-line
Add a character classification function, modelled after Unicode Standard Annex #13 (Unicode newline guidelines), to test for end-of-line characters.
This commit is contained in:
parent
a46f5cc2ee
commit
355444649a
|
@ -0,0 +1,28 @@
|
||||||
|
/* libutf8/src/docs/MainPage.dox
|
||||||
|
*
|
||||||
|
* (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
|
||||||
|
* Released under the GNU GPLv3. See file COPYING or
|
||||||
|
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \page char_class_eol Character classification: end of line
|
||||||
|
|
||||||
|
From <a href='http://unicode.org/reports/tr13/tr13-9.html'>Unicode Standard
|
||||||
|
Annex #13 (Unicode newline guidelines)</a>, the following characters are classified as end-of-line:
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>LF (000A)</li>
|
||||||
|
<li>VT (000B)</li>
|
||||||
|
<li>FF (000C)</li>
|
||||||
|
<li>CR (000D)</li>
|
||||||
|
<li>NEL (0085)</li>
|
||||||
|
<li>LS (2028)</li>
|
||||||
|
<li>PS (2029)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* options for text editors
|
||||||
|
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||||
|
vim: expandtab:ts=4:sw=4:syntax=doxygen
|
||||||
|
*/
|
|
@ -34,6 +34,17 @@ utf8_isspace(wchar_t ch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Test whether a character is an end-of-line character.
 *
 * ch: the character to classify.
 *
 * Returns nonzero (1) if ch is one of the end-of-line characters listed in
 * Unicode Standard Annex #13 (Unicode newline guidelines), and 0 otherwise.
 * Both LF and CR are individually reported as EOL; no attempt is made here to
 * recognise a CR+LF pair as a single line ending.
 */
int
utf8_iseol(wchar_t ch)
{
    return (ch >= 0x000A && ch <= 0x000D)   /* LF, VT, FF, CR */
        || ch == 0x0085                     /* NEL */
        || ch == 0x2028                     /* LS */
        || ch == 0x2029;                    /* PS */
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
utf8_isucs4(wchar_t ch)
|
utf8_isucs4(wchar_t ch)
|
||||||
{
|
{
|
||||||
|
|
|
@ -51,6 +51,21 @@ int utf8_isspace(wchar_t ch);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*! \brief Test if character is end-of-line.
|
||||||
|
|
||||||
|
\param ch Character to test.
|
||||||
|
\retval nonzero if \a ch is an EOL character.
|
||||||
|
\retval 0 if \a ch is not an EOL character.
|
||||||
|
|
||||||
|
This function tests a UCS char to see if it should be classified as
|
||||||
|
\ref char_class_eol "end-of-line". Note that both ASCII LF and CR are treated
|
||||||
|
as EOL; it is up to the application to disambiguate the line ending in use.
|
||||||
|
|
||||||
|
*/
|
||||||
|
int utf8_iseol(wchar_t ch);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*! \brief Test if character is valid UCS-4 codepoint.
|
/*! \brief Test if character is valid UCS-4 codepoint.
|
||||||
|
|
||||||
\param ch The character to classify.
|
\param ch The character to classify.
|
||||||
|
|
Loading…
Reference in New Issue