Add utf8_iseol() to test for end-of-line
Add a character classification function, modelled after Unicode Standard Annex #13 (Unicode newline guidelines), to test for end-of-line characters.
This commit is contained in:
parent
a46f5cc2ee
commit
355444649a
|
@ -0,0 +1,28 @@
|
|||
/* libutf8/src/docs/MainPage.dox
|
||||
*
|
||||
* (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv3. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \page char_class_eol Character classification: end of line
|
||||
|
||||
From <a href='http://unicode.org/reports/tr13/tr13-9.html'>Unicode Standard
|
||||
Annex #13 (Unicode newline guidelines)</a>, the following characters are classified as end-of-line:
|
||||
|
||||
<ul>
|
||||
<li>LF (000A)</li>
|
||||
<li>VT (000B)</li>
|
||||
<li>FF (000C)</li>
|
||||
<li>CR (000D)</li>
|
||||
<li>NEL (0085)</li>
|
||||
<li>LS (2028)</li>
|
||||
<li>PS (2029)</li>
|
||||
</ul>
|
||||
|
||||
*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4:syntax=doxygen
|
||||
*/
|
|
@ -34,6 +34,17 @@ utf8_isspace(wchar_t ch)
|
|||
|
||||
|
||||
|
||||
/* Classify a character as end-of-line.
 *
 * Returns 1 when ch is one of LF, VT, FF, CR, NEL, LS or PS, and 0 for
 * every other value. CR and LF are both reported individually; pairing
 * them into a single line ending is left to the caller.
 */
int
utf8_iseol(wchar_t ch)
{
    switch(ch) {
    case 0x000A: /* LF  */
    case 0x000B: /* VT  */
    case 0x000C: /* FF  */
    case 0x000D: /* CR  */
    case 0x0085: /* NEL */
    case 0x2028: /* LS  */
    case 0x2029: /* PS  */
        return 1;

    default:
        return 0;
    }
}
|
||||
|
||||
|
||||
|
||||
int
|
||||
utf8_isucs4(wchar_t ch)
|
||||
{
|
||||
|
|
|
@ -51,6 +51,21 @@ int utf8_isspace(wchar_t ch);
|
|||
|
||||
|
||||
|
||||
/*! \brief Test if character is end-of-line.
|
||||
|
||||
\param ch Character to test.
|
||||
\retval nonzero if \a ch is an EOL character.
|
||||
\retval 0 if \a ch is not an EOL character.
|
||||
|
||||
This function tests a UCS char to see if it should be classified as
|
||||
\ref char_class_eol "end-of-line". Note that both ASCII LF and CR are treated
|
||||
as EOL; it is up to the application to disambiguate the line ending in use.
|
||||
|
||||
*/
|
||||
int utf8_iseol(wchar_t ch);
|
||||
|
||||
|
||||
|
||||
/*! \brief Test if character is valid UCS-4 codepoint.
|
||||
|
||||
\param ch The character to classify.
|
||||
|
|
Loading…
Reference in New Issue