From 355444649a3f1d463ca4fea0cb2198a69ee74aa9 Mon Sep 17 00:00:00 2001
From: Laurence Withers <lwithers@amethyst.(none)>
Date: Tue, 13 Oct 2009 11:51:49 +0000
Subject: [PATCH] Add utf8_iseol() to test for end-of-line

Add a character classification function modelled after the Unicode Standard
Annex 13 Unicode newline guidelines to test for end-of-line characters.
---
 src/docs/char_class_eol.dox | 28 ++++++++++++++++++++++++++++
 src/libutf8/100_ctype.c     | 11 +++++++++++
 src/libutf8/100_ctype.h     | 15 +++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 src/docs/char_class_eol.dox
diff --git a/src/docs/char_class_eol.dox b/src/docs/char_class_eol.dox
new file mode 100644
index 0000000..83ba8ce
--- /dev/null
+++ b/src/docs/char_class_eol.dox
@@ -0,0 +1,28 @@
+/* libutf8/src/docs/char_class_eol.dox
+ *
+ *  (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
+ *  Released under the GNU GPLv3. See file COPYING or
+ *  http://www.gnu.org/copyleft/gpl.html for details.
+*/
+
+/*! \page char_class_eol Character classification: end of line
+
+From <a href='http://unicode.org/reports/tr13/tr13-9.html'>Unicode Standard
+Annex #13 (Unicode newline guidelines)</a>:
+
+<ul>
+ <li>LF (000A)</li>
+ <li>VT (000B)</li>
+ <li>FF (000C)</li>
+ <li>CR (000D)</li>
+ <li>NEL (0085)</li>
+ <li>LS (2028)</li>
+ <li>PS (2029)</li>
+</ul>
+
+*/
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4:syntax=doxygen
+*/
diff --git a/src/libutf8/100_ctype.c b/src/libutf8/100_ctype.c
index 55eba0b..6b4a2f2 100644
--- a/src/libutf8/100_ctype.c
+++ b/src/libutf8/100_ctype.c
@@ -34,6 +34,17 @@ utf8_isspace(wchar_t ch)
 
 
 
+int
+utf8_iseol(wchar_t ch)
+{
+    return (ch >= 0x000A && ch <= 0x000D) /* LF, VT, FF, CR (contiguous range) */
+        || ch == 0x0085                   /* NEL */
+        || ch == 0x2028                   /* LS */
+        || ch == 0x2029;                  /* PS */
+}
+
+
+
 int
 utf8_isucs4(wchar_t ch)
 {
diff --git a/src/libutf8/100_ctype.h b/src/libutf8/100_ctype.h
index 61c3624..c0194f6 100644
--- a/src/libutf8/100_ctype.h
+++ b/src/libutf8/100_ctype.h
@@ -51,6 +51,21 @@ int utf8_isspace(wchar_t ch);
 
 
 
+/*! \brief Test if character is end-of-line.
+
+\param ch Character to test.
+\retval nonzero if \a ch is an EOL character.
+\retval 0 if \a ch is not an EOL character.
+
+This function tests a UCS char to see if it should be classified as 
+\ref char_class_eol "end-of-line". Note that both ASCII LF and CR are treated
+as EOL; it is up to the application to disambiguate the line ending in use.
+
+*/
+int utf8_iseol(wchar_t ch);
+
+
+
 /*! \brief Test if character is valid UCS-4 codepoint.
 
 \param ch The character to classify.