diff --git a/src/docs/char_class_whitespace.dox b/src/docs/char_class_whitespace.dox
new file mode 100644
index 0000000..78dbb64
--- /dev/null
+++ b/src/docs/char_class_whitespace.dox
@@ -0,0 +1,30 @@
+/* libutf8/src/docs/char_class_whitespace.dox
+ *
+ *  (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
+ *  Released under the GNU GPLv3. See file COPYING or
+ *  http://www.gnu.org/copyleft/gpl.html for details.
+*/
+
+/*! \page char_class_whitespace Character classification: whitespace
+
+From <a href='http://www.unicode.org/Public/UNIDATA/'>PropList-4.1.0.txt</a>:
+
+<pre>0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
+0020          ; White_Space # Zs       SPACE
+0085          ; White_Space # Cc       <control-0085>
+00A0          ; White_Space # Zs       NO-BREAK SPACE
+1680          ; White_Space # Zs       OGHAM SPACE MARK
+180E          ; White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
+2000..200A    ; White_Space # Zs  [11] EN QUAD..HAIR SPACE
+2028          ; White_Space # Zl       LINE SEPARATOR
+2029          ; White_Space # Zp       PARAGRAPH SEPARATOR
+202F          ; White_Space # Zs       NARROW NO-BREAK SPACE
+205F          ; White_Space # Zs       MEDIUM MATHEMATICAL SPACE
+3000          ; White_Space # Zs       IDEOGRAPHIC SPACE</pre>
+
+*/
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4:syntax=doxygen
+*/
diff --git a/src/libutf8/100_ctype.c b/src/libutf8/100_ctype.c
index 7b15428..d2b19bf 100644
--- a/src/libutf8/100_ctype.c
+++ b/src/libutf8/100_ctype.c
@@ -5,6 +5,8 @@
  *  http://www.gnu.org/copyleft/gpl.html for details.
 */
 
+
+
 int utf8_isascii(wchar_t ch)
 {
     return !(ch & ~0x7F);
@@ -12,22 +14,6 @@ int utf8_isascii(wchar_t ch)
 
 
 
-/* From PropList-4.1.0.txt (http://www.unicode.org/Public/UNIDATA/)
-
-0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
-0020          ; White_Space # Zs       SPACE
-0085          ; White_Space # Cc       <control-0085>
-00A0          ; White_Space # Zs       NO-BREAK SPACE
-1680          ; White_Space # Zs       OGHAM SPACE MARK
-180E          ; White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
-2000..200A    ; White_Space # Zs  [11] EN QUAD..HAIR SPACE
-2028          ; White_Space # Zl       LINE SEPARATOR
-2029          ; White_Space # Zp       PARAGRAPH SEPARATOR
-202F          ; White_Space # Zs       NARROW NO-BREAK SPACE
-205F          ; White_Space # Zs       MEDIUM MATHEMATICAL SPACE
-3000          ; White_Space # Zs       IDEOGRAPHIC SPACE
-*/
-
 int utf8_isspace(wchar_t ch)
 {
     return((ch >= 0x0009 && ch <= 0x000D)
@@ -46,11 +32,32 @@ int utf8_isspace(wchar_t ch)
 
 
 
-int utf8_isvalid(wchar_t ch)
+int utf8_isucs4(wchar_t ch)
 {
-    return !(ch & (~((wchar_t)0x7FFFFFFF))) && (ch < 0xD800 || ch > 0xDFFF) && (ch != 0xFFFE) && (ch != 0xFFFF);
+    return !(ch & (~((wchar_t)0x7FFFFFFF)))  /* no bit set outside 0x7FFFFFFF */
+        && (ch < 0xD800 || ch > 0xDFFF)      /* not a UTF-16 surrogate */
+        && (ch != 0xFFFE) && (ch != 0xFFFF); /* not U+FFFE or U+FFFF */
 }
 
+
+
+int utf8_isutf32(wchar_t ch)
+{
+    return ch >= 0 && ch <= 0x10FFFF         /* within 0..0x10FFFF */
+        && (ch < 0xD800 || ch > 0xDFFF)      /* not a UTF-16 surrogate */
+        && (ch != 0xFFFE) && (ch != 0xFFFF); /* not U+FFFE or U+FFFF */
+}
+
+
+
+int utf8_isutf16(wchar_t ch)
+{
+    return ch >= 0 && ch <= 0xFFFD           /* upper bound also rejects U+FFFE, U+FFFF */
+        && (ch < 0xD800 || ch > 0xDFFF);     /* not a UTF-16 surrogate */
+}
+
+
+
 /* options for text editors
 kate: replace-trailing-space-save true; space-indent true; tab-width 4;
 vim: expandtab:ts=4:sw=4:syntax=c.doxygen
diff --git a/src/libutf8/100_ctype.h b/src/libutf8/100_ctype.h
index f72d233..61c3624 100644
--- a/src/libutf8/100_ctype.h
+++ b/src/libutf8/100_ctype.h
@@ -5,43 +5,103 @@
  *  http://www.gnu.org/copyleft/gpl.html for details.
 */
 
+
+
 /*! \defgroup ctype Character classification
 
-This module contains functions for character classification. These are basically an extension of the
-\c is* functions defined in \c &lt;ctype.h&gt;.
+This module contains functions for character classification. These are
+semantically equivalent to the \c is* functions defined in \c &lt;ctype.h&gt;,
+except that they work on \c wchar_t UCS chars and are independent of the
+system's current locale setting.
 
-\todo There are many char classification functions that haven't been implemented yet. These won't be
-    implemented until they can be done in a proper, Unicode-safe fashion.
+\todo There are many char classification functions that haven't been
+      implemented yet. These will be added on demand.
 
- */
+*/
 /*!@{*/
 
 
 
-/*! \biref Returns \c true if \a ch can be represented in ASCII. */
+/*! \brief Test if character is ASCII.
+
+\param ch Character to test.
+\retval nonzero if \a ch is ASCII.
+\retval 0 if \a ch is not ASCII.
+
+This function tests a UCS char to see if it lies within the range of characters
+that can be represented by ASCII (i.e. that the value of \a ch lies between 0
+and 127, inclusive).
+
+*/
 int utf8_isascii(wchar_t ch);
 
-/*! \brief Returns \c true if \a ch is whitespace. */
+
+
+/*! \brief Test if character is whitespace.
+
+\param ch Character to test.
+\retval nonzero if \a ch is whitespace.
+\retval 0 if \a ch is not whitespace.
+
+This function tests a UCS char to see if it should be classified as
+\ref char_class_whitespace "whitespace".
+
+*/
 int utf8_isspace(wchar_t ch);
 
-/*! \brief Returns \c true if \a ch is a valid UCS-4 character.
+
+
+/*! \brief Test if character is a valid UCS-4 codepoint.
 
 \param ch The character to classify.
-\retval true If \a ch is a valid UCS-4 character.
-\retval false If \a ch is not a valid UCS-4 character.
+\retval nonzero If \a ch is a valid UCS-4 character.
+\retval 0 If \a ch is not a valid UCS-4 character.
 
-This function will examine a \c wchar_t value and determine whether or not it is a valid UCS-4
-character. Valid characters lie in the range 0&ndash;0x7FFFFFFF but exclude:
+This function will examine a \c wchar_t value and determine whether or not it
+is a valid UCS-4 character. Valid characters lie in the range
+0&ndash;0x7FFFFFFF but exclude:
 \li the UTF-16 surrogate code points (U+D800&ndash;U+DFFF, inclusive)
 \li the invalid code points U+FFFE and U+FFFF
 
 */
-int utf8_isvalid(wchar_t ch);
+int utf8_isucs4(wchar_t ch);
+
+
+
+/*! \brief Test if character is a valid UTF-32 (Unicode) codepoint.
+
+\param ch The character to classify.
+\retval nonzero If \a ch is a valid Unicode character.
+\retval 0 If \a ch is not a valid Unicode character.
+
+This function will examine a \c wchar_t value and determine whether or not it
+is a valid Unicode character. Valid characters lie in the range
+0&ndash;0x10FFFF but exclude:
+\li the UTF-16 surrogate code points (U+D800&ndash;U+DFFF, inclusive)
+\li the invalid code points U+FFFE and U+FFFF
+
+*/
+int utf8_isutf32(wchar_t ch);
+
+
+
+/*! \brief Test if character is valid and encodable as one UTF-16 code unit.
+
+\param ch The character to classify.
+\retval nonzero If \a ch is a valid Unicode character.
+\retval 0 If \a ch is not a valid Unicode character.
+
+This function will examine a \c wchar_t value and determine whether or not it
+is a valid Unicode character that can be represented by a single UTF-16
+code unit. Valid characters lie in the range 0&ndash;0xFFFD but exclude:
+\li the UTF-16 surrogate code points (U+D800&ndash;U+DFFF, inclusive)
+
+*/
+int utf8_isutf16(wchar_t ch);
 
 
 
 /*!@}*/
-
 /* options for text editors
 kate: replace-trailing-space-save true; space-indent true; tab-width 4;
 vim: expandtab:ts=4:sw=4:syntax=c.doxygen
diff --git a/src/libutf8/300_encode.c b/src/libutf8/300_encode.c
index ed0097b..811ab30 100644
--- a/src/libutf8/300_encode.c
+++ b/src/libutf8/300_encode.c
@@ -11,7 +11,7 @@ char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
         errno = EINVAL;
         return 0;
     }
-    if(!utf8_isvalid(ch)) {
+    if(!utf8_isucs4(ch)) {
         errno = EILSEQ;
         return 0;
     }
@@ -78,12 +78,12 @@ char* utf8_encode_char(char* dest, size_t amt, wchar_t ch)
 
 char* utf8_encode_char_force(char* dest, size_t amt, wchar_t ch, wchar_t ilseq)
 {
-    if(!utf8_isvalid(ilseq)) {
+    if(!utf8_isucs4(ilseq)) {
         errno = EILSEQ;
         return 0;
     }
 
-    return utf8_encode_char(dest, amt, utf8_isvalid(ch) ? ch : ilseq);
+    return utf8_encode_char(dest, amt, utf8_isucs4(ch) ? ch : ilseq);
 }
 
 
diff --git a/src/libutf8/400_decode_state.c b/src/libutf8/400_decode_state.c
index 15d331a..4aab0cc 100644
--- a/src/libutf8/400_decode_state.c
+++ b/src/libutf8/400_decode_state.c
@@ -108,7 +108,7 @@ loop:
                     goto error;
                 } else {
                     // validate codepoint
-                    if(!utf8_isvalid(ctx->statech)) {
+                    if(!utf8_isucs4(ctx->statech)) {
                         error_type = utf8_decode_error_illegal_cp;
                         goto error;
                     }