Copy from svn repository.

2006-07-31 15:40:56 +01:00 · 2006-07-31 15:40:56 +01:00 · ac22dabfe6
parent 73d6e6fbd0
commit ac22dabfe6
34 changed files with 1557 additions and 1 deletions
--- a/11
+++ b/11
@ -10,5 +10,14 @@ Really Quick Instructions
 To build: ./make.sh
 To install: ./make.sh install
    (you might want to set PREFIX, by default it's /usr/local)
+Documentation is automatically built using doxygen.

-@TODO@
+Dependencies
+------------
+
+libutf8, http://www.lwithers.me.uk/projects/libutf8/
+
+Project Homepage
+----------------
+
+http://www.lwithers.me.uk/projects/libutf8++/
--- a/src/docs/.params
+++ b/src/docs/.params
@ -0,0 +1 @@
+doxygen docs docs
--- a/src/docs/Doxyfile.in
+++ b/src/docs/Doxyfile.in
@ -0,0 +1,146 @@
+# libutf8++/src/docs/Doxyfile.in
+#
+#  (c)2006, Laurence Withers, <l@lwithers.me.uk>.
+#  Released under the GNU GPLv2. See file COPYING or
+#  http://www.gnu.org/copyleft/gpl.html for details.
+#
+
+PROJECT_NAME           = libutf8++
+OUTPUT_DIRECTORY       =
+CREATE_SUBDIRS         = NO
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ABBREVIATE_BRIEF       =
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = YES
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        =
+STRIP_FROM_INC_PATH    =
+SHORT_NAMES            = NO
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = YES
+DETAILS_AT_TOP         = YES
+INHERIT_DOCS           = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 4
+ALIASES                =
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SUBGROUPING            = YES
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = NO
+EXTRACT_LOCAL_METHODS  = NO
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = YES
+HIDE_IN_BODY_DOCS      = NO
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = NO
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+SORT_BRIEF_DOCS        = NO
+SORT_BY_SCOPE_NAME     = NO
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ENABLED_SECTIONS       =
+MAX_INITIALIZER_LINES  = 30
+SHOW_USED_FILES        = NO
+SHOW_DIRECTORIES       = NO
+FILE_VERSION_FILTER    =
+QUIET                  = YES
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_NO_PARAMDOC       = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           =
+FILE_PATTERNS          =
+RECURSIVE              = NO
+EXCLUDE                =
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       =
+EXAMPLE_PATH           =
+EXAMPLE_PATTERNS       =
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = src/docs
+INPUT_FILTER           =
+FILTER_PATTERNS        =
+FILTER_SOURCE_FILES    = NO
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = NO
+ALPHABETICAL_INDEX     = YES
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          =
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            =
+HTML_FOOTER            =
+HTML_STYLESHEET        =
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+CHM_FILE               =
+HHC_LOCATION           =
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+GENERATE_LATEX         = NO
+GENERATE_RTF           = NO
+GENERATE_MAN           = NO
+GENERATE_XML           = NO
+GENERATE_AUTOGEN_DEF   = NO
+GENERATE_PERLMOD       = NO
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           =
+INCLUDE_FILE_PATTERNS  =
+PREDEFINED             = DOXYGEN
+EXPAND_AS_DEFINED      =
+SKIP_FUNCTION_MACROS   = YES
+TAGFILES               =
+GENERATE_TAGFILE       =
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = YES
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+GROUP_GRAPHS           = NO
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = NO
+INCLUDED_BY_GRAPH      = NO
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DIRECTORY_GRAPH        = NO
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               =
+DOTFILE_DIRS           =
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 0
+DOT_TRANSPARENT        = YES
+DOT_MULTI_TARGETS      = YES
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+SEARCHENGINE           = NO
--- a/src/docs/MainPage.dox
+++ b/src/docs/MainPage.dox
@ -0,0 +1,15 @@
+/* libutf8++/src/docs/MainPage.dox
+ *
+ *  (c)2006, Laurence Withers, <l@lwithers.me.uk>.
+ *  Released under the GNU GPLv2. See file COPYING or
+ *  http://www.gnu.org/copyleft/gpl.html for details.
+*/
+
+/*! \mainpage
+
+*/
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4
+*/
--- a/src/docs/build.default
+++ b/src/docs/build.default
@ -0,0 +1 @@
+source src/docs/build.docs
--- a/src/docs/build.docs
+++ b/src/docs/build.docs
@ -0,0 +1,43 @@
+# These are external variables, and shouldn't clash with anything else
+#  docs_BUILT
+#
+
+MONOLITHIC_DOC="${MONOLITHIC_DOC} $(echo src/docs/*.dox)"
+build_target monolithic
+
+if [ -z ${docs_BUILT} ]
+then
+    echo "Building documentation with Doxygen..."
+
+    DOXYFILE=obj/Doxyfile.docs
+
+    if [ ! -e ${DOXYFILE} ]
+    then
+        do_cmd cp src/docs/Doxyfile.in ${DOXYFILE} || return 1
+        echo "INPUT = ${MONOLITHIC_DOC}" >> ${DOXYFILE}
+        echo "PROJECT_NUMBER = ${VERSION}" >> ${DOXYFILE}
+    fi
+
+    MODIFIED=0
+    for file in ${MONOLITHIC_DOC}
+    do
+        if [ ${file} -nt html/index.html ]
+        then
+            MODIFIED=1
+            break
+        fi
+    done
+
+    if [ ${MODIFIED} -ne 0 ]
+    then
+        do_cmd doxygen ${DOXYFILE} || return 1
+        print_success "Documentation built"
+    else
+        print_success "Documentation is up to date"
+    fi
+
+    docs_BUILT=1
+fi
+
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/docs/build.install
+++ b/src/docs/build.install
@ -0,0 +1 @@
+source src/docs/build.install-docs
--- a/src/docs/build.install-docs
+++ b/src/docs/build.install-docs
@ -0,0 +1,21 @@
+build_target docs
+
+# create documentation directories
+echo "Installing documentation into ${DOCSDIR}"
+build_dir_tree "${DOCSDIR}/html" || return 1
+
+# copy across the Doxygen-generated documentation
+for file in html/*
+do
+    install_file ${file} ${DOCSDIR}/html 0644 || return 1
+done
+
+# copy across the generic files
+for file in COPYING README
+do
+    install_file ${file} ${DOCSDIR} 0644 || return 1
+done
+
+print_success "Documentation installed"
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/libutf8++/.params
+++ b/src/libutf8++/.params
@ -0,0 +1 @@
+c++ lib libutf8++ utf8
--- a/src/libutf8++/BottomHeader.h
+++ b/src/libutf8++/BottomHeader.h
@ -0,0 +1,9 @@
+/* libutf8++/src/lib/BottomHeader.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+} // namespace utf8 (opened in TopHeader.h)
+
+#endif // HEADER_libutf8pp
--- a/src/libutf8++/ForwardDeclare.h
+++ b/src/libutf8++/ForwardDeclare.h
@ -0,0 +1,9 @@
+/* libutf8++/src/lib/ForwardDeclare.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+// This file simply contains forward declarations of all libutf8++
+// classes, to facilitate header ordering, etc.
+
--- a/src/libutf8++/TopHeader.h
+++ b/src/libutf8++/TopHeader.h
@ -0,0 +1,23 @@
+/* libutf8++/src/lib/TopHeader.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+#ifndef HEADER_libutf8pp
+#define HEADER_libutf8pp
+
+// standard includes, or includes needed for type declarations
+
+#include <string>
+#include <stdexcept>
+#include <utf8.h>
+
+
+
+/*! \brief UTF-8 handling routines.
+
+The library's UTF-8 handling routines are all made available through this namespace.
+
+*/
+namespace utf8 {
--- a/src/libutf8++/TopSource.cpp
+++ b/src/libutf8++/TopSource.cpp
@ -0,0 +1,12 @@
+/* libutf8++/src/lib/TopSource.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+#include "utf8"
+
+// Below are all the includes used throughout the library.
+
+#include <cstring>   // memset
+#include <iomanip>
+#include <sstream>
--- a/src/libutf8++/build.default
+++ b/src/libutf8++/build.default
@ -0,0 +1 @@
+source src/libutf8++/build.lib
--- a/src/libutf8++/build.install
+++ b/src/libutf8++/build.install
@ -0,0 +1 @@
+source src/libutf8++/build.install-lib
--- a/src/libutf8++/build.install-lib
+++ b/src/libutf8++/build.install-lib
@ -0,0 +1,36 @@
+build_target libutf8++
+
+# make paths (this is for Gentoo in particular)
+build_dir_tree "${LIBDIR}" || return 1
+build_dir_tree "${PKGCONFDIR}" || return 1
+build_dir_tree "${INCLUDEDIR}" || return 1
+
+# install library
+echo "Installing libraries into '${LIBDIR}'"
+install_file "${libutf8pp}" "${LIBDIR}" 0755 || return 1
+BASE="${libutf8pp_BASE}.so"
+MAJOR="${BASE}.${SOMAJOR}"
+MINOR="${MAJOR}.${SOMINOR}"
+MICRO="${MINOR}.${SOMICRO}"
+# abort on the first failed link, like every other install step in this script
+install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" || return 1
+install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" || return 1
+install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" || return 1
+
+# install header
+echo "Installing header file '${libutf8pp_HEADER}' into ${INCLUDEDIR}"
+install_header "${libutf8pp_HEADER}" "${INCLUDEDIR}" 0644 || return 1
+
+# install pkgconfig file, filling in the @...@ placeholders of pkgconf.in
+echo "Installing package config file into ${PKGCONFDIR}"
+PKGCONFFILE="${PKGCONFDIR}/libutf8pp.pc"
+do_cmd rm -f "${PKGCONFFILE}" || return 1
+do_cmd_redir "${PKGCONFFILE}" sed \
+    -e "s,@VERSION@,${VERSION}," \
+    -e "s,@LIBDIR@,${FINALLIBDIR}," \
+    -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \
+    src/libutf8++/pkgconf.in || return 1
+do_cmd chmod 0644 "${PKGCONFFILE}" || return 1
+print_success "Done"
+
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/libutf8++/build.lib
+++ b/src/libutf8++/build.lib
@ -0,0 +1,51 @@
+# These are external variables, and shouldn't clash with anything else
+#  libutf8pp
+#  libutf8pp_BUILT
+#  libutf8pp_HEADER
+#  libutf8pp_BASE
+
+# libutf8pp_BUILT is set at the end of this block so the body runs at most once.
+if [ -z "${libutf8pp_BUILT}" ]
+then
+    libutf8pp_BASE=libutf8++
+    source src/libutf8++/soversion
+
+    libutf8pp="obj/${libutf8pp_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}"
+    SO_EXTRA="$(pkg-config libutf8 --libs --cflags) -lstdc++ -lc"
+
+    echo "Building library ${libutf8pp}..."
+
+    # build.monolithic sets SRC, HDR and MONOLITHIC_TESTS, which are used below
+    do_cmd source src/libutf8++/build.monolithic || return 1
+
+    # Relink when a build script, the generated header or the generated
+    # source is newer than the library.
+    MODIFIED=0
+    for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC}
+    do
+        if [ "${test}" -nt "${libutf8pp}" ]
+        then
+            MODIFIED=1
+            break
+        fi
+    done
+
+    if [ ${MODIFIED} -ne 0 ]
+    then
+        echo " Compiling"
+
+        SONAME="${libutf8pp_BASE}.so.${SOMAJOR}.${SOMINOR}"
+        # CFLAGS, SRC and SO_EXTRA are deliberately unquoted: they are
+        # space-separated lists.
+        do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libutf8pp}" \
+            -Wl,-soname,${SONAME} \
+            ${SRC} ${SO_EXTRA} || return 1
+
+        # make tests work
+        do_cmd ln -sf "$(basename "${libutf8pp}")" "obj/${SONAME}" || return 1
+
+        print_success "Library built"
+    else
+        print_success "Library up to date"
+    fi
+
+    libutf8pp_BUILT=1
+    libutf8pp_HEADER=${HDR}
+
+fi
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/libutf8++/build.monolithic
+++ b/src/libutf8++/build.monolithic
@ -0,0 +1,21 @@
+# These are external variables, and shouldn't clash with anything else
+#  libutf8pp_MONOLITHIC
+
+SRC="obj/libutf8++.cpp"
+HDR="obj/utf8"
+
+MONOLITHIC_TESTS="src/libutf8++/build.lib src/libutf8++/build.monolithic"
+
+if [ -z "${libutf8pp_MONOLITHIC}" ]
+then
+    MONOLITHIC_SOURCE="$(echo src/libutf8++/{TopHeader,ForwardDeclare,exception,string,{en,de}coder,BottomHeader}.h)"
+    make_monolithic ${HDR} C || return 1
+
+    MONOLITHIC_SOURCE="$(echo src/libutf8++/{TopSource,exception,string,{en,de}coder}.cpp)"
+    make_monolithic ${SRC} C || return 1
+
+    libutf8pp_MONOLITHIC=1
+    MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
+fi
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/libutf8++/decoder.cpp
+++ b/src/libutf8++/decoder.cpp
@ -0,0 +1,159 @@
+/* libutf8++/src/lib/decoder.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+// Sets up the libutf8 decoder state: zeroes it, allocates an output buffer of `hint` wide
+// characters (at least 2), installs the exception-throwing error callback, and points ctx.data
+// back at this object so the static callbacks can find it.
+Decoder::Decoder(size_t hint)
+    : replaceChar(0xFFFD) // defined value even before replaceOnError() is called
+{
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr_size = (hint < 2) ? 2 : hint;
+    ctx.wr = new wchar_t[ctx.wr_size];
+    ctx.error_callback = _exceptionOnError;
+    ctx.data = this;
+}
+
+
+
+// Releases the output buffer that was allocated with new[] by the constructor or by decode().
+Decoder::~Decoder()
+{
+    delete [] ctx.wr;
+}
+
+
+
+// Convenience overload: forwards the string's bytes and its explicit length to the
+// pointer/length overload.
+void Decoder::decode(const std::string& str)
+{
+    decode(str.data(), str.size());
+}
+
+
+
+// Decodes `amt` bytes starting at `str` (a negative `amt` means "until the terminating NUL") and
+// appends the result to `decoded`. The decoder is run repeatedly; each pass drains the output
+// buffer into `decoded`, and the buffer is doubled whenever input is still left over.
+void Decoder::decode(const char* str, ssize_t amt)
+{
+    ctx.rd = str;
+    ctx.rd_remain = amt;
+    while(ctx.rd_remain) {
+        utf8_decoder(&ctx);
+        decoded.append(ctx.wr, ctx.written);
+
+        // NUL-terminated input (negative count): stop once the read pointer is at the terminator
+        if(ctx.rd_remain < 0 && !*(ctx.rd)) break;
+        if(ctx.rd_remain) {
+            // Allocate the larger buffer before releasing the old one. If new[] throws, ctx.wr
+            // and ctx.wr_size still describe a valid buffer, so the destructor does not free a
+            // dangling pointer.
+            const size_t grown_size = ctx.wr_size * 2;
+            wchar_t* grown = new wchar_t[grown_size];
+            delete [] ctx.wr;
+            ctx.wr = grown;
+            ctx.wr_size = grown_size;
+        }
+    }
+}
+
+
+
+// Returns the `complete` flag of the libutf8 decoder state.
+bool Decoder::complete() const
+{
+    return ctx.complete;
+}
+
+
+
+// Wipes the decoder state and the accumulated output, keeping only the output buffer, its size
+// and the currently selected error callback.
+void Decoder::reset()
+{
+    // remember what has to survive the wipe
+    wchar_t* const kept_buffer = ctx.wr;
+    const size_t kept_size = ctx.wr_size;
+    const utf8_decode_error_callback kept_callback = ctx.error_callback;
+
+    decoded.clear();
+    memset(&ctx, 0, sizeof(ctx));
+
+    // restore the survivors and re-establish the back-pointer used by the callbacks
+    ctx.data = this;
+    ctx.error_callback = kept_callback;
+    ctx.wr = kept_buffer;
+    ctx.wr_size = kept_size;
+}
+
+
+
+// Routes subsequent decoding errors to _skipOnError.
+void Decoder::skipOnError()
+{
+    ctx.error_callback = _skipOnError;
+}
+
+
+
+// Stores the replacement character and routes subsequent decoding errors to _replaceOnError,
+// which reads it back through ctx->data.
+void Decoder::replaceOnError(wchar_t ch)
+{
+    replaceChar = ch;
+    ctx.error_callback = _replaceOnError;
+}
+
+
+
+// Routes subsequent decoding errors to _exceptionOnError (the mode installed by the constructor).
+void Decoder::exceptionOnError()
+{
+    ctx.error_callback = _exceptionOnError;
+}
+
+
+
+// Error callback for skip mode: ignores all of its arguments and returns the skip action.
+enum utf8_decode_error_action Decoder::_skipOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    // the casts to void only silence unused-parameter warnings
+    (void)ctx;
+    (void)error;
+    (void)newch;
+    return utf8_decode_error_action_skip;
+}
+
+
+
+// Error callback for replace mode: writes the owning Decoder's replaceChar to *newch and returns
+// the replace action.
+enum utf8_decode_error_action Decoder::_replaceOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    (void)error;
+    // ctx->data is set to the owning Decoder by the constructor and by reset()
+    Decoder* self = (utf8::Decoder*)(ctx->data);
+    *newch = self->replaceChar;
+    return utf8_decode_error_action_replace;
+}
+
+
+
+enum utf8_decode_error_action Decoder::_exceptionOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    (void)newch;
+    const char* desc = "unknown";
+
+    switch(error) {
+    case utf8_decode_error_lone_cchar:
+        desc = "An invalid continuation byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_not_cchar:
+        desc = "A multi-byte sequence contained an invalid byte.";
+        break;
+
+    case utf8_decode_error_not_schar:
+        desc = "An invalid byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_overlong:
+        desc = "An overlong encoding of a character was encountered.";
+        break;
+
+    case utf8_decode_error_illegal_cp:
+        desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered.";
+        break;
+    }
+
+    throw BadUTF8Sequence(desc, ctx);
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/decoder.h
+++ b/src/libutf8++/decoder.h
@ -0,0 +1,128 @@
+/* libutf8++/src/lib/decoder.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+/*! \brief Stateful UTF-8 decoder object.
+
+This object is used for stateful decoding of a UTF-8 byte stream. It can be fed the data in
+arbitrary chunks, even split on non-character boundaries. It writes its output into a wide character
+string.
+
+A variety of error handling modes are available. The default is to throw a BadUTF8Sequence
+exception, but you can change this with skipOnError() or replaceOnError().
+
+*/
+class Decoder {
+public:
+    /*! \brief Constructor.
+
+    \param hint Hint at number of characters to allocate space for in decoder buffer.
+
+    The constructor sets up the UTF-8 decoder. You can provide a hint as to the size of your input
+    stream chunks. This hint is the number of characters to allocate in the output buffer. If,
+    during a single decode operation, this buffer is filled, then it is doubled in size.
+
+     */
+    Decoder(size_t hint = 25);
+
+
+
+    /// Destructor.
+    ~Decoder();
+
+
+
+    /// Result of decoding operations (appended to).
+    std::wstring decoded;
+
+
+
+    /*! \brief UTF-8 decoder.
+
+    \param str Pointer to source data.
+    \param amt Number of bytes in source data (-1 for null terminated strings).
+    \throws BadUTF8Sequence.
+
+    This function will decode a chunk of UTF-8 data. The decoded data will be appended to whatever
+    is contained in the string decoded. You can check if the decoder ended on a character boundary
+    or not by calling complete().
+
+    */
+    void decode(const char* str, ssize_t amt);
+
+
+
+    /// Decode data stored in a std::string.
+    void decode(const std::string& str);
+
+
+
+    /// Returns \a true if the last call to \a decode() ended on a character boundary.
+    bool complete() const;
+
+
+
+    /*! \brief Resets the parser for a new UTF-8 stream.
+
+    This function will clear the internal state of the decoder so that it is ready for data from a
+    new source. This can be used if you have opened a new file, accepted a new connection, recovered
+    from an error, etc. It will also clear \a decoded.
+
+    */
+    void reset();
+
+
+
+    /*! \brief Set error handling to \e skip mode.
+
+    This function will set the error handling into \e skip mode. In this mode, any invalid UTF-8
+    byte sequences will simply be skipped altogether, and will not have any effect on the output in
+    \a decoded.
+
+    */
+    void skipOnError();
+
+
+
+    /*! \brief Set error handling to \e replace mode.
+
+    \param ch The replacement character that will appear in the output.
+
+    This function will set the error handling into \e replace mode. In this mode, any invalid UTF-8
+    byte sequences will be skipped, and a replacement character \a ch will be placed onto the output
+    in \a decoded. The default parameter is the Unicode replacement character (U+FFFD), which is
+    usually rendered as a question mark inside a diamond.
+
+    */
+    void replaceOnError(wchar_t ch = 0xFFFD);
+
+
+
+    /*! \brief Set error handling to \e exception mode (default).
+
+    This function will set the error handling to \e exception mode. In this mode, any invalid
+    UTF-8 byte sequences will cause a BadUTF8Sequence exception to be thrown. This is the default
+    mode.
+
+    */
+    void exceptionOnError();
+
+
+
+private:
+    // Not copyable: ctx.wr is an owning pointer released by the destructor, so a member-wise copy
+    // would free the same buffer twice. Declared but intentionally not defined.
+    Decoder(const Decoder&);
+    Decoder& operator=(const Decoder&);
+
+    struct utf8_decode_state ctx;
+    wchar_t replaceChar;
+
+    // Error callbacks handed to libutf8; one is installed per error handling mode.
+    static enum utf8_decode_error_action _skipOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+    static enum utf8_decode_error_action _replaceOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+    static enum utf8_decode_error_action _exceptionOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/encoder.cpp
+++ b/src/libutf8++/encoder.cpp
@ -0,0 +1,124 @@
+/* libutf8++/src/lib/encoder.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+// Sets up the libutf8 encoder state: zeroes it, allocates an output buffer of `hint` bytes
+// (at least 7), installs the exception-throwing error callback, and points ctx.data back at this
+// object so the static callbacks can find it.
+Encoder::Encoder(size_t hint)
+    : replaceChar(0xFFFD) // defined value even before replaceOnError() is called
+{
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr_size = (hint < 7) ? 7 : hint;
+    ctx.wr = new char[ctx.wr_size];
+    ctx.error_callback = _exceptionOnError;
+    ctx.data = this;
+}
+
+
+
+// Releases the output buffer that was allocated with new[] by the constructor or by encode().
+Encoder::~Encoder()
+{
+    delete [] ctx.wr;
+}
+
+
+
+// Wipes the encoder state, keeping only the output buffer, its size and the currently selected
+// error callback. Unlike Decoder::reset(), the accumulated output in `encoded` is left untouched.
+void Encoder::reset()
+{
+    // remember what has to survive the wipe
+    const utf8_encode_error_callback kept_callback = ctx.error_callback;
+    const size_t kept_size = ctx.wr_size;
+    char* const kept_buffer = ctx.wr;
+
+    memset(&ctx, 0, sizeof(ctx));
+
+    // restore the survivors and re-establish the back-pointer used by the callbacks
+    ctx.data = this;
+    ctx.error_callback = kept_callback;
+    ctx.wr_size = kept_size;
+    ctx.wr = kept_buffer;
+}
+
+
+
+// Convenience overload: forwards the string's characters and its explicit length to the
+// pointer/length overload.
+void Encoder::encode(const std::wstring& str)
+{
+    encode(str.data(), str.size());
+}
+
+
+
+// Encodes `amt` wide characters starting at `str` (a negative `amt` means "until the terminating
+// NUL") and appends the UTF-8 result to `encoded`. The encoder is run repeatedly; each pass
+// drains the output buffer into `encoded`, and the buffer is doubled whenever input is still left
+// over. Throws BadUnicodeChar when utf8_encoder() reports failure.
+void Encoder::encode(const wchar_t* str, ssize_t amt)
+{
+    ctx.rd = str;
+    ctx.rd_remain = amt;
+    while(ctx.rd_remain) {
+        if(!utf8_encoder(&ctx)) throw BadUnicodeChar(&ctx);
+        encoded.append(ctx.wr, ctx.written);
+
+        // NUL-terminated input (negative count): stop once the read pointer is at the terminator
+        if(ctx.rd_remain < 0 && !*(ctx.rd)) break;
+        if(ctx.rd_remain) {
+            // Allocate the larger buffer before releasing the old one. If new[] throws, ctx.wr
+            // and ctx.wr_size still describe a valid buffer, so the destructor does not free a
+            // dangling pointer.
+            const size_t grown_size = ctx.wr_size * 2;
+            char* grown = new char[grown_size];
+            delete [] ctx.wr;
+            ctx.wr = grown;
+            ctx.wr_size = grown_size;
+        }
+    }
+}
+
+
+
+// Routes subsequent encoding errors to _skipOnError.
+void Encoder::skipOnError()
+{
+    ctx.error_callback = _skipOnError;
+}
+
+
+
+// Stores the replacement character and routes subsequent encoding errors to _replaceOnError,
+// which reads it back through ctx->data.
+void Encoder::replaceOnError(wchar_t ch)
+{
+    replaceChar = ch;
+    ctx.error_callback = _replaceOnError;
+}
+
+
+
+// Routes subsequent encoding errors to _exceptionOnError (the mode installed by the constructor).
+void Encoder::exceptionOnError()
+{
+    ctx.error_callback = _exceptionOnError;
+}
+
+
+
+// Error callback for skip mode: ignores its arguments and returns the skip action.
+enum utf8_encode_error_action Encoder::_skipOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    // the casts to void only silence unused-parameter warnings
+    (void)ctx;
+    (void)newch;
+    return utf8_encode_error_action_skip;
+}
+
+
+
+// Error callback for replace mode: writes the owning Encoder's replaceChar to *newch and returns
+// the replace action.
+enum utf8_encode_error_action Encoder::_replaceOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    // ctx->data is set to the owning Encoder by the constructor and by reset()
+    Encoder* self = (utf8::Encoder*)(ctx->data);
+    *newch = self->replaceChar;
+    return utf8_encode_error_action_replace;
+}
+
+
+
+// Error callback for exception mode: throws BadUnicodeChar built from the encoder state. It
+// never returns normally.
+enum utf8_encode_error_action Encoder::_exceptionOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    (void)newch; // no replacement character is produced in this mode
+    throw BadUnicodeChar(ctx);
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/encoder.h
+++ b/src/libutf8++/encoder.h
@ -0,0 +1,108 @@
+/* libutf8++/src/lib/encoder.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+/*! \brief UTF-8 encoder object.
+
+This object is used to encode Unicode wide characters into UTF-8. It can be fed chunks of characters
+which it then encodes, appending the result to an internal buffer.
+
+*/
+class Encoder {
+public:
+    /*! \brief Constructor.
+
+    \param hint Number of bytes to allocate for the encoding buffer.
+
+    The constructor sets up the encoder and allocates some space for an internal buffer. You can
+    hint at how large you expect the chunks to be encoded will be. If an encoding operation fills
+    the buffer without consuming all the input data, the buffer will be doubled in size for the
+    next round.
+
+    */
+    Encoder(size_t hint = 100);
+
+    /// Destructor.
+    virtual ~Encoder();
+
+
+
+    /// UTF-8 output data is appended to this string.
+    std::string encoded;
+
+
+
+    /*! \brief Encode some data into UTF-8.
+
+    \param str Pointer to the character array to encode.
+    \param amt Number of characters to encode (-1 for null terminated strings).
+    \throws BadUnicodeChar.
+
+    This function performs an encoding of some Unicode characters into UTF-8. It appends the result
+    onto \a encoded.
+
+    */
+    void encode(const wchar_t* str, ssize_t amt);
+
+    /// Encode a std::wstring.
+    void encode(const std::wstring& str);
+
+
+
+    /// Reset the encoder for a new character stream. Does not clear \a encoded.
+    void reset();
+
+
+
+    /*! \brief Set error handling to \e skip mode.
+
+    This function will set the error handling into \e skip mode. In this mode, any invalid Unicode
+    characters will simply be skipped altogether, and will not have any effect on the output in
+    \a encoded.
+
+    */
+    void skipOnError();
+
+
+
+    /*! \brief Set error handling to \e replace mode.
+
+    \param ch The replacement character that will appear in the output.
+
+    This function will set the error handling into \e replace mode. In this mode, any invalid
+    Unicode characters will be skipped, and a replacement character \a ch will be placed onto the
+    output in \a encoded. The default parameter is the Unicode replacement character (U+FFFD),
+    which is usually rendered as a question mark inside a diamond.
+
+    */
+    void replaceOnError(wchar_t ch = 0xFFFD);
+
+
+
+    /*! \brief Set error handling to \e exception mode (default).
+
+    This function will set the error handling to \e exception mode. In this mode, any invalid
+    Unicode characters will cause a BadUnicodeChar exception to be thrown. This is the default
+    mode.
+
+    */
+    void exceptionOnError();
+
+
+
+private:
+    // Not copyable: ctx.wr is an owning pointer released by the destructor, so a member-wise copy
+    // would free the same buffer twice. Declared but intentionally not defined.
+    Encoder(const Encoder&);
+    Encoder& operator=(const Encoder&);
+
+    struct utf8_encode_state ctx;
+    wchar_t replaceChar;
+
+    // Error callbacks handed to libutf8; one is installed per error handling mode.
+    static enum utf8_encode_error_action _skipOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+    static enum utf8_encode_error_action _replaceOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+    static enum utf8_encode_error_action _exceptionOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/exception.cpp
+++ b/src/libutf8++/exception.cpp
@ -0,0 +1,81 @@
+/* libutf8++/src/lib/exception.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+// Stores a copy of the message; what() returns it.
+Error::Error(const std::string& reason)
+    : reason(reason)
+{
+}
+
+
+
+// Returns the stored reason as a C string; the pointer refers to storage owned by `reason`.
+// NOTE(review): this overload is non-const, so it is a different function from
+// std::exception::what() const and is not reached through a std::exception reference.
+const char* Error::what()
+{
+    return reason.c_str();
+}
+
+
+
+// Captures the offending character and its position from the encoder state. The state counts
+// lines and columns from 0 (format() adds 1 when printing), so 1 is added here as well to give
+// the 1-based `line` and `col` promised by the class documentation and used by BadUTF8Sequence.
+BadUnicodeChar::BadUnicodeChar(const struct utf8_encode_state* ctx)
+    : Error(format(ctx)), badChar(*ctx->rd), line(ctx->line + 1), col(ctx->col + 1),
+    char_offset(ctx->char_offset)
+{
+}
+
+
+
+std::string BadUnicodeChar::format(const struct utf8_encode_state* ctx)
+{
+    std::ostringstream str;
+
+    str << "Invalid Unicode code point encountered."
+           "\n  Position       : line "
+        << ctx->line + 1
+        << ", column "
+        << ctx->col + 1
+        << "\n  Stream offset  : "
+        << ctx->char_offset
+        << " characters\n  Character value: 0x"
+        << std::hex
+        << *(ctx->rd);
+
+    return str.str();
+}
+
+
+
+// Captures the description and position from the decoder state. `line` and `col` are stored
+// 1-based (state value + 1); the character and byte offsets are stored as reported.
+// format() is invoked to build the base-class message before any member of this object has been
+// initialised, so it must not touch members.
+BadUTF8Sequence::BadUTF8Sequence(const std::string& description,
+                                 const struct utf8_decode_state* ctx)
+    : Error(format(description, ctx)), description(description), line(ctx->line + 1),
+    col(ctx->col + 1), char_offset(ctx->char_offset), byte_offset(ctx->byte_offset)
+{
+}
+
+
+
+std::string BadUTF8Sequence::format(const std::string& description,
+                                    const struct utf8_decode_state* ctx)
+{
+    std::ostringstream str;
+
+    str << "Bad byte sequence in UTF-8 data.\n"
+        "  Reason  : " << description
+        << "\n  Position: line " << ctx->line + 1
+        << ", column " << ctx->col + 1
+        << "\n  Offset  : " << ctx->char_offset << " chars, " << ctx->byte_offset << " bytes";
+
+    return str.str();
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/exception.h
+++ b/src/libutf8++/exception.h
@ -0,0 +1,96 @@
+/* libutf8++/src/lib/exception.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+/*! \brief Exception base class.
+
+This is the base class for all libutf8++ exceptions. It contains one member, \a reason, which
+allows you to print a human-readable description of the error. To recover the actual type, you can
+refer to the more specific derived classes.
+
+*/
+class Error : public std::exception {
+public:
+    /// Human-readable reason for error.
+    std::string reason;
+
+    /// Constructor.
+    Error(const std::string& reason);
+
+    /// Destructor.
+    virtual ~Error() throw()
+    { }
+
+    /// Find what caused the error.
+    virtual const char* what();
+
+    /*! \brief Find what caused the error (std::exception interface).
+
+    This const overload is the one that overrides std::exception::what(), so the reason is also
+    reported when the exception is caught as a \c std::exception or through a const reference.
+
+    */
+    virtual const char* what() const throw()
+    { return reason.c_str(); }
+};
+
+
+
+/*! \brief Invalid Unicode character exception.
+
+This exception is thrown when encoding Unicode into UTF-8 and an invalid character is encountered.
+
+*/
+class BadUnicodeChar : public Error {
+public:
+    /// A copy of the invalid character.
+    wchar_t badChar;
+
+    /// Line of input data at which error occurred (starts at 1).
+    int line;
+
+    /// Column of input data at which error occurred (starts at 1).
+    int col;
+
+    /// Character offset of input data at which error occurred.
+    int char_offset;
+
+    /// Constructor. Copies the character and position out of the encoder state \a ctx.
+    BadUnicodeChar(const struct utf8_encode_state* ctx);
+
+private:
+    /// Builds the human-readable message that the constructor passes to Error.
+    static std::string format(const struct utf8_encode_state* ctx);
+};
+
+
+
+/*! \brief Invalid UTF-8 sequence exception.
+
+This exception is thrown when decoding UTF-8 and an invalid sequence is encountered. This could be
+a nonsensical sequence, a redundantly-encoded character or truncated source data. It contains some
+variables for allowing detailed diagnostics.
+
+*/
+class BadUTF8Sequence : public Error {
+public:
+    /// Description of the error, for human diagnostics.
+    std::string description;
+
+    /// Line of input data at which error occurred (starts at 1).
+    int line;
+
+    /// Column of input data at which error occurred (starts at 1).
+    int col;
+
+    /// Character offset of input data at which error occurred.
+    int char_offset;
+
+    /// Byte offset of input data at which error occurred.
+    int byte_offset;
+
+    /// Constructor.
+    BadUTF8Sequence(const std::string& description, const struct utf8_decode_state* ctx);
+
+    /// Destructor.
+    ~BadUTF8Sequence() throw() { }
+
+private:
+    /// Builds the message passed to Error. Static because the constructor calls it from its
+    /// initialiser list, before this object exists; it uses only its arguments.
+    static std::string format(const std::string& description, const struct utf8_decode_state* ctx);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/pkgconf.in
+++ b/src/libutf8++/pkgconf.in
@ -0,0 +1,21 @@
+# libutf8++/src/lib/libutf8++/pkgconf.in
+#
+#  Metadata file for pkg-config
+#  ( http://www.freedesktop.org/software/pkgconfig/ )
+#
+#  (c)2006, Laurence Withers, <l@lwithers.me.uk>.
+#  Released under the GNU GPLv2. See file COPYING or
+#  http://www.gnu.org/copyleft/gpl.html for details.
+#
+
+# Name, description
+Name: libutf8++
+Description: C++ wrapper around libutf8 (library for handling UTF-8)
+Version: @VERSION@
+
+# Requirements (the installed header includes <utf8.h> from libutf8)
+Requires: libutf8
+
+# Compilation information
+Libs: -L@LIBDIR@ -lutf8++
+Cflags: -I@INCLUDEDIR@
--- a/src/libutf8++/soversion
+++ b/src/libutf8++/soversion
@ -0,0 +1,17 @@
+# libutf8++/src/libutf8++/soversion
+#
+#  (c)2006, Laurence Withers, <l@lwithers.me.uk>.
+#  Released under the GNU GPLv2. See file COPYING or
+#  http://www.gnu.org/copyleft/gpl.html for details.
+#
+
+
+
+# SOMAJOR and SOMINOR are included in the library's soname. They need to
+# be bumped on a binary-incompatible release. They are both single
+# integers.
+SOMAJOR=0
+SOMINOR=0
+
+# SOMICRO is bumped every time there is a binary-compatible release.
+SOMICRO=0
--- a/src/libutf8++/string.cpp
+++ b/src/libutf8++/string.cpp
@ -0,0 +1,126 @@
+/* libutf8++/src/libutf8++/string.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+static enum utf8_decode_error_action decode_replace_callback(const struct utf8_decode_state* ctx,
+    enum utf8_decode_error error, wchar_t* newch)
+{
+    (void)error;
+    *newch = *(wchar_t*)(ctx->data);
+    return utf8_decode_error_action_replace;
+}
+
+
+
+static enum utf8_decode_error_action decode_error_callback(const struct utf8_decode_state* ctx,
+    enum utf8_decode_error error, wchar_t* newch)
+{
+    (void)newch;
+    const char* desc = "unknown";
+
+    switch(error) {
+    case utf8_decode_error_lone_cchar:
+        desc = "An invalid continuation byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_not_cchar:
+        desc = "A multi-byte sequence contained an invalid byte.";
+        break;
+
+    case utf8_decode_error_not_schar:
+        desc = "An invalid byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_overlong:
+        desc = "An overlong encoding of a character was encountered.";
+        break;
+
+    case utf8_decode_error_illegal_cp:
+        desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered.";
+        break;
+    }
+
+    throw BadUTF8Sequence(desc, ctx);
+}
+
+
+
+/// Decodes \a utf8 into a wide-character string. The input is run through utf8_decoder() in
+/// chunks of up to 128 characters, each chunk being appended to the result. With \a force set,
+/// decoding errors are replaced by \a replace; otherwise the error callback throws
+/// BadUTF8Sequence.
+std::wstring decode(const std::string& utf8, bool force, wchar_t replace)
+{
+    wchar_t chunk[128];
+    struct utf8_decode_state state;
+    memset(&state, 0, sizeof(state));
+
+    state.rd = utf8.data();
+    state.rd_remain = utf8.size();
+    state.wr = chunk;
+    state.wr_size = sizeof(chunk) / sizeof(chunk[0]);
+    if(!force) {
+        state.error_callback = decode_error_callback;
+    } else {
+        state.error_callback = decode_replace_callback;
+        state.data = &replace; // read back by decode_replace_callback
+    }
+
+    std::wstring result;
+    while(state.rd_remain != 0) {
+        utf8_decoder(&state);
+        result.append(chunk, state.written);
+    }
+    return result;
+}
+
+
+
+static enum utf8_encode_error_action encode_replace_callback(const struct utf8_encode_state* ctx,
+    wchar_t* newch)
+{
+    *newch = *(wchar_t*)(ctx->data);
+    return utf8_encode_error_action_replace;
+}
+
+
+
+/// Encodes \a ustr as UTF-8. The input is run through utf8_encoder() in chunks of up to 512
+/// bytes, each chunk being appended to the result. With \a force set, a callback that
+/// substitutes \a replace is installed; if utf8_encoder() reports failure, BadUnicodeChar is
+/// thrown.
+std::string encode(const std::wstring& ustr, bool force, wchar_t replace)
+{
+    char chunk[512];
+    struct utf8_encode_state state;
+    memset(&state, 0, sizeof(state));
+
+    state.rd = ustr.data();
+    state.rd_remain = ustr.size();
+    state.wr = chunk;
+    state.wr_size = sizeof(chunk);
+    if(force) {
+        state.error_callback = encode_replace_callback;
+        state.data = &replace; // read back by encode_replace_callback
+    }
+
+    std::string result;
+    while(state.rd_remain != 0) {
+        if(!utf8_encoder(&state)) throw BadUnicodeChar(&state);
+        result.append(chunk, state.written);
+    }
+    return result;
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/libutf8++/string.h
+++ b/src/libutf8++/string.h
@ -0,0 +1,37 @@
+/* libutf8++/src/libutf8++/string.h
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+/*! \brief Decode UTF-8.
+
+\param utf8 The UTF-8 encoded data.
+\param force If set to \a true, invalid sequences are replaced by \a replace instead of being
+    reported as errors.
+\param replace If \a force is \a true, then invalid UTF-8 sequences will be replaced by this
+    character. It is ignored if \a force is \a false.
+\returns The Unicode wide-character string representation.
+\throws BadUTF8Sequence if \a force is \a false and there is an invalid byte sequence in the UTF-8
+    source data.
+
+This function will decode a UTF-8 source string into a Unicode wide-character string. It has a force
+mode whereby any errors will be inhibited and a best-effort attempt will be made.
+
+*/
+std::wstring decode(const std::string& utf8, bool force = false, wchar_t replace = 0xFFFD);
+
+
+
+/*! \brief Encode UTF-8.
+
+\param ustr The Unicode wide-character string.
+\param force If set to \a true, invalid characters are replaced by \a replace instead of being
+    reported as errors.
+\param replace If \a force is \a true, then invalid Unicode characters in \a ustr will be replaced
+    by this character. It is ignored if \a force is \a false.
+\returns The UTF-8 transformed representation of \a ustr.
+\throws BadUnicodeChar on invalid characters in the source data.
+
+This function will encode a Unicode wide-character string into a UTF-8 transformed representation.
+It has a force mode whereby any errors will be inhibited and a best-effort attempt will be made.
+
+*/
+std::string encode(const std::wstring& ustr, bool force = false, wchar_t replace = 0xFFFD);
--- a/src/tests/.params
+++ b/src/tests/.params
@ -0,0 +1 @@
+c++ tests tests libutf8++
--- a/src/tests/build.default
+++ b/src/tests/build.default
@ -0,0 +1,3 @@
+source src/tests/build.tests
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/tests/build.tests
+++ b/src/tests/build.tests
@ -0,0 +1,43 @@
+# These are external variables, and shouldn't clash with anything else
+#  tests_BUILT
+#
+
+# The test programs link against the library, so it must be built first.
+build_target libutf8++ || return 1
+
+# tests_BUILT records that this fragment has already run, so sourcing it again is a no-op.
+if [ -z "${tests_BUILT}" ]
+then
+    # LIBS, EXTRAS and CFLAGS are whitespace-separated argument lists and are therefore
+    # expanded unquoted on purpose below.
+    LIBS="${libutf8pp} "
+    EXTRAS=""
+
+    echo "Building test programs..."
+    do_cmd mkdir -p obj/tests || return 1
+
+    for SRC in src/tests/*.cpp
+    do
+        # If the glob matched nothing it is left as a literal pattern; skip it.
+        [ -e "${SRC}" ] || continue
+
+        TEST="obj/tests/$(basename "${SRC}" .cpp)"
+
+        # Rebuild if the library, the source file or this build script is newer than the
+        # test binary (-nt is also true when the binary does not exist yet).
+        MODIFIED=0
+        for file in ${LIBS} "${SRC}" src/tests/build.tests
+        do
+            if [ "${file}" -nt "${TEST}" ]
+            then
+                MODIFIED=1
+                break
+            fi
+        done
+
+        if [ "${MODIFIED}" -ne 0 ]
+        then
+            do_cmd ${CXX} -Iobj ${CFLAGS} -o "${TEST}" "${SRC}" ${LIBS} ${EXTRAS} || return 1
+            print_success "Built ${TEST}"
+        else
+            print_success "${TEST} is up to date"
+        fi
+    done
+
+    print_success "All tests built"
+
+    tests_BUILT=1
+fi
+
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
--- a/src/tests/objects.cpp
+++ b/src/tests/objects.cpp
@ -0,0 +1,85 @@
+/* libutf8++/src/tests/objects.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+#include "utf8"
+#include <iostream>
+#include <iomanip>
+#include <fcntl.h>
+#include <unistd.h>
+
+
+
+/// Fills \a buf with \a ch random characters read from /dev/urandom, each masked to 31 bits.
+///
+/// \param buf Destination buffer; must have room for \a ch characters.
+/// \param ch Number of characters to generate.
+/// \throws int (the value 1) if /dev/urandom cannot be opened or read; a diagnostic is printed
+///     with perror() first.
+void make_random(wchar_t* buf, int ch)
+{
+    int fd = open("/dev/urandom", O_RDONLY);
+    if(fd < 0) {
+        perror("open(\"/dev/urandom\")");
+        throw 1;
+    }
+
+    // read() may legitimately return fewer bytes than requested, so keep going until the
+    // buffer is full.
+    char* dest = reinterpret_cast<char*>(buf);
+    size_t remain = (ch > 0) ? static_cast<size_t>(ch) * sizeof(wchar_t) : 0;
+    while(remain) {
+        ssize_t amt = read(fd, dest, remain);
+        if(amt <= 0) {
+            // Report before close() so that errno is not disturbed, then release the
+            // descriptor rather than leaking it.
+            perror("read(\"/dev/urandom\")");
+            close(fd);
+            throw 1;
+        }
+        dest += amt;
+        remain -= static_cast<size_t>(amt);
+    }
+    close(fd);
+
+    // Clear the top bit of each character.
+    for(int i = 0; i < ch; ++i) {
+        buf[i] &= 0x7FFFFFFF;
+    }
+}
+
+
+
+/// Round-trip test for the Encoder and Decoder objects: encodes 1024 random characters, decodes
+/// the result and checks that it matches the original.
+///
+/// \returns 0 on success (or after printing the --print-summary line), 1 if the round trip
+///     does not match or an exception is caught.
+int main(int argc, char* argv[])
+{
+    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
+        std::cout << "Performs some tests on the Encoder and Decoder objects.\n";
+        return 0;
+    }
+
+    int ret = 0;
+    try {
+        wchar_t wch[1024];
+        make_random(wch, 1024);
+
+        std::wstring ustr;
+        ustr.assign(wch, 1024);
+
+        utf8::Encoder encoder;
+        utf8::Decoder decoder;
+
+        encoder.encode(ustr);
+        decoder.decode(encoder.encoded);
+
+        if(ustr != decoder.decoded) {
+            std::cerr << "Decoded string does not match original.\n";
+            for(size_t i = 0, end = std::min(ustr.size(), decoder.decoded.size()); i != end; ++i) {
+                if(ustr[i] != decoder.decoded[i]) {
+                    // Print the characters as integers: inserting a wchar_t into a narrow
+                    // stream relies on an implicit promotion and is ill-formed from C++20.
+                    std::cerr << std::dec << std::setfill(' ') << std::setw(4) << i
+                            << std::setfill('0') << std::hex << ": 0x"
+                            << std::setw(8) << static_cast<unsigned long>(ustr[i]) << " != "
+                            << std::setw(8) << static_cast<unsigned long>(decoder.decoded[i])
+                            << "\n";
+                }
+            }
+            std::cerr << "Original size " << std::dec << ustr.size()
+                    << ", decoded size " << decoder.decoded.size() << std::endl;
+            return 1;
+        }
+
+        std::cout << "Success.\n";
+    }
+    catch(const utf8::Error& e) {
+        std::cerr << e.reason << std::endl;
+        ret = 1;
+    }
+    catch(...) {
+        // make_random() signals failure with `throw 1`; without this handler that would
+        // escape main() and terminate the program abnormally.
+        std::cerr << "Unknown exception caught." << std::endl;
+        ret = 1;
+    }
+
+    return ret;
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/tests/strings.cpp
+++ b/src/tests/strings.cpp
@ -0,0 +1,82 @@
+/* libutf8++/src/tests/strings.cpp
+ *
+ *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ *  COPYING for more information / terms of license.
+*/
+
+#include "utf8"
+#include <iostream>
+#include <iomanip>
+#include <fcntl.h>
+#include <unistd.h>
+
+
+
+/// Fills \a buf with \a ch random characters read from /dev/urandom, each masked to 31 bits.
+///
+/// \param buf Destination buffer; must have room for \a ch characters.
+/// \param ch Number of characters to generate.
+/// \throws int (the value 1) if /dev/urandom cannot be opened or read; a diagnostic is printed
+///     with perror() first.
+void make_random(wchar_t* buf, int ch)
+{
+    int fd = open("/dev/urandom", O_RDONLY);
+    if(fd < 0) {
+        perror("open(\"/dev/urandom\")");
+        throw 1;
+    }
+
+    // read() may legitimately return fewer bytes than requested, so keep going until the
+    // buffer is full.
+    char* dest = reinterpret_cast<char*>(buf);
+    size_t remain = (ch > 0) ? static_cast<size_t>(ch) * sizeof(wchar_t) : 0;
+    while(remain) {
+        ssize_t amt = read(fd, dest, remain);
+        if(amt <= 0) {
+            // Report before close() so that errno is not disturbed, then release the
+            // descriptor rather than leaking it.
+            perror("read(\"/dev/urandom\")");
+            close(fd);
+            throw 1;
+        }
+        dest += amt;
+        remain -= static_cast<size_t>(amt);
+    }
+    close(fd);
+
+    // Clear the top bit of each character.
+    for(int i = 0; i < ch; ++i) {
+        buf[i] &= 0x7FFFFFFF;
+    }
+}
+
+
+
+/// Round-trip test for the string routines: encodes 1024 random characters with utf8::encode(),
+/// decodes the result with utf8::decode() and checks that it matches the original.
+///
+/// \returns 0 on success (or after printing the --print-summary line), 1 if the round trip
+///     does not match or an exception is caught.
+int main(int argc, char* argv[])
+{
+    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
+        std::cout << "Performs some tests on the string encode/decode routines.\n";
+        return 0;
+    }
+
+    int ret = 0;
+    try {
+        wchar_t wch[1024];
+        make_random(wch, 1024);
+
+        std::wstring ustr1, ustr2;
+        std::string utf8;
+        ustr1.assign(wch, 1024);
+        utf8 = utf8::encode(ustr1);
+        ustr2 = utf8::decode(utf8);
+
+        if(ustr1 != ustr2) {
+            std::cerr << "Decoded string does not match original.\n";
+            for(size_t i = 0, end = std::min(ustr1.size(), ustr2.size()); i != end; ++i) {
+                if(ustr1[i] != ustr2[i]) {
+                    // Print the characters as integers: inserting a wchar_t into a narrow
+                    // stream relies on an implicit promotion and is ill-formed from C++20.
+                    std::cerr << std::dec << std::setfill(' ') << std::setw(4) << i
+                            << std::setfill('0') << std::hex << ": 0x"
+                            << std::setw(8) << static_cast<unsigned long>(ustr1[i]) << " != "
+                            << std::setw(8) << static_cast<unsigned long>(ustr2[i]) << "\n";
+                }
+            }
+            std::cerr << "Original size " << std::dec << ustr1.size()
+                    << ", decoded size " << ustr2.size() << std::endl;
+            return 1;
+        }
+
+        std::cout << "Success.\n";
+    }
+    catch(const utf8::Error& e) {
+        std::cerr << e.reason << std::endl;
+        ret = 1;
+    }
+    catch(...) {
+        // make_random() signals failure with `throw 1`; without this handler that would
+        // escape main() and terminate the program abnormally.
+        std::cerr << "Unknown exception caught." << std::endl;
+        ret = 1;
+    }
+
+    return ret;
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
--- a/src/tests/template
+++ b/src/tests/template
@ -0,0 +1,44 @@
+/* libutf8++/src/tests/???.cpp
+ *
+ *  (c)2006, Laurence Withers, <l@lwithers.me.uk>.
+ *  Released under the GNU GPLv2. See file COPYING or
+ *  http://www.gnu.org/copyleft/gpl.html for details.
+*/
+
+#include "utf8"
+
+#include <iostream>
+
+
+
+/// Skeleton entry point for a new test program.
+///
+/// \returns 0 on success (or after printing the --print-summary line), 1 if an exception is
+///     caught.
+int main(int argc, char* argv[])
+{
+    // With the single argument --print-summary, print a one-line description and exit.
+    const bool want_summary = (argc == 2) && (strcmp(argv[1], "--print-summary") == 0);
+    if(want_summary) {
+        std::cout << "One line summary.\n";
+        return 0;
+    }
+
+    if(argc == 1) {
+        // empty argument list
+    }
+
+    // Exit status; set to 1 on failure.
+    int ret = 0;
+
+    try {
+        // TODO
+    }
+    catch(std::exception& e) {
+        ret = 1;
+        std::cerr << e.what() << std::endl;
+    }
+    catch(...) {
+        ret = 1;
+        std::cerr << "Unknown exception caught." << std::endl;
+    }
+
+    return ret;
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4
+*/