diff --git a/README b/README index a59d035..ba60856 100644 --- a/README +++ b/README @@ -10,5 +10,14 @@ Really Quick Instructions To build: ./make.sh To install: ./make.sh install (you might want to set PREFIX, by default it's /usr/local) +Documentation is automatically built using doxygen. -@TODO@ +Dependencies +------------ + +libutf8, http://www.lwithers.me.uk/projects/libutf8/ + +Project Homepage +---------------- + +http://www.lwithers.me.uk/projects/libutf8++/ diff --git a/src/docs/.params b/src/docs/.params new file mode 100644 index 0000000..efd9ae0 --- /dev/null +++ b/src/docs/.params @@ -0,0 +1 @@ +doxygen docs docs diff --git a/src/docs/Doxyfile.in b/src/docs/Doxyfile.in new file mode 100644 index 0000000..a6440c0 --- /dev/null +++ b/src/docs/Doxyfile.in @@ -0,0 +1,146 @@ +# libutf8++/src/docs/Doxyfile.in +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. +# + +PROJECT_NAME = libutf8++ +OUTPUT_DIRECTORY = +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = YES +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = YES +DETAILS_AT_TOP = YES +INHERIT_DOCS = YES +DISTRIBUTE_GROUP_DOC = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +SUBGROUPING = YES +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = NO +EXTRACT_LOCAL_METHODS = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = YES +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES 
+GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = NO +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = +QUIET = YES +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = YES +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +FILE_PATTERNS = +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = src/docs +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +VERBATIM_HEADERS = NO +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +GENERATE_LATEX = NO +GENERATE_RTF = NO +GENERATE_MAN = NO +GENERATE_XML = NO +GENERATE_AUTOGEN_DEF = NO +GENERATE_PERLMOD = NO +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = DOXYGEN +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = NO +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = NO +INCLUDED_BY_GRAPH = NO +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = NO +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = YES +DOT_MULTI_TARGETS = YES 
+GENERATE_LEGEND = YES +DOT_CLEANUP = YES +SEARCHENGINE = NO diff --git a/src/docs/MainPage.dox b/src/docs/MainPage.dox new file mode 100644 index 0000000..e65e88d --- /dev/null +++ b/src/docs/MainPage.dox @@ -0,0 +1,15 @@ +/* libutf8++/src/docs/MainPage.dox + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +/*! \mainpage + +*/ + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/docs/build.default b/src/docs/build.default new file mode 100644 index 0000000..ca22639 --- /dev/null +++ b/src/docs/build.default @@ -0,0 +1 @@ +source src/docs/build.docs diff --git a/src/docs/build.docs b/src/docs/build.docs new file mode 100644 index 0000000..653c323 --- /dev/null +++ b/src/docs/build.docs @@ -0,0 +1,43 @@ +# These are external variables, and shouldn't clash with anything else +# docs_BUILT +# + +MONOLITHIC_DOC="${MONOLITHIC_DOC} $(echo src/docs/*.dox)" +build_target monolithic + +if [ -z ${docs_BUILT} ] +then + echo "Building documentation with Doxygen..." + + DOXYFILE=obj/Doxyfile.docs + + if [ ! 
-e ${DOXYFILE} ] + then + do_cmd cp src/docs/Doxyfile.in ${DOXYFILE} || return 1 + echo "INPUT = ${MONOLITHIC_DOC}" >> ${DOXYFILE} + echo "PROJECT_NUMBER = ${VERSION}" >> ${DOXYFILE} + fi + + MODIFIED=0 + for file in ${MONOLITHIC_DOC} + do + if [ ${file} -nt html/index.html ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + do_cmd doxygen ${DOXYFILE} || return 1 + print_success "Documentation built" + else + print_success "Documentation is up to date" + fi + + docs_BUILT=1 +fi + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/docs/build.install b/src/docs/build.install new file mode 100644 index 0000000..016c75c --- /dev/null +++ b/src/docs/build.install @@ -0,0 +1 @@ +source src/docs/build.install-docs diff --git a/src/docs/build.install-docs b/src/docs/build.install-docs new file mode 100644 index 0000000..66167d3 --- /dev/null +++ b/src/docs/build.install-docs @@ -0,0 +1,21 @@ +build_target docs + +# create documentation directories +echo "Installing documentation into ${DOCSDIR}" +build_dir_tree "${DOCSDIR}/html" || return 1 + +# copy across the Doxygen-generated documentation +for file in html/* +do + install_file ${file} ${DOCSDIR}/html 0644 || return 1 +done + +# copy across the generic files +for file in COPYING README +do + install_file ${file} ${DOCSDIR} 0644 || return 1 +done + +print_success "Documentation installed" +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libutf8++/.params b/src/libutf8++/.params new file mode 100644 index 0000000..a095052 --- /dev/null +++ b/src/libutf8++/.params @@ -0,0 +1 @@ +c++ lib libutf8++ utf8 diff --git a/src/libutf8++/BottomHeader.h b/src/libutf8++/BottomHeader.h new file mode 100644 index 0000000..65869c9 --- /dev/null +++ b/src/libutf8++/BottomHeader.h @@ -0,0 +1,9 @@ +/* libutf8++/src/lib/BottomHeader.h + * + * (c)2006, Laurence Withers. 
Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +} + +#endif diff --git a/src/libutf8++/ForwardDeclare.h b/src/libutf8++/ForwardDeclare.h new file mode 100644 index 0000000..e785736 --- /dev/null +++ b/src/libutf8++/ForwardDeclare.h @@ -0,0 +1,9 @@ +/* libutf8++/src/lib/ForwardDeclare.h + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +// This file simply contains forward declarations of all libutf8++ +// classes, to facilitate header ordering, etc. + diff --git a/src/libutf8++/TopHeader.h b/src/libutf8++/TopHeader.h new file mode 100644 index 0000000..a72ac72 --- /dev/null +++ b/src/libutf8++/TopHeader.h @@ -0,0 +1,23 @@ +/* libutf8++/src/lib/TopHeader.h + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#ifndef HEADER_libutf8pp +#define HEADER_libutf8pp + +// standard includes, or includes needed for type declarations + +#include +#include +#include + + + +/*! \brief UTF-8 handling routines. + +The library's UTF-8 handling routines are all made available through this namespace. + +*/ +namespace utf8 { diff --git a/src/libutf8++/TopSource.cpp b/src/libutf8++/TopSource.cpp new file mode 100644 index 0000000..37610e6 --- /dev/null +++ b/src/libutf8++/TopSource.cpp @@ -0,0 +1,12 @@ +/* libutf8++/src/lib/TopSource.cpp + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#include "utf8" + +// Below are all the includes used throughout the library. 
+ +#include +#include diff --git a/src/libutf8++/build.default b/src/libutf8++/build.default new file mode 100644 index 0000000..43bf772 --- /dev/null +++ b/src/libutf8++/build.default @@ -0,0 +1 @@ +source src/libutf8++/build.lib diff --git a/src/libutf8++/build.install b/src/libutf8++/build.install new file mode 100644 index 0000000..0c1245b --- /dev/null +++ b/src/libutf8++/build.install @@ -0,0 +1 @@ +source src/libutf8++/build.install-lib diff --git a/src/libutf8++/build.install-lib b/src/libutf8++/build.install-lib new file mode 100644 index 0000000..b3e6f20 --- /dev/null +++ b/src/libutf8++/build.install-lib @@ -0,0 +1,36 @@ +build_target libutf8++ + +# make paths (this is for Gentoo in particular) +build_dir_tree "${LIBDIR}" || return 1 +build_dir_tree "${PKGCONFDIR}" || return 1 +build_dir_tree "${INCLUDEDIR}" || return 1 + +# install library +echo "Installing libraries into '${LIBDIR}'" +install_file ${libutf8pp} ${LIBDIR} 0755 || return 1 +BASE="${libutf8pp_BASE}.so" +MAJOR="${BASE}.${SOMAJOR}" +MINOR="${MAJOR}.${SOMINOR}" +MICRO="${MINOR}.${SOMICRO}" +install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" +install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" +install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" + +# install header +echo "Installing header file '${libutf8pp_HEADER}' into ${INCLUDEDIR}" +install_header ${libutf8pp_HEADER} ${INCLUDEDIR} 0644 || return 1 + +# install pkgconfig file +echo "Installing package config file into ${PKGCONFDIR}" +PKGCONFFILE=${PKGCONFDIR}/libutf8pp.pc +do_cmd rm -f ${PKGCONFFILE} +do_cmd_redir ${PKGCONFFILE} sed \ + -e "s,@VERSION@,${VERSION}," \ + -e "s,@LIBDIR@,${FINALLIBDIR}," \ + -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \ + src/libutf8++/pkgconf.in +do_cmd chmod 0644 ${PKGCONFFILE} +print_success "Done" + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libutf8++/build.lib b/src/libutf8++/build.lib new file mode 100644 index 0000000..484ee47 --- 
/dev/null +++ b/src/libutf8++/build.lib @@ -0,0 +1,51 @@ +# These are external variables, and shouldn't clash with anything else +# libutf8pp +# libutf8pp_BUILT +# libutf8pp_HEADER +# libutf8pp_BASE + +if [ -z ${libutf8pp_BUILT} ] +then + libutf8pp_BASE=libutf8++ + source src/libutf8++/soversion + + libutf8pp="obj/${libutf8pp_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}" + SO_EXTRA="$(pkg-config libutf8 --libs --cflags) -lstdc++ -lc" + + echo "Building library ${libutf8pp}..." + + do_cmd source src/libutf8++/build.monolithic || return 1 + + MODIFIED=0 + for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC} + do + if [ ${test} -nt ${libutf8pp} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + echo " Compiling" + + SONAME="${libutf8pp_BASE}.so.${SOMAJOR}.${SOMINOR}" + do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libutf8pp}" \ + -Wl,-soname,${SONAME} \ + ${SRC} ${SO_EXTRA} || return 1 + + # make tests work + do_cmd ln -sf $(basename ${libutf8pp}) obj/${SONAME} || return 1 + + print_success "Library built" + else + print_success "Library up to date" + fi + + libutf8pp_BUILT=1 + libutf8pp_HEADER=${HDR} + +fi +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libutf8++/build.monolithic b/src/libutf8++/build.monolithic new file mode 100644 index 0000000..d584710 --- /dev/null +++ b/src/libutf8++/build.monolithic @@ -0,0 +1,21 @@ +# These are external variables, and shouldn't clash with anything else +# libutf8pp_MONOLITHIC + +SRC="obj/libutf8++.cpp" +HDR="obj/utf8" + +MONOLITHIC_TESTS="src/libutf8++/build.lib src/libutf8++/build.monolithic" + +if [ -z "${libutf8pp_MONOLITHIC}" ] +then + MONOLITHIC_SOURCE="$(echo src/libutf8++/{TopHeader,ForwardDeclare,exception,string,{en,de}coder,BottomHeader}.h)" + make_monolithic ${HDR} C || return 1 + + MONOLITHIC_SOURCE="$(echo src/libutf8++/{TopSource,exception,string,{en,de}coder}.cpp)" + make_monolithic ${SRC} C || return 1 + + 
libutf8pp_MONOLITHIC=1
+    MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
+fi
+# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+# vim: expandtab:ts=4:sw=4
diff --git a/src/libutf8++/decoder.cpp b/src/libutf8++/decoder.cpp
new file mode 100644
index 0000000..8d940d1
--- /dev/null
+++ b/src/libutf8++/decoder.cpp
@@ -0,0 +1,159 @@
+/* libutf8++/src/lib/decoder.cpp
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+Decoder::Decoder(size_t hint)
+{
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr_size = (hint < 2) ? 2 : hint;
+    ctx.wr = new wchar_t[ctx.wr_size];
+    ctx.error_callback = _exceptionOnError;
+    ctx.data = this;
+}
+
+
+
+Decoder::~Decoder()
+{
+    delete [] ctx.wr;
+}
+
+
+
+void Decoder::decode(const std::string& str)
+{
+    decode(str.data(), str.size());
+}
+
+
+
+void Decoder::decode(const char* str, ssize_t amt)
+{
+    ctx.rd = str;
+    ctx.rd_remain = amt;
+    while(ctx.rd_remain) {
+        utf8_decoder(&ctx);
+        decoded.append(ctx.wr, ctx.written);
+        if(ctx.rd_remain < 0 && !*(ctx.rd)) break;
+        if(ctx.rd_remain) {
+            wchar_t* grown = new wchar_t[ctx.wr_size * 2]; // allocate first: if new[] throws, ctx.wr must stay valid
+            delete [] ctx.wr;
+            ctx.wr = grown;
+            ctx.wr_size *= 2;
+        }
+    }
+}
+
+
+
+bool Decoder::complete() const
+{
+    return ctx.complete;
+}
+
+
+
+void Decoder::reset()
+{
+    size_t old_wr_size = ctx.wr_size;
+    wchar_t* old_wr = ctx.wr;
+    utf8_decode_error_callback old_error_callback = ctx.error_callback;
+
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr_size = old_wr_size;
+    ctx.wr = old_wr;
+    ctx.error_callback = old_error_callback;
+    ctx.data = this;
+    decoded.clear();
+}
+
+
+
+void Decoder::skipOnError()
+{
+    ctx.error_callback = _skipOnError;
+}
+
+
+
+void Decoder::replaceOnError(wchar_t ch)
+{
+    replaceChar = ch;
+    ctx.error_callback = _replaceOnError;
+}
+
+
+
+void Decoder::exceptionOnError()
+{
+    ctx.error_callback = _exceptionOnError;
+}
+
+
+
+enum utf8_decode_error_action Decoder::_skipOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    (void)ctx;
+    (void)error;
+    (void)newch;
+    return utf8_decode_error_action_skip;
+}
+
+
+
+enum utf8_decode_error_action Decoder::_replaceOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    (void)error;
+    const Decoder* self = static_cast<const Decoder*>(ctx->data);
+    *newch = self->replaceChar;
+    return utf8_decode_error_action_replace;
+}
+
+
+
+enum utf8_decode_error_action Decoder::_exceptionOnError
+    (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch)
+{
+    (void)newch;
+    const char* desc = "unknown";
+
+    switch(error) {
+    case utf8_decode_error_lone_cchar:
+        desc = "An invalid continuation byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_not_cchar:
+        desc = "A multi-byte sequence contained an invalid byte.";
+        break;
+
+    case utf8_decode_error_not_schar:
+        desc = "An invalid byte was encountered while expecting a character.";
+        break;
+
+    case utf8_decode_error_overlong:
+        desc = "An overlong encoding of a character was encountered.";
+        break;
+
+    case utf8_decode_error_illegal_cp:
+        desc = "An illegal code point (a UTF-16 surrogate perhaps?) was encountered.";
+        break;
+    }
+
+    throw BadUTF8Sequence(desc, ctx);
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/decoder.h b/src/libutf8++/decoder.h
new file mode 100644
index 0000000..505bf5c
--- /dev/null
+++ b/src/libutf8++/decoder.h
@@ -0,0 +1,133 @@
+/* libutf8++/src/lib/decoder.h
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+/*! \brief Stateful UTF-8 decoder object.
+
+This object is used for stateful decoding of a UTF-8 byte stream. It can be fed the data in
+arbitrary chunks, even split on non-character boundaries. It writes its output into a wide character
+string.
+
+A variety of error handling modes are available. The default is to throw a BadUTF8Sequence
+exception, but you can change this with skipOnError() or replaceOnError().
+
+*/
+class Decoder {
+public:
+    /*! \brief Constructor.
+
+    \param hint Hint at number of characters to allocate space for in decoder buffer.
+
+    The constructor sets up the UTF-8 decoder. You can provide a hint as to the size of your input
+    stream chunks. This hint is the number of characters to allocate in the output buffer. If,
+    during a single decode operation, this buffer is filled, then it is doubled in size.
+
+    */
+    Decoder(size_t hint = 25);
+
+
+
+    /// Destructor.
+    ~Decoder();
+
+
+
+    /// Result of decoding operations (appended to).
+    std::wstring decoded;
+
+
+
+    /*! \brief UTF-8 decoder.
+
+    \param str Pointer to source data.
+    \param amt Number of bytes in source data (-1 for null terminated strings).
+    \throws BadUTF8Sequence.
+
+    This function will decode a chunk of UTF-8 data. The decoded data will be appended to whatever
+    is contained in the string decoded. You can check if the decoder ended on a character boundary
+    or not by calling complete().
+
+    */
+    void decode(const char* str, ssize_t amt);
+
+
+
+    /// Decode data stored in a std::string.
+    void decode(const std::string& str);
+
+
+
+    /// Returns \a true if the last call to \a decode() ended on a character boundary.
+    bool complete() const;
+
+
+
+    /*! \brief Resets the parser for a new UTF-8 stream.
+
+    This function will clear the internal state of the decoder so that it is ready for data from a
+    new source. This can be used if you have opened a new file, accepted a new connection, recovered
+    from an error, etc. It will also clear \a decoded.
+
+    */
+    void reset();
+
+
+
+    /*! \brief Set error handling to \e skip mode.
+
+    This function will set the error handling into \e skip mode. In this mode, any invalid UTF-8
+    byte sequences will simply be skipped altogether, and will not have any effect on the output in
+    \a decoded.
+
+    */
+    void skipOnError();
+
+
+
+    /*! \brief Set error handling to \e replace mode.
+
+    \param ch The replacement character that will appear in the output.
+
+    This function will set the error handling into \e replace mode. In this mode, any invalid UTF-8
+    byte sequences will be skipped, and a replacement character \a ch will be placed onto the output
+    in \a decoded. The default parameter is the Unicode replacement character, which is usually
+    rendered as a question mark inside a diamond.
+
+    */
+    void replaceOnError(wchar_t ch = 0xFFFD);
+
+
+
+    /*! \brief Set error handling to \e exception mode (default).
+
+    This function will set the error handling to \e exception mode. In this mode, any invalid
+    UTF-8 byte sequences will cause a BadUTF8Sequence exception to be thrown. This is the default
+    mode.
+
+    */
+    void exceptionOnError();
+
+
+
+private:
+    // Not copyable: ctx.wr is an owning pointer and ctx.data points back at this object, so a
+    // memberwise copy would double-free the buffer. Declared but intentionally left undefined.
+    Decoder(const Decoder&);
+    Decoder& operator=(const Decoder&);
+
+    struct utf8_decode_state ctx;
+    wchar_t replaceChar;
+
+    static enum utf8_decode_error_action _skipOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+    static enum utf8_decode_error_action _replaceOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+    static enum utf8_decode_error_action _exceptionOnError
+        (const struct utf8_decode_state *ctx, enum utf8_decode_error error, wchar_t *newch);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/encoder.cpp b/src/libutf8++/encoder.cpp
new file mode 100644
index 0000000..58d30c2
--- /dev/null
+++ b/src/libutf8++/encoder.cpp
@@ -0,0 +1,124 @@
+/* libutf8++/src/lib/encoder.cpp
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+Encoder::Encoder(size_t hint)
+{
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr_size = (hint < 7) ? 7 : hint;
+    ctx.wr = new char[ctx.wr_size];
+    ctx.error_callback = _exceptionOnError;
+    ctx.data = this;
+}
+
+
+
+Encoder::~Encoder()
+{
+    delete [] ctx.wr;
+}
+
+
+
+void Encoder::reset()
+{
+    char* wr = ctx.wr;
+    size_t wr_size = ctx.wr_size;
+    utf8_encode_error_callback cb = ctx.error_callback;
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.wr = wr;
+    ctx.wr_size = wr_size;
+    ctx.error_callback = cb;
+    ctx.data = this;
+}
+
+
+
+void Encoder::encode(const std::wstring& str)
+{
+    encode(str.data(), str.size());
+}
+
+
+
+void Encoder::encode(const wchar_t* str, ssize_t amt)
+{
+    ctx.rd = str;
+    ctx.rd_remain = amt;
+    while(ctx.rd_remain) {
+        if(!utf8_encoder(&ctx)) throw BadUnicodeChar(&ctx);
+        encoded.append(ctx.wr, ctx.written);
+        if(ctx.rd_remain < 0 && !*(ctx.rd)) break;
+        if(ctx.rd_remain) {
+            char* grown = new char[ctx.wr_size * 2]; // allocate first: if new[] throws, ctx.wr must stay valid
+            delete [] ctx.wr;
+            ctx.wr = grown;
+            ctx.wr_size *= 2;
+        }
+    }
+}
+
+
+
+void Encoder::skipOnError()
+{
+    ctx.error_callback = _skipOnError;
+}
+
+
+
+void Encoder::replaceOnError(wchar_t ch)
+{
+    replaceChar = ch;
+    ctx.error_callback = _replaceOnError;
+}
+
+
+
+void Encoder::exceptionOnError()
+{
+    ctx.error_callback = _exceptionOnError;
+}
+
+
+
+enum utf8_encode_error_action Encoder::_skipOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    (void)ctx;
+    (void)newch;
+    return utf8_encode_error_action_skip;
+}
+
+
+
+enum utf8_encode_error_action Encoder::_replaceOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    const Encoder* self = static_cast<const Encoder*>(ctx->data);
+    *newch = self->replaceChar;
+    return utf8_encode_error_action_replace;
+}
+
+
+
+enum utf8_encode_error_action Encoder::_exceptionOnError
+    (const struct utf8_encode_state *ctx, wchar_t *newch)
+{
+    (void)newch;
+    throw BadUnicodeChar(ctx);
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/encoder.h b/src/libutf8++/encoder.h
new file mode 100644
index 0000000..8b32870
--- /dev/null
+++ b/src/libutf8++/encoder.h
@@ -0,0 +1,114 @@
+/* libutf8++/src/lib/encoder.h
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+/*! \brief UTF-8 encoder object.
+
+This object is used to encode Unicode wide characters into UTF-8. It can be fed chunks of characters
+which it then encodes, appending the result to an internal buffer.
+
+*/
+class Encoder {
+public:
+    /*! \brief Constructor.
+
+    \param hint Number of bytes to allocate for the encoding buffer.
+
+    The constructor sets up the encoder and allocates some space for an internal buffer. You can
+    hint at how large you expect the chunks to be encoded will be. If an encoding operation fills
+    the buffer without consuming all the input data, the buffer will be doubled in size for the
+    next round.
+
+    */
+    Encoder(size_t hint = 100);
+
+    /// Destructor.
+    virtual ~Encoder();
+
+
+
+    /// UTF-8 output data is appended to this string.
+    std::string encoded;
+
+
+
+    /*! \brief Encode some data into UTF-8.
+
+    \param str Pointer to the character array to encode.
+    \param amt Number of characters to encode (-1 for null terminated strings).
+    \throws BadUnicodeChar.
+
+    This function performs an encoding of some Unicode characters into UTF-8. It appends the result
+    onto \a encoded.
+
+    */
+    void encode(const wchar_t* str, ssize_t amt);
+
+    /// Encode a std::wstring.
+    void encode(const std::wstring& str);
+
+
+
+    /// Reset the encoder for a new character stream (note: \a encoded is left untouched).
+    void reset();
+
+
+
+    /*! \brief Set error handling to \e skip mode.
+
+    This function will set the error handling into \e skip mode. In this mode, any invalid Unicode
+    characters will simply be skipped altogether, and will not have any effect on the output in
+    \a encoded.
+
+    */
+    void skipOnError();
+
+
+
+    /*! \brief Set error handling to \e replace mode.
+
+    \param ch The replacement character that will appear in the output.
+
+    This function will set the error handling into \e replace mode. In this mode, any invalid
+    Unicode characters will be dropped, and the replacement character \a ch will be encoded onto the
+    output in \a encoded instead. The default parameter is the Unicode replacement character, which
+    is usually rendered as a question mark inside a diamond.
+
+    */
+    void replaceOnError(wchar_t ch = 0xFFFD);
+
+
+
+    /*! \brief Set error handling to \e exception mode (default).
+
+    This function will set the error handling to \e exception mode. In this mode, any invalid
+    Unicode character will cause a BadUnicodeChar exception to be thrown. This is the default
+    mode.
+
+    */
+    void exceptionOnError();
+
+
+
+private:
+    // Not copyable: ctx.wr is an owning pointer and ctx.data points back at this object, so a
+    // memberwise copy would double-free the buffer. Declared but intentionally left undefined.
+    Encoder(const Encoder&);
+    Encoder& operator=(const Encoder&);
+
+    struct utf8_encode_state ctx;
+    wchar_t replaceChar;
+
+    static enum utf8_encode_error_action _skipOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+    static enum utf8_encode_error_action _replaceOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+    static enum utf8_encode_error_action _exceptionOnError
+        (const struct utf8_encode_state *ctx, wchar_t *newch);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/exception.cpp b/src/libutf8++/exception.cpp
new file mode 100644
index 0000000..0bb22ef
--- /dev/null
+++ b/src/libutf8++/exception.cpp
@@ -0,0 +1,81 @@
+/* libutf8++/src/lib/exception.cpp
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+namespace utf8 {
+
+
+
+Error::Error(const std::string& reason)
+    : reason(reason)
+{
+}
+
+
+
+const char* Error::what() const throw()
+{
+    return reason.c_str();
+}
+
+
+
+BadUnicodeChar::BadUnicodeChar(const struct utf8_encode_state* ctx)
+    : Error(format(ctx)), badChar(*ctx->rd), line(ctx->line + 1), col(ctx->col + 1), char_offset(ctx->char_offset)
+{ // line/col get +1 so they are 1-based as documented, matching format() and BadUTF8Sequence
+}
+
+
+
+std::string BadUnicodeChar::format(const struct utf8_encode_state* ctx)
+{
+    std::ostringstream str;
+
+    str << "Invalid Unicode code point encountered."
+        "\n Position : line "
+        << ctx->line + 1
+        << ", column "
+        << ctx->col + 1
+        << "\n Stream offset : "
+        << ctx->char_offset
+        << " characters\n Character value: 0x"
+        << std::hex
+        << *(ctx->rd);
+
+    return str.str();
+}
+
+
+
+BadUTF8Sequence::BadUTF8Sequence(const std::string& description,
+    const struct utf8_decode_state* ctx)
+    : Error(format(description, ctx)), description(description), line(ctx->line + 1),
+    col(ctx->col + 1), char_offset(ctx->char_offset), byte_offset(ctx->byte_offset)
+{
+}
+
+
+
+std::string BadUTF8Sequence::format(const std::string& description,
+    const struct utf8_decode_state* ctx)
+{
+    std::ostringstream str;
+
+    str << "Bad byte sequence in UTF-8 data.\n"
+        " Reason : " << description
+        << "\n Position: line " << ctx->line + 1
+        << ", column " << ctx->col + 1
+        << "\n Offset : " << ctx->char_offset << " chars, " << ctx->byte_offset << " bytes";
+
+    return str.str();
+}
+
+
+
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/exception.h b/src/libutf8++/exception.h
new file mode 100644
index 0000000..4358b5a
--- /dev/null
+++ b/src/libutf8++/exception.h
@@ -0,0 +1,96 @@
+/* libutf8++/src/lib/exception.h
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/
+
+/*! \brief Exception base class.
+
+This is the base class for all libutf8 exceptions. It contains one member, \a reason, which allows
+you to print a human-readable description of the error. To recover the actual type, you can refer
+to the more specific derived classes.
+
+*/
+class Error : public std::exception {
+public:
+    /// Human-readable reason for error.
+    std::string reason;
+
+    /// Constructor.
+    Error(const std::string& reason);
+
+    /// Destructor.
+    virtual ~Error() throw()
+    { }
+
+    /// Find what caused the error; returns \a reason (overrides std::exception::what()).
+    virtual const char* what() const throw();
+};
+
+
+
+/*! \brief Invalid Unicode character exception.
+
+This exception is thrown when encoding Unicode into UTF-8 and an invalid character is encountered.
+
+*/
+class BadUnicodeChar : public Error {
+public:
+    /// A copy of the invalid character.
+    wchar_t badChar;
+
+    /// Line of input data at which error occurred (starts at 1).
+    int line;
+
+    /// Column of input data at which error occurred (starts at 1).
+    int col;
+
+    /// Character offset of input data at which error occurred.
+    int char_offset;
+
+    /// Constructor.
+    BadUnicodeChar(const struct utf8_encode_state* ctx);
+
+private:
+    static std::string format(const struct utf8_encode_state* ctx);
+};
+
+
+
+/*! \brief Invalid UTF-8 sequence exception.
+
+This exception is thrown when decoding UTF-8 and an invalid sequence is encountered. This could be
+a nonsensical sequence, a redundantly-encoded character or truncated source data. It contains some
+variables for allowing detailed diagnostics.
+
+*/
+class BadUTF8Sequence : public Error {
+public:
+    /// Description of the error, for human diagnostics.
+    std::string description;
+
+    /// Line of input data at which error occurred (starts at 1).
+    int line;
+
+    /// Column of input data at which error occurred (starts at 1).
+    int col;
+
+    /// Character offset of input data at which error occurred.
+    int char_offset;
+
+    /// Byte offset of input data at which error occurred.
+    int byte_offset;
+
+    /// Constructor.
+    BadUTF8Sequence(const std::string& description, const struct utf8_decode_state* ctx);
+
+    /// Destructor.
+    ~BadUTF8Sequence() throw() { }
+
+private:
+    static std::string format(const std::string& description, const struct utf8_decode_state* ctx);
+};
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+*/
diff --git a/src/libutf8++/pkgconf.in b/src/libutf8++/pkgconf.in
new file mode 100644
index 0000000..a569072
--- /dev/null
+++ b/src/libutf8++/pkgconf.in
@@ -0,0 +1,21 @@
+# libutf8++/src/lib/libutf8++/pkgconf.in
+#
+# Metadata file for pkg-config
+# ( http://www.freedesktop.org/software/pkgconfig/ )
+#
+# (c)2006, Laurence Withers, .
+# Released under the GNU GPLv2. See file COPYING or
+# http://www.gnu.org/copyleft/gpl.html for details.
+#
+
+# Name, description
+Name: libutf8++
+Description: C++ wrapper around libutf8 (library for handling UTF-8)
+Version: @VERSION@
+
+# Requirements
+Requires: libutf8
+
+# Compilation information
+Libs: -L@LIBDIR@ -lutf8++
+Cflags: -I@INCLUDEDIR@
diff --git a/src/libutf8++/soversion b/src/libutf8++/soversion
new file mode 100644
index 0000000..edec6ee
--- /dev/null
+++ b/src/libutf8++/soversion
@@ -0,0 +1,17 @@
+# libutf8++/src/libutf8++/soversion
+#
+# (c)2006, Laurence Withers, .
+# Released under the GNU GPLv2. See file COPYING or
+# http://www.gnu.org/copyleft/gpl.html for details.
+#
+
+
+
+# SOMAJOR and SOMINOR are included in the library's soname. They need to
+# be bumped on a binary-incompatible release. They are both single
+# integers.
+SOMAJOR=0
+SOMINOR=0
+
+# SOMICRO is bumped every time there is a binary-compatible release.
+SOMICRO=0
diff --git a/src/libutf8++/string.cpp b/src/libutf8++/string.cpp
new file mode 100644
index 0000000..53128fe
--- /dev/null
+++ b/src/libutf8++/string.cpp
@@ -0,0 +1,126 @@
+/* libutf8++/src/lib/string.cpp
+ *
+ * (c)2006, Laurence Withers. Released under the GNU GPL. See file
+ * COPYING for more information / terms of license.
+*/ + +namespace utf8 { + + + +static enum utf8_decode_error_action decode_replace_callback(const struct utf8_decode_state* ctx, + enum utf8_decode_error error, wchar_t* newch) +{ + (void)error; + *newch = *(wchar_t*)(ctx->data); + return utf8_decode_error_action_replace; +} + + + +static enum utf8_decode_error_action decode_error_callback(const struct utf8_decode_state* ctx, + enum utf8_decode_error error, wchar_t* newch) +{ + (void)newch; + const char* desc = "unknown"; + + switch(error) { + case utf8_decode_error_lone_cchar: + desc = "An invalid continuation byte was encountered while expecting a character."; + break; + + case utf8_decode_error_not_cchar: + desc = "A multi-byte sequence contained an invalid byte."; + break; + + case utf8_decode_error_not_schar: + desc = "An invalid byte was encountered while expecting a character."; + break; + + case utf8_decode_error_overlong: + desc = "An overlong encoding of a character was encountered."; + break; + + case utf8_decode_error_illegal_cp: + desc = "An illegal code point (a UTF-16 surrogate perhaps?) 
was encountered."; + break; + } + + throw BadUTF8Sequence(desc, ctx); +} + + + +std::wstring decode(const std::string& utf8, bool force, wchar_t replace) +{ + wchar_t buffer[128]; + struct utf8_decode_state ctx; + memset(&ctx, 0, sizeof(ctx)); + + ctx.rd = utf8.data(); + ctx.rd_remain = utf8.size(); + ctx.wr = buffer; + ctx.wr_size = 128; + if(force) { + ctx.error_callback = decode_replace_callback; + ctx.data = &replace; + } else { + ctx.error_callback = decode_error_callback; + } + + std::wstring ret; + + while(ctx.rd_remain) { + utf8_decoder(&ctx); + ret.append(buffer, ctx.written); + } + + return ret; +} + + + +static enum utf8_encode_error_action encode_replace_callback(const struct utf8_encode_state* ctx, + wchar_t* newch) +{ + *newch = *(wchar_t*)(ctx->data); + return utf8_encode_error_action_replace; +} + + + +std::string encode(const std::wstring& ustr, bool force, wchar_t replace) +{ + char buffer[512]; + struct utf8_encode_state ctx; + memset(&ctx, 0, sizeof(ctx)); + + ctx.rd = ustr.data(); + ctx.rd_remain = ustr.size(); + ctx.wr = buffer; + ctx.wr_size = 512; + if(force) { + ctx.error_callback = encode_replace_callback; + ctx.data = &replace; + } + + std::string ret; + + while(ctx.rd_remain) { + if(!utf8_encoder(&ctx)) { + throw BadUnicodeChar(&ctx); + } + + ret.append(buffer, ctx.written); + } + + return ret; +} + + + +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +*/ diff --git a/src/libutf8++/string.h b/src/libutf8++/string.h new file mode 100644 index 0000000..bbe9aa5 --- /dev/null +++ b/src/libutf8++/string.h @@ -0,0 +1,37 @@ +/* libutf8++/src/lib/string.h + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +/*! \brief Decode UTF-8. + +\param utf8 The UTF-8 encoded data. +\param force If set to \a true, errors will be inhibited. 
+\param replace If \a force is \a true, then invalid UTF-8 sequences will be replaced by this + character. +\returns The Unicode wide-character string representation. +\throws BadUTF8Sequence if there is an invalid byte sequence in the UTF-8 source data. + +This function will decode a UTF-8 source string into a Unicode wide-character string. It has a force +mode whereby any errors will be inhibited and a best-effort attempt will be made. + +*/ +std::wstring decode(const std::string& utf8, bool force = false, wchar_t replace = 0xFFFD); + + + +/*! \brief Encode UTF-8. + +\param ustr The Unicode wide-character string. +\param force If set to \a true, errors will be inhibited (invalid chars will be replaced). +\param replace If \a force is \a true, then invalid Unicode characters will be replaced by this + character. +\returns The UTF-8 transformed representation of \a ustr. +\throws BadUnicodeChar on invalid characters in the source data. + +This function will encode a Unicode wide-character string into a UTF-8 transformed representation. +It has a force mode whereby any errors will be inhibited and a best-effort attempt will be made. 
+ +*/ +std::string encode(const std::wstring& ustr, bool force = false, wchar_t replace = 0xFFFD); diff --git a/src/tests/.params b/src/tests/.params new file mode 100644 index 0000000..070030a --- /dev/null +++ b/src/tests/.params @@ -0,0 +1 @@ +c++ tests tests libutf8++ diff --git a/src/tests/build.default b/src/tests/build.default new file mode 100644 index 0000000..2d979e2 --- /dev/null +++ b/src/tests/build.default @@ -0,0 +1,3 @@ +source src/tests/build.tests +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/tests/build.tests b/src/tests/build.tests new file mode 100644 index 0000000..8275e6b --- /dev/null +++ b/src/tests/build.tests @@ -0,0 +1,43 @@ +# These are external variables, and shouldn't clash with anything else +# tests_BUILT +# + +build_target libutf8++ || return 1 + +if [ -z ${tests_BUILT} ] +then + LIBS="${libutf8pp} " + EXTRAS="" + + echo "Building test programs..." + do_cmd mkdir -p obj/tests || return 1 + + for SRC in src/tests/*.cpp + do + TEST="obj/tests/$(basename ${SRC} | sed -e 's,.cpp$,,')" + MODIFIED=0 + for file in ${LIBS} ${SRC} src/tests/build.tests + do + if [ ${file} -nt ${TEST} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + do_cmd ${CXX} -Iobj ${CFLAGS} -o ${TEST} ${SRC} ${LIBS} ${EXTRAS} || return 1 + print_success "Built ${TEST}" + else + print_success "${TEST} is up to date" + fi + done + + print_success "All tests built" + + tests_BUILT=1 +fi + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/tests/objects.cpp b/src/tests/objects.cpp new file mode 100644 index 0000000..b343534 --- /dev/null +++ b/src/tests/objects.cpp @@ -0,0 +1,85 @@ +/* libutf8++/src/tests/objects.cpp + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/ + +#include "utf8" +#include +#include +#include +#include + + + +void make_random(wchar_t* buf, int ch) +{ + int fd = open("/dev/urandom", O_RDONLY); + if(fd < 0) { + perror("open(\"/dev/urandom\")"); + throw 1; + } + ch *= sizeof(wchar_t); + if(read(fd, (char*)buf, ch) != ch) { + perror("read(\"/dev/urandom\")"); + throw 1; + } + close(fd); + + ch /= sizeof(wchar_t); + while(ch--) { + buf[ch] &= 0x7FFFFFFF; + } +} + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::cout << "Performs some tests on the Encoder and Decoder objects.\n"; + return 0; + } + + int ret = 0; + try { + wchar_t wch[1024]; + make_random(wch, 1024); + + std::wstring ustr; + ustr.assign(wch, 1024); + + utf8::Encoder encoder; + utf8::Decoder decoder; + + encoder.encode(ustr); + decoder.decode(encoder.encoded); + + if(ustr != decoder.decoded) { + std::cerr << "Decoded string does not match original.\n"; + for(size_t i = 0, end = std::min(ustr.size(), decoder.decoded.size()); i != end; ++i) { + if(ustr[i] != decoder.decoded[i]) { + std::cerr << std::dec << std::setfill(' ') << std::setw(4) << i + << std::setfill('0') << std::hex << ": 0x" + << std::setw(8) << ustr[i] << " != " + << std::setw(8) << decoder.decoded[i] << "\n"; + } + } + std::cerr << "Original size " << std::dec << ustr.size() + << ", decoded size " << decoder.decoded.size() << std::endl; + return 1; + } + + std::cout << "Success.\n"; + } + catch(utf8::Error& e) { + std::cerr << e.reason << std::endl; + ret = 1; + } + + return ret; +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +*/ diff --git a/src/tests/strings.cpp b/src/tests/strings.cpp new file mode 100644 index 0000000..e38ba8a --- /dev/null +++ b/src/tests/strings.cpp @@ -0,0 +1,82 @@ +/* libutf8++/src/tests/strings.cpp + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/ + +#include "utf8" +#include +#include +#include +#include + + + +void make_random(wchar_t* buf, int ch) +{ + int fd = open("/dev/urandom", O_RDONLY); + if(fd < 0) { + perror("open(\"/dev/urandom\")"); + throw 1; + } + ch *= sizeof(wchar_t); + if(read(fd, (char*)buf, ch) != ch) { + perror("read(\"/dev/urandom\")"); + throw 1; + } + close(fd); + + ch /= sizeof(wchar_t); + while(ch--) { + buf[ch] &= 0x7FFFFFFF; + } +} + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::cout << "Performs some tests on the string encode/decode routines.\n"; + return 0; + } + + int ret = 0; + try { + wchar_t wch[1024]; + make_random(wch, 1024); + + std::wstring ustr1, ustr2; + std::string utf8; + ustr1.assign(wch, 1024); + utf8 = utf8::encode(ustr1); + ustr2 = utf8::decode(utf8); + + if(ustr1 != ustr2) { + std::cerr << "Decoded string does not match original.\n"; + for(size_t i = 0, end = std::min(ustr1.size(), ustr2.size()); i != end; ++i) { + if(ustr1[i] != ustr2[i]) { + std::cerr << std::dec << std::setfill(' ') << std::setw(4) << i + << std::setfill('0') << std::hex << ": 0x" + << std::setw(8) << ustr1[i] << " != " + << std::setw(8) << ustr2[i] << "\n"; + } + } + std::cerr << "Original size " << std::dec << ustr1.size() + << ", decoded size " << ustr2.size() << std::endl; + return 1; + } + + std::cout << "Success.\n"; + } + catch(utf8::Error& e) { + std::cerr << e.reason << std::endl; + ret = 1; + } + + return ret; +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +*/ diff --git a/src/tests/template b/src/tests/template new file mode 100644 index 0000000..7a05226 --- /dev/null +++ b/src/tests/template @@ -0,0 +1,44 @@ +/* libutf8++/src/tests/???.cpp + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. 
+*/ + +#include "utf8" + +#include + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::cout << "One line summary.\n"; + return 0; + } + + if(argc == 1) { + // empty argument list + } + + int ret = 0; + try { + // TODO + } + catch(std::exception& e) { + std::cerr << e.what() << std::endl; + ret = 1; + } + catch(...) { + std::cerr << "Unknown exception caught." << std::endl; + ret = 1; + } + + return ret; +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/