From 7610e244f439ecff2f327b54902ecc6594ceb968 Mon Sep 17 00:00:00 2001 From: Laurence Withers Date: Mon, 31 Jul 2006 17:21:55 +0100 Subject: [PATCH] Import old subversion tree. --- src/docs/.params | 1 + src/docs/Doxyfile.in | 146 +++++++ src/docs/MainPage.dox | 47 +++ src/docs/build.default | 1 + src/docs/build.docs | 43 ++ src/docs/build.install | 1 + src/docs/build.install-docs | 21 + src/libStreamedXML/.params | 1 + src/libStreamedXML/BottomHeader.h | 7 + src/libStreamedXML/Callback.h | 197 +++++++++ src/libStreamedXML/Decoder.cpp | 64 +++ src/libStreamedXML/Decoder.h | 72 ++++ src/libStreamedXML/Exceptions.cpp | 61 +++ src/libStreamedXML/Exceptions.h | 97 +++++ src/libStreamedXML/ForwardDeclare.h | 24 ++ src/libStreamedXML/Parser.cpp | 585 +++++++++++++++++++++++++++ src/libStreamedXML/Parser.h | 223 ++++++++++ src/libStreamedXML/TopHeader.h | 16 + src/libStreamedXML/TopSource.cpp | 10 + src/libStreamedXML/build.default | 1 + src/libStreamedXML/build.install | 1 + src/libStreamedXML/build.install-lib | 36 ++ src/libStreamedXML/build.lib | 51 +++ src/libStreamedXML/build.monolithic | 21 + src/libStreamedXML/pkgconf.in | 21 + src/libStreamedXML/soversion | 17 + src/tests/.params | 1 + src/tests/Callback.cpp | 110 +++++ src/tests/Structure.cpp | 479 ++++++++++++++++++++++ src/tests/UTF-8.cpp | 120 ++++++ src/tests/build.default | 3 + src/tests/build.tests | 43 ++ src/tests/template | 44 ++ 33 files changed, 2565 insertions(+) create mode 100644 src/docs/.params create mode 100644 src/docs/Doxyfile.in create mode 100644 src/docs/MainPage.dox create mode 100644 src/docs/build.default create mode 100644 src/docs/build.docs create mode 100644 src/docs/build.install create mode 100644 src/docs/build.install-docs create mode 100644 src/libStreamedXML/.params create mode 100644 src/libStreamedXML/BottomHeader.h create mode 100644 src/libStreamedXML/Callback.h create mode 100644 src/libStreamedXML/Decoder.cpp create mode 100644 src/libStreamedXML/Decoder.h create mode 
100644 src/libStreamedXML/Exceptions.cpp create mode 100644 src/libStreamedXML/Exceptions.h create mode 100644 src/libStreamedXML/ForwardDeclare.h create mode 100644 src/libStreamedXML/Parser.cpp create mode 100644 src/libStreamedXML/Parser.h create mode 100644 src/libStreamedXML/TopHeader.h create mode 100644 src/libStreamedXML/TopSource.cpp create mode 100644 src/libStreamedXML/build.default create mode 100644 src/libStreamedXML/build.install create mode 100644 src/libStreamedXML/build.install-lib create mode 100644 src/libStreamedXML/build.lib create mode 100644 src/libStreamedXML/build.monolithic create mode 100644 src/libStreamedXML/pkgconf.in create mode 100644 src/libStreamedXML/soversion create mode 100644 src/tests/.params create mode 100644 src/tests/Callback.cpp create mode 100644 src/tests/Structure.cpp create mode 100644 src/tests/UTF-8.cpp create mode 100644 src/tests/build.default create mode 100644 src/tests/build.tests create mode 100644 src/tests/template diff --git a/src/docs/.params b/src/docs/.params new file mode 100644 index 0000000..efd9ae0 --- /dev/null +++ b/src/docs/.params @@ -0,0 +1 @@ +doxygen docs docs diff --git a/src/docs/Doxyfile.in b/src/docs/Doxyfile.in new file mode 100644 index 0000000..956a79e --- /dev/null +++ b/src/docs/Doxyfile.in @@ -0,0 +1,146 @@ +# libStreamedXML/src/docs/Doxyfile.in +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. 
+# + +PROJECT_NAME = libStreamedXML +OUTPUT_DIRECTORY = +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = YES +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = YES +DETAILS_AT_TOP = YES +INHERIT_DOCS = YES +DISTRIBUTE_GROUP_DOC = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +SUBGROUPING = YES +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = NO +EXTRACT_LOCAL_METHODS = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = YES +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = NO +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = +QUIET = YES +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = YES +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +FILE_PATTERNS = +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = src/docs +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +VERBATIM_HEADERS = NO +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO 
+BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +GENERATE_LATEX = NO +GENERATE_RTF = NO +GENERATE_MAN = NO +GENERATE_XML = NO +GENERATE_AUTOGEN_DEF = NO +GENERATE_PERLMOD = NO +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = DOXYGEN +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = NO +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = NO +INCLUDED_BY_GRAPH = NO +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = NO +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = YES +DOT_MULTI_TARGETS = YES +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +SEARCHENGINE = NO diff --git a/src/docs/MainPage.dox b/src/docs/MainPage.dox new file mode 100644 index 0000000..3c8f41e --- /dev/null +++ b/src/docs/MainPage.dox @@ -0,0 +1,47 @@ +/* libStreamedXML/src/docs/MainPage.dox + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +/*! \mainpage + +libStreamedXML is a C++ implementation of a + +Streamed XML processor. It is written using an efficient +state-machine and outputs UCS-4 data using the built-in std::wstring +type. It is written to accept an arbitrary byte stream, which may be +interrupted and restarted at any point, as input. + +libStreamedXML uses a three-layer design. The lowest layer is the +character decoder layer, which takes raw bytes as input and outputs +UCS-4 characters. Its output is pushed to the middle layer, which is the +Streamed XML parser. 
This layer interprets character data into XML +structures, and pushes its results onto the top layer, which is the +callback layer. This layered design allows for flexibility and a +conciseness of both design and implementation. + +The callback layer is implemented in the lsx::Callback class. You +simply override the functions in a derived class to achieve the +behaviour you want. The callback layer is only ever presented +well-formed Streamed XML data; errors are handled at the other end of +the stack. + +The parser layer is implemented in the lsx::Parser class. You will have +to instantiate this class, but you should never have to derive from it, +since its functionality is fixed to the standard. + +The character decoder layer is implemented in the lsx::Decoder class. +However, since the lsx::Parser class has no direct link to it, there is +no need to use this class (or any class at all). You can supply the +input in whatever way you see fit. The lsx::Decoder class provides a +UTF-8 decoder and allows exceptions to propagate out of its feedData() +method. + +*/ + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/docs/build.default b/src/docs/build.default new file mode 100644 index 0000000..ca22639 --- /dev/null +++ b/src/docs/build.default @@ -0,0 +1 @@ +source src/docs/build.docs diff --git a/src/docs/build.docs b/src/docs/build.docs new file mode 100644 index 0000000..653c323 --- /dev/null +++ b/src/docs/build.docs @@ -0,0 +1,43 @@ +# These are external variables, and shouldn't clash with anything else +# docs_BUILT +# + +MONOLITHIC_DOC="${MONOLITHIC_DOC} $(echo src/docs/*.dox)" +build_target monolithic + +if [ -z ${docs_BUILT} ] +then + echo "Building documentation with Doxygen..." + + DOXYFILE=obj/Doxyfile.docs + + if [ ! 
-e ${DOXYFILE} ] + then + do_cmd cp src/docs/Doxyfile.in ${DOXYFILE} || return 1 + echo "INPUT = ${MONOLITHIC_DOC}" >> ${DOXYFILE} + echo "PROJECT_NUMBER = ${VERSION}" >> ${DOXYFILE} + fi + + MODIFIED=0 + for file in ${MONOLITHIC_DOC} + do + if [ ${file} -nt html/index.html ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + do_cmd doxygen ${DOXYFILE} || return 1 + print_success "Documentation built" + else + print_success "Documentation is up to date" + fi + + docs_BUILT=1 +fi + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/docs/build.install b/src/docs/build.install new file mode 100644 index 0000000..016c75c --- /dev/null +++ b/src/docs/build.install @@ -0,0 +1 @@ +source src/docs/build.install-docs diff --git a/src/docs/build.install-docs b/src/docs/build.install-docs new file mode 100644 index 0000000..66167d3 --- /dev/null +++ b/src/docs/build.install-docs @@ -0,0 +1,21 @@ +build_target docs + +# create documentation directories +echo "Installing documentation into ${DOCSDIR}" +build_dir_tree "${DOCSDIR}/html" || return 1 + +# copy across the Doxygen-generated documentation +for file in html/* +do + install_file ${file} ${DOCSDIR}/html 0644 || return 1 +done + +# copy across the generic files +for file in COPYING README +do + install_file ${file} ${DOCSDIR} 0644 || return 1 +done + +print_success "Documentation installed" +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libStreamedXML/.params b/src/libStreamedXML/.params new file mode 100644 index 0000000..f31d8c7 --- /dev/null +++ b/src/libStreamedXML/.params @@ -0,0 +1 @@ +c++ lib libStreamedXML StreamedXML diff --git a/src/libStreamedXML/BottomHeader.h b/src/libStreamedXML/BottomHeader.h new file mode 100644 index 0000000..4bbc273 --- /dev/null +++ b/src/libStreamedXML/BottomHeader.h @@ -0,0 +1,7 @@ +/* 
libStreamedXML/src/lib/BottomHeader.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#endif diff --git a/src/libStreamedXML/Callback.h b/src/libStreamedXML/Callback.h new file mode 100644 index 0000000..78f2add --- /dev/null +++ b/src/libStreamedXML/Callback.h @@ -0,0 +1,197 @@ +/* libStreamedXML/src/lib/Core/Callback.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +namespace lsx { + + + +/*! \brief This object is the callback used in lsx::Parser. + +The functions in this callback may be overridden to handle "events" +encountered when parsing Streamed XML. For instance, hitting an open +tag will generate an element() event. Only well-formed XML will ever +get through to the callback object. + +Default implementations are provided for all the functions. These +implementations do nothing, so you are free to implement only those +functions that you need. + +You may throw any exceptions you like from the callback handlers; these +will filter through the parser layer and reach the character decoder +layer. + +*/ +class Callback { +public: + /// Destructor (does nothing). + virtual ~Callback() + { } + + + + /*! \brief Stream restart marker. + + \param data Data held in the marker (may be empty). + + This function is called whenever a stream restart marker is + encountered. This means that, if you are keeping track of the parsed + data in any way, you should now reset that state (e.g. you should + clear a stack of which element you are currently in). + + */ + virtual void streamRestart(const std::wstring& data) + { + (void)data; + } + + + + /*! \brief Comment. + + \param data Data held in the comment (may be empty). + + This function is called whenever a comment is encountered. It is + mainly used if you want to transform the input in some ways, but + leave the comments intact. 
+ + */ + virtual void comment(const std::wstring& data) + { + (void)data; + } + + + + /*! \brief Processing instruction. + + \param target The processing instruction's target. + \param data Any data following the target (may be empty). + + This function is called whenever a PI is encountered. It includes + both the target and any data following that target. + + */ + virtual void PI(const std::wstring& target, const std::wstring& data) + { + (void)target; + (void)data; + } + + + + /*! \brief Element start tag. + + \param elemName The element's name. + \param elemAttrs Any attributes the element may have (may be empty). + + This is called whenever an element start tag is encountered. It is + called with the element's name and attributes. If the element tag is + an empty tag, then closeTag() will be called immediately afterward. + + */ + virtual void element(const std::wstring& elemName, + const std::map& elemAttrs) + { + (void)elemName; + (void)elemAttrs; + } + + + + /*! \brief Element close tag. + + \param elemName The element's name. + + This is called whenever a close tag is encountered. It is also + generated after the element() event if an empty element tag is + encountered. + + */ + virtual void closeTag(const std::wstring& elemName) + { + (void)elemName; + } + + + + /*! \brief Ignorable whitespace. + + \param ws The actual whitespace encountered. + + This is called whenever whitespace is encountered between element + tags, but there is no actual content. It is included simply because + you may wish to transform the input in some ways, but keep the + whitespace the same. + + If an element does contain content but is surrounded by whitespace, + this function will not be called: the whitespace will be included + in the reported content. + + */ + virtual void whiteSpace(const std::wstring& ws) + { + (void)ws; + } + + + + /*! \brief Element content. + + \param data The element's content. 
+ + This function is called whenever non-whitespace content is + encountered within an element. Any whitespace within the content (or + at the start or end) is reported verbatim. + + */ + virtual void content(const std::wstring& data) + { + (void)data; + } + + + + /*! \brief Character data. + + \param data The character data. + + This is used to report a chunk of character data enclosed in a CDATA + marked section. This means it might contain the text "&", but + this shouldn't be expanded as an entity, so there would be no way of + disambiguating it from an entity reference if \a expandEntities is + \a false. + + The default behaviour is to pass this straight to content(). + + */ + virtual void cdata(const std::wstring& data) + { + content(data); + } + + + + /*! \brief Entity reference. + + \param name The name of the entity being referred to. + \returns The expanded text. + \throws UnknownEntity if the entity is unknown. + + This function is called whenever an entity reference is encountered, + and \a expandEntities is \a true in your parser. The five standard + entities will be expanded automatically, as will character entities, + but anything else will be passed to this function. + + */ + virtual std::wstring entityRef(const std::wstring& name) + { + throw UnknownEntity(name); + } +}; + + + +}; diff --git a/src/libStreamedXML/Decoder.cpp b/src/libStreamedXML/Decoder.cpp new file mode 100644 index 0000000..2a0d6fb --- /dev/null +++ b/src/libStreamedXML/Decoder.cpp @@ -0,0 +1,64 @@ +/* libStreamedXML/src/lib/Core/Decoder.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/ + +namespace lsx { + + + +const char* Decoder::sxmlRestartSequence = "streamRestart(); + decoder->reset(); + bytes += pos; + amount -= pos; + goto restart; + } + } else { + sxmlRestartCount = 0; + } + } + + decoder->decoded.clear(); + decoder->decode(bytes, amount); + parser->feedCharData(decoder->decoded); +} + + + +void Decoder::restart() +{ + decoder->reset(); + sxmlRestartCount = 0; +} + + + +} diff --git a/src/libStreamedXML/Decoder.h b/src/libStreamedXML/Decoder.h new file mode 100644 index 0000000..6f220bb --- /dev/null +++ b/src/libStreamedXML/Decoder.h @@ -0,0 +1,72 @@ +/* libStreamedXML/src/lib/Core/Decoder.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +namespace lsx { + + + +/*! \brief Basic UTF-8 character decoder. + +This is a simple implementation of the character decoder layer. It takes +in a pure UTF-8 byte stream and outputs characters to the parser layer. +Any exceptions thrown from the above layers will simply be propagated to +the caller of feedData(). + +The decoder is stateless with respect to the Streamed XML stream, but stateful with respect to the +UTF-8 byte stream. It can be reset with a call to restart(). + +*/ +class Decoder { +public: + /*! \brief Constructor. + + \param parser The Streamed XML parser which will receive decoded + characters. + \param decoder If you wish to use a more complex decoder, you may pass it here. If you pass 0, + the default UTF-8 decoder will be used. + + Sets up the finite state machine and records the Streamed XML parser + in use. Doesn't generate any data. + + On deletion, \a parser will not be informed or deleted, but \a decoder will be deleted. + + */ + Decoder(Parser* parser, utf8::Decoder* decoder = 0); + + /// Destructor (frees \a decoder passed to ctor). + virtual ~Decoder(); + + + + /*! \brief Feed data to decoder. + + \param bytes The raw input bytes. + \param amount The number of input bytes (may be zero). 
+ \throws (anything) Any exceptions from the Parser layer. + \throws utf8::Error on UTF-8 decoding errors. + + This function is called whenever raw input data becomes available. + It decodes a UTF-8 byte stream into characters, which it then passes + to the Parser layer. + + */ + virtual void feedData(const char* bytes, size_t amount); + + + + /// Restart decoder if it is pointed at a new byte stream, etc. + void restart(); + +private: + Parser* parser; + utf8::Decoder* decoder; + static const char* sxmlRestartSequence; + int sxmlRestartCount; +}; + + + +} diff --git a/src/libStreamedXML/Exceptions.cpp b/src/libStreamedXML/Exceptions.cpp new file mode 100644 index 0000000..68c079b --- /dev/null +++ b/src/libStreamedXML/Exceptions.cpp @@ -0,0 +1,61 @@ +/* libStreamedXML/src/lib/Core/Exceptions.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +namespace lsx { + + + +Exception::Exception(const std::wstring& reason) + : reason(reason) +{ +} + + + +const char* Exception::what() +{ + if(utf8Reason.empty()) utf8Reason = utf8::encode(reason, true); + return utf8Reason.c_str(); +} + + + +NotWellFormed::NotWellFormed(const std::wstring& error) + : Exception(format(error)), error(error) +{ +} + + + +std::wstring NotWellFormed::format(const std::wstring& error) +{ + std::wostringstream ost; + ost << L"Streamed XML is not well formed:\n" + << error; + return ost.str(); +} + + + +UnknownEntity::UnknownEntity(const std::wstring& name) + : Exception(format(name)), name(name) +{ +} + + + +std::wstring UnknownEntity::format(const std::wstring& name) +{ + std::wostringstream ost; + ost << L"Encountered an unknown entity named '" + << name + << L"' in the Streamed XML."; + return ost.str(); +} + + + +} diff --git a/src/libStreamedXML/Exceptions.h b/src/libStreamedXML/Exceptions.h new file mode 100644 index 0000000..398c51d --- /dev/null +++ b/src/libStreamedXML/Exceptions.h @@ -0,0 +1,97 @@ +/* 
libStreamedXML/src/lib/Core/Exceptions.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +namespace lsx { + + + +/*! \brief Exception base class. + +This class is the base class of all exceptions. It simply provides a mechanism for storing a +message; more specific details must be stored in derived classes. + +*/ +class Exception { +public: + /*! \brief Constructor. + + \param reason Reason for the exception. + + The constructor simply stores the reason for the exception, which may be later accessed for + reporting to the user. + + */ + Exception(const std::wstring& reason); + + /// Destructor. + virtual ~Exception() throw() + { } + + /// Find what caused the error. + virtual const char* what(); + + /// Reason for the exception. + const std::wstring reason; + +private: + std::string utf8Reason; +}; + + + +/// Thrown when XML is not well formed. +class NotWellFormed : public Exception { +private: + static std::wstring format(const std::wstring& error); + +public: + /*! \brief Constructor. + + \param error Reason for the error. + + The constructor will store the reason for the error. It will also prepare a suitable message + for the Exception base class. + + */ + NotWellFormed(const std::wstring& error); + + /// Destructor. + virtual ~NotWellFormed() throw() + { } + + /// Reason for the error. + const std::wstring error; +}; + + + +/// Thrown when an unknown entity is referred to. +class UnknownEntity : public Exception { +private: + static std::wstring format(const std::wstring& name); + +public: + /*! \brief Constructor. + + \param name Name of the unknown entity. + + The constructor will store the name of the unknown entity. It will also prepare a suitable + message for the Exception base class. + + */ + UnknownEntity(const std::wstring& name); + + /// Destructor. + virtual ~UnknownEntity() throw() + { } + + /// Name of unknown entity. 
+ const std::wstring name; +}; + + + +} diff --git a/src/libStreamedXML/ForwardDeclare.h b/src/libStreamedXML/ForwardDeclare.h new file mode 100644 index 0000000..d3ad008 --- /dev/null +++ b/src/libStreamedXML/ForwardDeclare.h @@ -0,0 +1,24 @@ +/* libStreamedXML/src/lib/ForwardDeclare.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +// This file simply contains forward declarations of all libStreamedXML +// classes, to facilitate header ordering, etc. + +/// The libStreamedXML classes all go into this namespace. +namespace lsx { + + + +class Callback; +class Decoder; +class Exception; +class NotWellFormed; +class Parser; +class UnknownEntity; + + + +} diff --git a/src/libStreamedXML/Parser.cpp b/src/libStreamedXML/Parser.cpp new file mode 100644 index 0000000..f6efd56 --- /dev/null +++ b/src/libStreamedXML/Parser.cpp @@ -0,0 +1,585 @@ +/* libStreamedXML/src/lib/Core/Parser.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/ + +//#define DEBUG_STATE_MACHINE + +#ifdef DEBUG_STATE_MACHINE +#include +#endif + +namespace lsx { + + + +const wchar_t* Parser::xmlCdataMarker = L"= 0x00 && ch <= 0x08) || + (ch >= 0x0B && ch <= 0x1F) || + (ch >= 0x7F && ch <= 0x9F) + ) { + ERROR(L"Restricted character encountered."); + } + c = ClassOther; + } + + skipNextNewline = false; +doBuffer: + // deal with char appropriately, according to state + switch(state) { + case StateError: + return; + + case StateNone: + switch(c) { + case ClassWhitespace: + buffer += ch; + break; + + case ClassOpenTag: + if(!buffer.empty()) callback->whiteSpace(buffer); + state = StateOpen; + buffer.clear(); + break; + + case ClassEntity: + if(expandEntities) { + if(!elementDepth) ERROR(L"Entities cannot appear at stream level."); + if(!buffer.empty()) callback->whiteSpace(buffer); + buffer.clear(); + parsingAttr = false; + state = StateEntity; + break; + } + + // fall through + default: + if(!elementDepth) ERROR(L"Content cannot appear at stream level."); + if(!buffer.empty()) callback->whiteSpace(buffer); + state = StateData; + buffer = ch; + break; + } + break; + + case StateData: + switch(c) { + case ClassOpenTag: + callback->content(buffer); + buffer.clear(); + state = StateOpen; + break; + + case ClassEntity: + callback->content(buffer); + buffer.clear(); + parsingAttr = false; + state = StateEntity; + break; + + default: + buffer += ch; + break; + } + break; + + case StateCDATA: + if(ch == L']') state = StateCDATA1; + else buffer += ch; + break; + + case StateCDATA1: + if(ch == L']') state = StateCDATA2; + else { + buffer += L']'; + buffer += ch; + state = StateCDATA; + } + break; + + case StateCDATA2: + if(ch == L'>') { + callback->cdata(buffer); + buffer.clear(); + state = StateNone; + } else if(ch == L']') { + buffer += ch; + } else { + buffer += L"]]"; + buffer += ch; + state = StateCDATA; + } + break; + + case StateRestartMarker: + if(ch == L']') state = StateRestartMarker1; + else buffer += ch; + break; + + case 
StateRestartMarker1: + if(ch == L']') state = StateRestartMarker2; + else { + buffer += L']'; + buffer += ch; + state = StateRestartMarker; + } + break; + + case StateRestartMarker2: + if(ch == L'>') { + callback->streamRestart(buffer); + buffer.clear(); + reset(); + } else if(ch == L']') { + buffer += L']'; + break; + } else { + buffer += L"]]"; + buffer += ch; + state = StateRestartMarker; + } + break; + + case StateOpen: + switch(c) { + case ClassNameStartChar: + state = StateElemName; + elemAttrs.clear(); + buffer = ch; + break; + + default: + if(ch == L'!') state = StateOpenBang; + else if(ch == L'?') { + state = StatePI; + buffer2.clear(); + } else if(ch == L'/') { + if(!elementDepth) ERROR(L"Encountered a close tag at stream level."); + state = StateClose; + } else ERROR(L"Invalid start character for element."); + break; + } + break; + + case StatePI: + if(ch == L'?') state = StatePI2; + else if(c == ClassWhitespace) { + state = StatePIData; + buffer.clear(); + } else buffer2 += ch; + break; + + case StatePI2: + if(ch != L'>') ERROR(L"Invalid target for PI"); + else { + callback->PI(buffer2, L""); + buffer.clear(); + state = StateNone; + } + break; + + case StatePIData: + if(ch == L'?') state = StatePI3; + else buffer += ch; + break; + + case StatePI3: + if(ch == L'>') { + callback->PI(buffer2, buffer); + buffer.clear(); + state = StateNone; + } else { + buffer += L'?'; + buffer += ch; + } + break; + + case StateOpenBang: + if(ch == L'[') { + // restart markers handled by lower layer + state = StateOpenCdataMarker; + xmlCount = 3; + if(!elementDepth) ERROR(L"CDATA sections not valid at stream level."); + } else if(ch == L'-') state = StateOpenComment; + else ERROR(L"Invalid special tag."); + break; + + case StateOpenCdataMarker: + if(ch != xmlCdataMarker[xmlCount]) ERROR(L"Invalid marked section."); + if(!xmlCdataMarker[++xmlCount]) state = StateCDATA; + break; + + case StateOpenComment: + if(ch != L'-') ERROR(L"Invalid special tag."); + state = 
StateComment; + buffer.clear(); + break; + + case StateComment: + if(ch == L'-') state = StateComment2; + else buffer += ch; + break; + + case StateComment2: + if(ch == L'-') state = StateComment3; + else { + buffer += L'-'; + buffer += ch; + } + break; + + case StateComment3: + if(ch != L'>') ERROR(L"`--' not valid in comments"); + callback->comment(buffer); + buffer.clear(); + state = StateNone; + break; + + case StateElemName: + switch(c) { + case ClassWhitespace: + state = StateElemTag; + elemName = buffer; + buffer.clear(); + break; + + case ClassNameStartChar: + case ClassNameChar: + buffer += ch; + break; + + default: + switch(ch) { + case L'>': + elemStack.push_back(buffer); + callback->element(buffer, elemAttrs); + state = StateNone; + ++elementDepth; + buffer.clear(); + break; + + case L'/': + state = StateNeedClose; + break; + + default: + ERROR(L"Invalid character in tag name."); + } + } + break; + + case StateElemTag: + switch(c) { + case ClassWhitespace: + break; + + case ClassNameStartChar: + state = StateElemAttrName; + buffer = ch; + break; + + default: + switch(ch) { + case L'>': + elemStack.push_back(elemName); + callback->element(elemName, elemAttrs); + buffer.clear(); + state = StateNone; + ++elementDepth; + break; + + case L'/': + state = StateNeedClose; + break; + + default: + ERROR(L"Invalid character in tag."); + } + } + break; + + case StateElemAttrName: + switch(c) { + case ClassNameStartChar: + case ClassNameChar: + buffer += ch; + break; + + default: + if(ch != L'=') ERROR(L"Invalid character in attribute name."); + state = StateElemAttrEq; + buffer2 = buffer; + buffer.clear(); + break; + } + break; + + case StateElemAttrEq: + if(ch == L'\'') singleQuote = true; + else if(ch == L'"') singleQuote = false; + else ERROR(L"Invalid character in attribute."); + state = StateElemAttrVal; + break; + + case StateElemAttrVal: + if((singleQuote && ch == L'\'') || (!singleQuote && ch == L'"')) { + elemAttrs[buffer2] = buffer; + buffer.clear(); + 
state = StateElemAttrDone; + } else if(expandEntities && ch == L'&') { + buffer3 = buffer; + buffer.clear(); + parsingAttr = true; + state = StateEntity; + } else buffer += ch; + break; + + case StateElemAttrDone: + switch(c) { + case ClassWhitespace: + state = StateElemTag; + break; + + default: + if(ch == L'/') { + state = StateNeedClose; + } else if(ch == L'>') { + callback->element(elemName, elemAttrs); + elemStack.push_back(elemName); + buffer.clear(); + state = StateNone; + ++elementDepth; + } else ERROR(L"Invalid character after attribute."); + break; + } + break; + + case StateNeedClose: + if(ch != L'>') ERROR(L"Stray `/' in open tag."); + callback->element(elemName, elemAttrs); + callback->closeTag(elemName); + buffer.clear(); + state = StateNone; + break; + + case StateClose: + if(c != ClassNameStartChar) ERROR(L"Invalid character in close tag name."); + buffer = ch; + state = StateClosing; + break; + + case StateClosing: + switch(c) { + case ClassNameStartChar: + case ClassNameChar: + buffer += ch; + break; + + case ClassWhitespace: + state = StateNeedClose2; + break; + + default: + if(ch != L'>') ERROR(L"Invalid character in close tag name."); + if(elemStack.back() != buffer) ERROR(L"Mismatched close tag."); + elemStack.pop_back(); + callback->closeTag(buffer); + buffer.clear(); + state = StateNone; + --elementDepth; + } + break; + + case StateNeedClose2: + if(c == ClassWhitespace) break; + if(ch != L'>') ERROR(L"Invalid data in close tag."); + if(elemStack.back() != elemName) ERROR(L"Mismatched close tag."); + elemStack.pop_back(); + callback->closeTag(elemName); + buffer.clear(); + state = StateNone; + --elementDepth; + break; + + case StateEntity: + if(ch == L'#') { + state = StateCharEntity; + entityChar = 0; + } else if(c == ClassNameStartChar) { + buffer = ch; + state = StateEntityName; + } else ERROR(L"Invalid entity name."); + break; + + case StateCharEntity: + if(ch == ';') { + if(parsingAttr) { + buffer = buffer3 + entityChar; + state = 
StateElemAttrVal; + break; + } + + buffer = entityChar; + state = StateData; + break; + + } else if(ch >= L'0' && ch <= L'9') { + if(entityChar > 214748364 || (entityChar == 214748364 && ch >= L'8')) + ERROR(L"Character code too large in character entity."); + entityChar *= 10; + entityChar += (ch - L'0'); + } else ERROR(L"Invalid character in character entity."); + break; + + case StateEntityName: + if(ch == L';') { + if(parsingAttr) { + buffer = buffer3 + entityRef(buffer); + state = StateElemAttrVal; + break; + } + + buffer = entityRef(buffer); + state = StateData; + break; + } + if(c != ClassNameChar && c != ClassNameStartChar) ERROR(L"Invalid entity name."); + buffer += ch; + break; + } +#undef ERROR +} + + + +std::wstring Parser::entityRef(const std::wstring& ent) +{ + if(ent == L"quot") return L"\""; + if(ent == L"amp") return L"&"; + if(ent == L"apos") return L"'"; + if(ent == L"lt") return L"<"; + if(ent == L"gt") return L">"; + return callback->entityRef(ent); +} + + + +} diff --git a/src/libStreamedXML/Parser.h b/src/libStreamedXML/Parser.h new file mode 100644 index 0000000..78f1493 --- /dev/null +++ b/src/libStreamedXML/Parser.h @@ -0,0 +1,223 @@ +/* libStreamedXML/src/lib/Core/Parser.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +namespace lsx { + + + +/*! \brief Streamed XML parser object. + +This object sits between the callback layer (see lsx::Callback) and the +character decoding layer. It is a finite state machine which parses +character data as input and gives Streamed XML events as output. The +FSM design gives both speed and robustness. FSMs are good for dealing +with true data streams since their behaviour can easily be controlled to +avoid becoming unstable, and also because it is easy to provide "reset" +functionality (as in the case of a stream restart marker). 
+ +Data must be fed into the parser through either of the feedChar() or +feedCharData() functions. +Any stream restarts detected by the character decoding layer can be +signalled through the streamRestart() function. The parser will accept +stream reset markers, but only if they occur at stream level; if the +data stream became corrupted (e.g. the end of a comment tag was missed), +the Parser class would not be looking for stream restart markers and +would thus ignore them. + +A parser is tied to a single lsx::Callback object, which cannot be +changed. You could potentially get around this by providing a +multiplexing Callback object. As many streams as you like may provide +input to a Parser, but this would only make sense if you synchronised +their inputs in some manner. Similarly, as many Parser objects as you +like may share the same callback, but again some form of synchronisation +would be needed. + +Should an error occur in processing the XML (i.e. it is not well +formed), a NotWellFormed exception will be thrown. It is also possible +for a callback function to throw any exception, which will propagate +through to the character decoding layer. If this occurs, the character +decoding layer should switch into a state where it searches for a +stream restart marker, and not pass any more data to the Parser until it +encounters one. At this point, the streamRestart() function should be +called and processing can continue. + +*/ +class Parser { +private: + //BEGIN // Enumerations for finite state machine /////////////////// + enum State { + StateNone, // at element or stream level + StateRestartMarker, // after elemStack; + std::wstring elemName; + std::map elemAttrs; + + // parsing state + State state; + std::wstring buffer, buffer2, buffer3; + wchar_t entityChar; + bool singleQuote; + bool skipNextNewline; + int elementDepth, xmlCount, restartCount; + +public: + /*! \brief Constructor. + + \param callback The callback object to use. 
+ \param expandEntities \a true if you want entities to be expanded, + \a false to keep them inline. + + This sets up the finite state machine and records the callback + object to use. It doesn't generate any events. + + */ + Parser(Callback* callback, bool expandEntities); + + + + /*! \brief Feed parser a character. + + \param ch The character to parse. + \throws NotWellFormed If the Streamed XML is not well formed. + \throws (anything) Anything from the callback object. + + This function will cause the finite state machine to process an + input character. It may generate some callback events. It will + throw a NotWellFormed exception should it encounter any Streamed XML + structure that is not well formed; it will also propagate any + exceptions thrown from the higher callback layer. + + */ + void feedChar(wchar_t ch); + + + + /*! \brief Feed parser some character data. + + \param data A pointer to the buffer of data. + \param amount The number of characters to parse (may be zero). + \throws NotWellFormed If the Streamed XML is not well formed. + \throws (anything) Anything from the callback object. + + This function will cause the finite state machine to process some + input characters. It may generate some callback events. It will + throw a NotWellFormed exception should it encounter any Streamed XML + structure that is not well formed; it will also propagate any + exceptions thrown from the higher callback layer. + + */ + inline void feedCharData(const wchar_t* data, size_t amount) + { + for(size_t pos = 0; pos < amount; ++pos) feedChar(data[pos]); + } + + + + /*! \brief Feed parser some character data. + + \param dataStr The data, as a string. + \throws NotWellFormed If the Streamed XML is not well formed. + \throws (anything) Anything from the callback object. + + This function will cause the finite state machine to process some + input characters. It may generate some callback events. 
It will + throw a NotWellFormed exception should it encounter any Streamed XML + structure that is not well formed; it will also propagate any + exceptions thrown from the higher callback layer. + + */ + inline void feedCharData(const std::wstring& dataStr) + { + std::wstring::size_type pos = 0, len = dataStr.size(); + for(; pos < len; ++pos) feedChar(dataStr[pos]); + } + + + + /*! \brief Reset the stream. + + This function should be called to reset the stream. This could be due to e.g. a new file or a + restart marker. + + */ + void reset(); + + + + /*! \brief Restart marker detected. + + This function should be called when a stream restart marker ( + \c <![RESTART[ ) has been detected. The next character of + data to be passed to the parser should be the first character of + restart marker data (or a \c ']', if there is no data). + + This function will cause the parser to generate a streamRestart() + callback. + + */ + void streamRestart(); +}; + + + +} diff --git a/src/libStreamedXML/TopHeader.h b/src/libStreamedXML/TopHeader.h new file mode 100644 index 0000000..837f7c2 --- /dev/null +++ b/src/libStreamedXML/TopHeader.h @@ -0,0 +1,16 @@ +/* libStreamedXML/src/lib/TopHeader.h + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#ifndef HEADER_libStreamedXML +#define HEADER_libStreamedXML + +// standard includes, or includes needed for type declarations +#include +#include +#include +#include +#include +#include diff --git a/src/libStreamedXML/TopSource.cpp b/src/libStreamedXML/TopSource.cpp new file mode 100644 index 0000000..2ef09e7 --- /dev/null +++ b/src/libStreamedXML/TopSource.cpp @@ -0,0 +1,10 @@ +/* libStreamedXML/src/lib/TopSource.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#include "StreamedXML" + +// Below are all the includes used throughout the library. 
+ diff --git a/src/libStreamedXML/build.default b/src/libStreamedXML/build.default new file mode 100644 index 0000000..405b226 --- /dev/null +++ b/src/libStreamedXML/build.default @@ -0,0 +1 @@ +source src/libStreamedXML/build.lib diff --git a/src/libStreamedXML/build.install b/src/libStreamedXML/build.install new file mode 100644 index 0000000..8207b3d --- /dev/null +++ b/src/libStreamedXML/build.install @@ -0,0 +1 @@ +source src/libStreamedXML/build.install-lib diff --git a/src/libStreamedXML/build.install-lib b/src/libStreamedXML/build.install-lib new file mode 100644 index 0000000..729126c --- /dev/null +++ b/src/libStreamedXML/build.install-lib @@ -0,0 +1,36 @@ +build_target libStreamedXML + +# make paths (this is for Gentoo in particular) +build_dir_tree "${LIBDIR}" || return 1 +build_dir_tree "${PKGCONFDIR}" || return 1 +build_dir_tree "${INCLUDEDIR}" || return 1 + +# install library +echo "Installing libraries into '${LIBDIR}'" +install_file ${libStreamedXML} ${LIBDIR} 0755 || return 1 +BASE="${libStreamedXML_BASE}.so" +MAJOR="${BASE}.${SOMAJOR}" +MINOR="${MAJOR}.${SOMINOR}" +MICRO="${MINOR}.${SOMICRO}" +install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" +install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" +install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" + +# install header +echo "Installing header file '${libStreamedXML_HEADER}' into ${INCLUDEDIR}" +install_header ${libStreamedXML_HEADER} ${INCLUDEDIR} 0644 || return 1 + +# install pkgconfig file +echo "Installing package config file into ${PKGCONFDIR}" +PKGCONFFILE=${PKGCONFDIR}/libStreamedXML.pc +do_cmd rm -f ${PKGCONFFILE} +do_cmd_redir ${PKGCONFFILE} sed \ + -e "s,@VERSION@,${VERSION}," \ + -e "s,@LIBDIR@,${FINALLIBDIR}," \ + -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \ + src/libStreamedXML/pkgconf.in +do_cmd chmod 0644 ${PKGCONFFILE} +print_success "Done" + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libStreamedXML/build.lib 
b/src/libStreamedXML/build.lib new file mode 100644 index 0000000..b685b96 --- /dev/null +++ b/src/libStreamedXML/build.lib @@ -0,0 +1,51 @@ +# These are external variables, and shouldn't clash with anything else +# libStreamedXML +# libStreamedXML_BUILT +# libStreamedXML_HEADER +# libStreamedXML_BASE + +if [ -z ${libStreamedXML_BUILT} ] +then + libStreamedXML_BASE=libStreamedXML + source src/libStreamedXML/soversion + + libStreamedXML="obj/${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}" + SO_EXTRA="$(pkg-config libutf8++ --libs --cflags) -lstdc++ -lc" + + echo "Building library ${libStreamedXML}..." + + do_cmd source src/libStreamedXML/build.monolithic || return 1 + + MODIFIED=0 + for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC} + do + if [ ${test} -nt ${libStreamedXML} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + echo " Compiling" + + SONAME="${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}" + do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libStreamedXML}" \ + -Wl,-soname,${SONAME} \ + ${SRC} ${SO_EXTRA} || return 1 + + # make tests work + do_cmd ln -sf $(basename ${libStreamedXML}) obj/${SONAME} || return 1 + + print_success "Library built" + else + print_success "Library up to date" + fi + + libStreamedXML_BUILT=1 + libStreamedXML_HEADER=${HDR} + +fi +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libStreamedXML/build.monolithic b/src/libStreamedXML/build.monolithic new file mode 100644 index 0000000..2a05e4b --- /dev/null +++ b/src/libStreamedXML/build.monolithic @@ -0,0 +1,21 @@ +# These are external variables, and shouldn't clash with anything else +# libStreamedXML_MONOLITHIC + +SRC="obj/libStreamedXML.cpp" +HDR="obj/StreamedXML" + +MONOLITHIC_TESTS="src/libStreamedXML/build.lib src/libStreamedXML/build.monolithic" + +if [ -z "${libStreamedXML_MONOLITHIC}" ] +then + MONOLITHIC_SOURCE="$(echo 
src/libStreamedXML/{TopHeader,ForwardDeclare,Exceptions,Callback,Parser,Decoder,BottomHeader}.h)" + make_monolithic ${HDR} C || return 1 + + MONOLITHIC_SOURCE="$(echo src/libStreamedXML/{TopSource,Exceptions,Parser,Decoder}.cpp)" + make_monolithic ${SRC} C || return 1 + + libStreamedXML_MONOLITHIC=1 + MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}" +fi +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libStreamedXML/pkgconf.in b/src/libStreamedXML/pkgconf.in new file mode 100644 index 0000000..a9b1553 --- /dev/null +++ b/src/libStreamedXML/pkgconf.in @@ -0,0 +1,21 @@ +# libStreamedXML/src/lib/libStreamedXML/pkgconf.in +# +# Metadata file for pkg-config +# ( http://www.freedesktop.org/software/pkgconfig/ ) +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. +# + +# Name, description +Name: libStreamedXML +Description: C++ Streamed XML parser +Version: @VERSION@ + +# Requirements +Requires: libutf8++ + +# Compilation information +Libs: -L@LIBDIR@ -lStreamedXML +Cflags: -I@INCLUDEDIR@ diff --git a/src/libStreamedXML/soversion b/src/libStreamedXML/soversion new file mode 100644 index 0000000..b706c89 --- /dev/null +++ b/src/libStreamedXML/soversion @@ -0,0 +1,17 @@ +# libStreamedXML/src/libStreamedXML/soversion +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. +# + + + +# SOMAJOR and SOMINOR are included in the library's soname. They need to +# be bumped on a binary-incompatible release. They are both single +# integers. +SOMAJOR=0 +SOMINOR=0 + +# SOMICRO is bumped every time there is a binary-compatible release. 
+SOMICRO=0 diff --git a/src/tests/.params b/src/tests/.params new file mode 100644 index 0000000..fe0fee1 --- /dev/null +++ b/src/tests/.params @@ -0,0 +1 @@ +c++ tests tests libStreamedXML diff --git a/src/tests/Callback.cpp b/src/tests/Callback.cpp new file mode 100644 index 0000000..10c830f --- /dev/null +++ b/src/tests/Callback.cpp @@ -0,0 +1,110 @@ +/* libStreamedXML/src/tests/Callback.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +#include "StreamedXML" +#include +#include + + + +class PrintCallback : public lsx::Callback { +public: + virtual void comment(const std::wstring& data) + { + std::wcout << L"Callback: comment: ``" << data << L"''.\n"; + } + + virtual void PI(const std::wstring& target, const std::wstring& data) + { + std::wcout << L"Callback: PI: ``" << target << L"'': ``" + << data << L"''.\n"; + } + + virtual void element(const std::wstring& elemName, + const std::map& attrs) + { + std::wcout << L"Callback: element: ``" << elemName << "''\n"; + for(std::map::const_iterator i = attrs.begin(), + end = attrs.end(); i != end; ++i) + { + std::wcout << L"Callback: attribute: " + << i->first << L"=``" << i->second << L"''\n"; + } + std::wcout << L"Callback: end of attributes.\n"; + } + + virtual void closeTag(const std::wstring& elemName) + { + std::wcout << L"Callback: close element: ``" << elemName << "''.\n"; + } + + virtual void whiteSpace(const std::wstring& space) + { + (void)space; + std::wcout << L"Callback: whitespace.\n"; + } + + virtual void content(const std::wstring& data) + { + std::wcout << L"Callback: content: ``" << data << L"''.\n"; + } + + virtual void cdata(const std::wstring& data) + { + std::wcout << L"Callback: CDATA: ``" << data << "''.\n"; + } + + virtual std::wstring entityRef(const std::wstring& name) + { + std::wcout << L"Callback: entity: ``" << name + << L"'' (returning as data).\n"; + return name; + } +}; + + + +//END // Parser class 
////////////////////////////////////////////////// + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::wcout << L"Tests the callback functions.\n"; + return 0; + } + + if(argc != 2) { + std::wcerr << L"Expecting name of XML file.\n"; + return 1; + } + + std::ifstream fin(argv[1]); + if(!fin) { + std::wcerr << L"Couldn't open file for input.\n"; + return 1; + } + + int ret = 0; + try { + PrintCallback printCallback; + lsx::Parser xmlParser(&printCallback, true); + lsx::Decoder xmlDecoder(&xmlParser); + char data[1024]; + + while(!fin.eof()) { + fin.read(data, sizeof(data)); + xmlDecoder.feedData(data, fin.gcount()); + } + } + catch(std::exception& e) { + std::cerr << e.what(); + ret = 1; + } + + return ret; +} diff --git a/src/tests/Structure.cpp b/src/tests/Structure.cpp new file mode 100644 index 0000000..6da6bf4 --- /dev/null +++ b/src/tests/Structure.cpp @@ -0,0 +1,479 @@ +/* libStreamedXML/src/tests/Structure.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/ + +#include "StreamedXML" + +#include +#include + + + +class TestFailed { }; + + + +//BEGIN // Test results //////////////////////////////////////////////// + + + +#define DYN_CAST_CHECK(_type, _name) \ + TestResult* r = getExpected( _name ); \ + _type * rr = dynamic_cast< _type *>(r); \ + if(!rr) { \ + std::wcerr << L"Expected " << r->name() << L", got " _name L".\n"; \ + delete r; \ + throw TestFailed(); \ + } + + + +class TestResult { +public: + virtual ~TestResult() { } + virtual std::wstring name() const = 0; +}; + +class TestResultException : public TestResult { +public: + virtual std::wstring name() const { return L"exception"; } +}; + +class TestResultStreamRestart : public TestResult { +public: + std::wstring data; + TestResultStreamRestart(const std::wstring& data) : data(data) { } + bool verify(const std::wstring& data) const { return data == this->data; } + virtual std::wstring name() const { return L"stream restart marker"; } +}; + +class TestResultComment : public TestResult { +public: + std::wstring data; + TestResultComment(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"comment"; } + + void verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultCloseTag : public TestResult { +public: + std::wstring data; + TestResultCloseTag(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"close tag"; } + + void verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultContent : public TestResult { +public: + std::wstring data; + TestResultContent(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"content"; } + + void 
verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultWhiteSpace : public TestResult { +public: + std::wstring data; + TestResultWhiteSpace(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"whitespace"; } + + void verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultCDATA : public TestResult { +public: + std::wstring data; + TestResultCDATA(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"CDATA marker"; } + + void verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultEntityRef : public TestResult { +public: + std::wstring data; + TestResultEntityRef(const std::wstring& data) : data(data) { } + virtual std::wstring name() const { return L"entity reference"; } + + void verify(const std::wstring& data) const + { + if(data != this->data) { + std::wcerr << name() << L": expecting ``" << this->data + << L"'', got ``" << data << L"''.\n"; + throw TestFailed(); + } + } +}; + +class TestResultPI : public TestResult { +public: + std::wstring target, data; + TestResultPI(const std::wstring& target, const std::wstring& data) + : target(target), data(data) { } + virtual std::wstring name() const { return L"processing instruction"; } + + void verify(const std::wstring& target, const std::wstring& data) const + { + if(target != this->target) { + std::wcerr << name() << L": expecting ``" << this->target + << L"'', got ``" << target << L"''.\n"; + throw TestFailed(); + } + if(data != this->data) { + std::wcerr 
<< name() << L": expecting ``" << this->data
+                << L"'', got ``" << data << L"''.\n";
+            throw TestFailed();
+        }
+    }
+};
+
+/* Expected open tag: element name plus its attributes. */
+class TestResultElement : public TestResult {
+public:
+    std::wstring ename;
+    std::map<std::wstring, std::wstring> attrs;
+    TestResultElement(const std::wstring& ename) : ename(ename) { }
+    virtual std::wstring name() const { return L"open tag"; }
+
+    // Throws TestFailed unless the name matches and both attribute maps
+    // hold the same name/value pairs. Both maps iterate in key order, so
+    // they are compared pairwise.
+    void verify(const std::wstring& xname, const std::map<std::wstring, std::wstring>& xattrs)
+    {
+        if(ename != xname) {
+            std::wcerr << name() << L": expecting ``" << ename
+                << L"'', got ``" << xname << L"''.\n";
+            throw TestFailed();
+        }
+        std::map<std::wstring, std::wstring>::const_iterator i1, i2, end1, end2;
+        i1 = attrs.begin(); end1 = attrs.end();
+        i2 = xattrs.begin(); end2 = xattrs.end();
+
+        for(; (i1 != end1) && (i2 != end2); ++i1, ++i2) {
+            if(i1->first != i2->first) {
+                std::wcerr << name() << L": attribute name: expecting ``" << i1->first
+                    << L"'', got ``" << i2->first << L"''.\n";
+                throw TestFailed();
+            }
+            if(i1->second != i2->second) {
+                std::wcerr << name() << L": attribute value: expecting ``" << i1->second
+                    << L"'', got ``" << i2->second << L"''.\n";
+                throw TestFailed();
+            }
+        }
+
+        // One side ran out first: differing attribute counts.
+        if((i1 != end1) || (i2 != end2)) {
+            std::wcerr << name() << L": attributes don't match.\n";
+            throw TestFailed();
+        }
+    }
+};
+
+
+
+//END // Test results //////////////////////////////////////////////////
+//BEGIN // Test parser /////////////////////////////////////////////////
+
+
+
+/* Callback that checks each parser event against a queue of expected
+ * results, throwing TestFailed on the first mismatch.
+ */
+class TestParser : private lsx::Callback {
+private:
+    std::list<TestResult*> expected;
+
+    // Pops and returns the next expected result; the caller takes
+    // ownership. Throws TestFailed if nothing more is expected.
+    TestResult* getExpected(const std::wstring& dataType)
+    {
+        if(expected.empty()) {
+            std::wcerr << L"Further data (" << dataType
+                << L") encountered after end of test.\n";
+            throw TestFailed();
+        }
+        TestResult* r = expected.front();
+        expected.pop_front();
+        return r;
+    }
+
+    virtual void streamRestart(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultStreamRestart, L"stream restart marker")
+        // Unlike the other result types, this verify() only returns a
+        // bool; act on it so that wrong restart data fails the test.
+        if(!rr->verify(data)) {
+            std::wcerr << rr->name() << L": expecting ``" << rr->data
+                << L"'', got ``" << data << L"''.\n";
+            delete r;
+            throw TestFailed();
+        }
+        delete r;
+    }
+
+    virtual void comment(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultComment,
L"comment")
+        rr->verify(data);
+        // getExpected() handed ownership of r to us; release it once
+        // verified (same as the exception case in feedData()).
+        delete r;
+    }
+
+    virtual void closeTag(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultCloseTag, L"close tag")
+        rr->verify(data);
+        delete r;
+    }
+
+    virtual void whiteSpace(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultWhiteSpace, L"whitespace")
+        rr->verify(data);
+        delete r;
+    }
+
+    virtual void content(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultContent, L"Content")
+        rr->verify(data);
+        delete r;
+    }
+
+    virtual void cdata(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultCDATA, L"CDATA marker")
+        rr->verify(data);
+        delete r;
+    }
+
+    // The entity name is returned unchanged as its replacement text.
+    virtual std::wstring entityRef(const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultEntityRef, L"entity reference")
+        rr->verify(data);
+        delete r;
+        return data;
+    }
+
+    virtual void PI(const std::wstring& target, const std::wstring& data)
+    {
+        DYN_CAST_CHECK(TestResultPI, L"processing instruction")
+        rr->verify(target, data);
+        delete r;
+    }
+
+    virtual void element(const std::wstring& name, const std::map<std::wstring, std::wstring>& attrs)
+    {
+        DYN_CAST_CHECK(TestResultElement, L"open tag")
+        rr->verify(name, attrs);
+        delete r;
+    }
+
+    // XML parser objects
+    lsx::Parser xmlParser;
+    lsx::Decoder xmlDecoder;
+
+public:
+    TestParser()
+        : xmlParser(this, true), xmlDecoder(&xmlParser)
+    {
+    }
+
+    // Frees any expected results that were never consumed.
+    virtual ~TestParser()
+    {
+        while(!expected.empty()) {
+            delete expected.front();
+            expected.pop_front();
+        }
+    }
+
+    // Throws TestFailed if expected results are still outstanding.
+    void finish()
+    {
+        if(!expected.empty()) {
+            std::wcerr << L"Expecting some more data ("
+                << expected.front()->name()
+                << L").\n";
+            throw TestFailed();
+        }
+    }
+
+    // Queues an expected result; the TestParser takes ownership of r.
+    void expect(TestResult* r)
+    {
+        expected.push_back(r);
+    }
+
+    // Feeds the input to the decoder in chunks of at most 6 bytes. An
+    // lsx::Exception from the decoder is accepted only if an exception is
+    // the next expected result.
+    virtual void feedData(const char* bytes, size_t amt)
+    {
+        while(amt) {
+            const size_t chunk = std::min(amt, size_t(6));
+            try {
+                xmlDecoder.feedData(bytes, chunk);
+            }
+            catch(lsx::Exception&) {
+                DYN_CAST_CHECK(TestResultException, L"exception")
+                delete r;
+            }
+            // Advance by what was actually fed so the pointer never moves
+            // past the end of the caller's buffer on the final short chunk.
+            amt -= chunk;
+            bytes += chunk;
+        }
+    }
+};
+
+
+
+//END // Test parser ///////////////////////////////////////////////////
+//BEGIN // Tests
/////////////////////////////////////////////////////// + + + +void test1(TestParser* p) +{ + std::wcout << L"Test 1: " << std::flush; + + p->expect(new TestResultPI(L"targ", L" data data ")); + p->expect(new TestResultWhiteSpace(L"\n")); + p->expect(new TestResultComment(L" comment ")); + p->expect(new TestResultWhiteSpace(L"\n")); + p->expect(new TestResultElement(L"elemOne")); + p->expect(new TestResultWhiteSpace(L" ")); + p->expect(new TestResultContent(L"with added content\n")); + p->expect(new TestResultException()); + p->expect(new TestResultStreamRestart(L"data]>]]")); + p->expect(new TestResultWhiteSpace(L"\n")); + p->expect(new TestResultElement(L"elemTwo")); + p->expect(new TestResultWhiteSpace(L"\n")); + p->expect(new TestResultCloseTag(L"elemTwo")); + + std::string td = "\n" + "\n" + " with added content\n" + "]]]]>\n" + "\n" + "\n"; + p->feedData(td.data(), td.size()); + p->finish(); + + std::wcout << L"OK.\n"; +} + + + +void test2(TestParser* p) +{ + std::wcout << L"Test 2: " << std::flush; + + p->expect(new TestResultElement(L"reading")); + p->expect(new TestResultElement(L"type")); + p->expect(new TestResultContent(L"integer")); + p->expect(new TestResultCloseTag(L"type")); + p->expect(new TestResultElement(L"value")); + p->expect(new TestResultContent(L"0")); + p->expect(new TestResultCloseTag(L"value")); + p->expect(new TestResultCloseTag(L"reading")); + p->expect(new TestResultElement(L"reading")); + p->expect(new TestResultElement(L"type")); + p->expect(new TestResultContent(L"integer")); + p->expect(new TestResultCloseTag(L"type")); + p->expect(new TestResultElement(L"value")); + p->expect(new TestResultContent(L"1")); + p->expect(new TestResultCloseTag(L"value")); + p->expect(new TestResultCloseTag(L"reading")); + + std::string td = "integer0" + "integer1"; + p->feedData(td.data(), td.size()); + p->finish(); + + std::wcout << L"OK.\n"; +} + + + +void test3(TestParser* p) +{ + std::wcout << L"Test 3: " << std::flush; + + TestResultElement* el 
= new TestResultElement(L"elemName"); + el->attrs[L"attr1"] = L"\"value1\""; + el->attrs[L"attr2"] = L"'value2'"; + el->attrs[L"attr3"] = L"\"<&>'"; + p->expect(el); + + p->expect(new TestResultContent(L"<")); + p->expect(new TestResultContent(L"&")); + p->expect(new TestResultContent(L">")); + p->expect(new TestResultEntityRef(L"myEntity")); + p->expect(new TestResultContent(L"myEntity\n ")); + p->expect(new TestResultCDATA(L"<&>]]]]")); + p->expect(new TestResultWhiteSpace(L"\n")); + p->expect(new TestResultCloseTag(L"elemName")); + + std::string td = "<&>&myEntity;\n" + " ]]]]]]>\n" + "\n"; + p->feedData(td.data(), td.size()); + p->finish(); + + std::wcout << L"OK.\n"; +} + + + +//END // Tests ///////////////////////////////////////////////////////// + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::wcout << L"XML structure regression test.\n"; + return 0; + } + + if(argc != 1) { + std::wcerr << L"No arguments expected.\n"; + return 1; + } + + int ret = 0; + TestParser* p = 0; + +#define TEST_n(_n) \ + try { \ + std::wcout << "======== Test " #_n " ========" << std::endl; \ + p = new TestParser(); \ + test ## _n (p); \ + delete p; \ + p = 0; \ + } \ + catch(TestFailed&) { \ + std::wcerr << "Test failed." << std::endl; \ + ret = 1; \ + } \ + catch(std::exception& e) { \ + std::cerr << e.what() << std::endl; \ + ret = 1; \ + } \ + std::wcout << std::endl; + + TEST_n(1); + TEST_n(2); + TEST_n(3); + + delete p; + return ret; +} diff --git a/src/tests/UTF-8.cpp b/src/tests/UTF-8.cpp new file mode 100644 index 0000000..98c0a9b --- /dev/null +++ b/src/tests/UTF-8.cpp @@ -0,0 +1,120 @@ +/* libStreamedXML/src/tests/UTF-8.cpp + * + * (c)2005, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/
+
+#include "StreamedXML"
+#include
+#include
+
+
+
+class TestFailed { };
+
+
+
+/* Minimal Decoder -> Parser chain that uses the default callbacks; only
+ * the decoder's reaction to the raw bytes is of interest here.
+ */
+class MySimpleParser : private lsx::Callback {
+private:
+    lsx::Parser xmlParser;
+    lsx::Decoder xmlDecoder;
+
+public:
+    MySimpleParser()
+        : xmlParser(this, true), xmlDecoder(&xmlParser)
+    {
+    }
+
+    // Feeds data that must be rejected: returns normally only if the
+    // decoder throws utf8::Error, otherwise throws TestFailed.
+    void feedBadData(const char* bytes, size_t amount)
+    {
+        try {
+            xmlDecoder.feedData(bytes, amount);
+        }
+        catch(utf8::Error&) {
+            return;
+        }
+
+        throw TestFailed();
+    }
+
+    // Feeds data that must be accepted: any std::exception is printed and
+    // converted to TestFailed.
+    virtual void feedData(const char* bytes, size_t amount)
+    {
+        try {
+            xmlDecoder.feedData(bytes, amount);
+        }
+        catch(std::exception& e) {
+            std::wcerr << e.what() << std::endl;
+            throw TestFailed();
+        }
+    }
+};
+
+
+
+char restartMarker[] = "";
+char okData[] = ""
+    "\x41\xE2\x89\xA2\xCE\x91\x2E"
+    "\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4"
+    "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E";
+
+char badDataOverlong[] = ""
+    "\xC0\xB0"; // ascii '0', overlong
+char badDataMalformed0[] = ""
+    "\xFF"; // never appears
+char badDataMalformed2[] = ""
+    "\x8F"; // lone continuation char
+char badDataMalformed3[] = ""
+    "\xC1\x3F"; // missing continuation char
+
+// One bad-input case: description, byte string and its length without
+// the trailing NUL. The pointers are const because desc is initialised
+// from a string literal and neither string is ever modified.
+struct TestData {
+    const wchar_t* desc;
+    const char* str;
+    size_t len;
+};
+
+#define P(D, Q) { D, Q, sizeof( Q ) - 1 }
+TestData testData[] = {
+    P(L"Overlong character encoding", badDataOverlong),
+    P(L"Illegal octet (0xFF)", badDataMalformed0),
+    P(L"Lone continuation char", badDataMalformed2),
+    P(L"Missing continuation char", badDataMalformed3),
+    { 0, 0, 0 } // terminator: desc == 0 ends the table
+};
+
+
+
+int main(int argc, char* argv[])
+{
+    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
+        std::wcout << L"Regression test for the UTF-8 decoder.\n";
+        return 0;
+    }
+
+    if(argc != 1) {
+        // Usage errors go to the error stream, as in the other tests.
+        std::wcerr << L"No arguments expected.\n";
+        return 1;
+    }
+
+    int ret = 0;
+    try {
+        MySimpleParser p;
+
+        std::wcout << L"Checking OK data: " << std::flush;
+        p.feedData(okData, sizeof(okData) - 1);
+        std::wcout << L"success" << std::endl;
+
+        for(TestData* test = testData; test->desc; ++test) {
+            std::wcout <<
L"Checking bad data (" << test->desc + << L"): " << std::flush; + p.feedBadData(test->str, test->len); + std::wcout << "success" << std::endl; + p.feedData(restartMarker, sizeof(restartMarker) - 1); + } + } + catch(TestFailed&) { + std::wcerr << L"Test failed.\n"; + ret = 1; + } + + return ret; +} diff --git a/src/tests/build.default b/src/tests/build.default new file mode 100644 index 0000000..2d979e2 --- /dev/null +++ b/src/tests/build.default @@ -0,0 +1,3 @@ +source src/tests/build.tests +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/tests/build.tests b/src/tests/build.tests new file mode 100644 index 0000000..c86206b --- /dev/null +++ b/src/tests/build.tests @@ -0,0 +1,43 @@ +# These are external variables, and shouldn't clash with anything else +# tests_BUILT +# + +build_target libStreamedXML || return 1 + +if [ -z ${tests_BUILT} ] +then + LIBS="${libStreamedXML} " + EXTRAS="" + + echo "Building test programs..." + do_cmd mkdir -p obj/tests || return 1 + + for SRC in src/tests/*.cpp + do + TEST="obj/tests/$(basename ${SRC} | sed -e 's,.cpp$,,')" + MODIFIED=0 + for file in ${LIBS} ${SRC} src/tests/build.tests + do + if [ ${file} -nt ${TEST} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + do_cmd ${CXX} -Iobj ${CFLAGS} -o ${TEST} ${SRC} ${LIBS} ${EXTRAS} || return 1 + print_success "Built ${TEST}" + else + print_success "${TEST} is up to date" + fi + done + + print_success "All tests built" + + tests_BUILT=1 +fi + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/tests/template b/src/tests/template new file mode 100644 index 0000000..1f4f61b --- /dev/null +++ b/src/tests/template @@ -0,0 +1,44 @@ +/* libStreamedXML/src/tests/???.cpp + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. 
+*/ + +#include "StreamedXML" + +#include + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + std::cout << "One line summary.\n"; + return 0; + } + + if(argc == 1) { + // empty argument list + } + + int ret = 0; + try { + // TODO + } + catch(std::exception& e) { + std::cerr << e.what() << std::endl; + ret = 1; + } + catch(...) { + std::cerr << "Unknown exception caught." << std::endl; + ret = 1; + } + + return ret; +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/