Import old subversion tree.

This commit is contained in:
Laurence Withers 2006-07-31 17:21:55 +01:00
parent 4976710df9
commit 7610e244f4
33 changed files with 2565 additions and 0 deletions

1
src/docs/.params Normal file
View File

@ -0,0 +1 @@
doxygen docs docs

146
src/docs/Doxyfile.in Normal file
View File

@ -0,0 +1,146 @@
# libStreamedXML/src/docs/Doxyfile.in
#
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
# Released under the GNU GPLv2. See file COPYING or
# http://www.gnu.org/copyleft/gpl.html for details.
#
PROJECT_NAME = libStreamedXML
OUTPUT_DIRECTORY =
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = YES
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = YES
DETAILS_AT_TOP = YES
INHERIT_DOCS = YES
DISTRIBUTE_GROUP_DOC = NO
TAB_SIZE = 4
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
SUBGROUPING = YES
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = NO
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = YES
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = NO
SHOW_DIRECTORIES = NO
FILE_VERSION_FILTER =
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = YES
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH = src/docs
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
VERBATIM_HEADERS = NO
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
GENERATE_LATEX = NO
GENERATE_RTF = NO
GENERATE_MAN = NO
GENERATE_XML = NO
GENERATE_AUTOGEN_DEF = NO
GENERATE_PERLMOD = NO
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = DOXYGEN
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = YES
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = NO
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = NO
INCLUDED_BY_GRAPH = NO
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = NO
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = YES
DOT_MULTI_TARGETS = YES
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
SEARCHENGINE = NO

47
src/docs/MainPage.dox Normal file
View File

@ -0,0 +1,47 @@
/* libStreamedXML/src/docs/MainPage.dox
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
/*! \mainpage
libStreamedXML is a C++ implementation of a
<a href="http://www.lwithers.me.uk/projects/StreamedXML/">
Streamed XML</a> processor. It is written using an efficient
state-machine and outputs UCS-4 data using the built-in std::wstring
type. It is written to accept an arbitrary byte stream, which may be
interrupted and restarted at any point, as input.
libStreamedXML uses a three-layer design. The lowest layer is the
character decoder layer, which takes raw bytes as input and outputs
UCS-4 characters. Its output is pushed to the middle layer, which is the
Streamed XML parser. This layer interprets character data into XML
structures, and pushes its results onto the top layer, which is the
callback layer. This layered design allows for flexibility and a
conciseness of both design and implementation.
The callback layer is implemented in the lsx::Callback class. You
simply override the functions in a derived class to achieve the
behaviour you want. The callback layer is only ever presented
well-formed Streamed XML data; errors are handled at the other end of
the stack.
The parser layer is implemented in the lsx::Parser class. You will have
to instantiate this class, but you should never have to derive from it,
since its functionality is fixed to the standard.
The character decoder layer is implemented in the lsx::Decoder class.
However, since the lsx::Parser class has no direct link to it, there is
no need to use this class (or any class at all). You can supply the
input in whatever way you see fit. The lsx::Decoder class provides a
UTF-8 decoder and allows exceptions to propagate out of its feedData()
method.
*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

1
src/docs/build.default Normal file
View File

@ -0,0 +1 @@
source src/docs/build.docs

43
src/docs/build.docs Normal file
View File

@ -0,0 +1,43 @@
# These are external variables, and shouldn't clash with anything else
# docs_BUILT
#
# Build fragment for the Doxygen HTML documentation. It is pulled in with
# `source`, which is why failures use `return` rather than `exit`.
# build_target, do_cmd and print_success are not defined here; presumably they
# come from the surrounding build framework -- TODO confirm.
#
# Add this module's .dox pages to the list of Doxygen inputs.
MONOLITHIC_DOC="${MONOLITHIC_DOC} $(echo src/docs/*.dox)"
build_target monolithic
# docs_BUILT is set at the end of this block, so the documentation is built at
# most once even if this fragment is sourced again.
if [ -z ${docs_BUILT} ]
then
echo "Building documentation with Doxygen..."
DOXYFILE=obj/Doxyfile.docs
# The Doxyfile is only generated when it does not exist yet: the template is
# copied and the INPUT / PROJECT_NUMBER settings are appended to it.
if [ ! -e ${DOXYFILE} ]
then
do_cmd cp src/docs/Doxyfile.in ${DOXYFILE} || return 1
echo "INPUT = ${MONOLITHIC_DOC}" >> ${DOXYFILE}
echo "PROJECT_NUMBER = ${VERSION}" >> ${DOXYFILE}
fi
# Doxygen is re-run only if at least one input is newer than the generated
# html/index.html.
MODIFIED=0
for file in ${MONOLITHIC_DOC}
do
if [ ${file} -nt html/index.html ]
then
MODIFIED=1
break
fi
done
if [ ${MODIFIED} -ne 0 ]
then
do_cmd doxygen ${DOXYFILE} || return 1
print_success "Documentation built"
else
print_success "Documentation is up to date"
fi
docs_BUILT=1
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

1
src/docs/build.install Normal file
View File

@ -0,0 +1 @@
source src/docs/build.install-docs

View File

@ -0,0 +1,21 @@
# Install fragment for the documentation. It is pulled in with `source`, which
# is why failures use `return` rather than `exit`. build_target,
# build_dir_tree, install_file and print_success are not defined here;
# presumably they come from the surrounding build framework -- TODO confirm.
#
# Make sure the documentation has been built before installing it.
build_target docs
# create documentation directories
echo "Installing documentation into ${DOCSDIR}"
build_dir_tree "${DOCSDIR}/html" || return 1
# copy across the Doxygen-generated documentation
for file in html/*
do
install_file ${file} ${DOCSDIR}/html 0644 || return 1
done
# copy across the generic files
for file in COPYING README
do
install_file ${file} ${DOCSDIR} 0644 || return 1
done
print_success "Documentation installed"
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1 @@
c++ lib libStreamedXML StreamedXML

View File

@ -0,0 +1,7 @@
/* libStreamedXML/src/lib/BottomHeader.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#endif

View File

@ -0,0 +1,197 @@
/* libStreamedXML/src/lib/Core/Callback.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {

/*! \brief This object is the callback used in lsx::Parser.

The functions in this callback may be overridden to handle "events"
encountered when parsing Streamed XML. For instance, hitting an open
tag will generate an element() event. Only well-formed XML will ever
get through to the callback object.

Default implementations are provided for all the functions. Apart from
cdata(), which forwards to content(), and entityRef(), which throws
UnknownEntity, these implementations do nothing, so you are free to
implement only those functions that you need.

You may throw any exceptions you like from the callback handlers; these
will filter through the parser layer and reach the character decoder
layer.

*/
class Callback {
public:
    /// Destructor (does nothing).
    virtual ~Callback()
    { }

    /*! \brief Stream restart marker.

    \param data Data held in the marker (may be empty).

    This function is called whenever a stream restart marker is
    encountered. This means that, if you are keeping track of the parsed
    data in any way, you should now reset that state (e.g. you should
    clear a stack of which element you are currently in).

    */
    virtual void streamRestart(const std::wstring& data)
    {
        (void)data;
    }

    /*! \brief Comment.

    \param data Data held in the comment (may be empty).

    This function is called whenever a comment is encountered. It is
    mainly useful if you want to transform the input in some way but
    leave the comments intact.

    */
    virtual void comment(const std::wstring& data)
    {
        (void)data;
    }

    /*! \brief Processing instruction.

    \param target The processing instruction's target.
    \param data Any data following the target (may be empty).

    This function is called whenever a PI is encountered. It is passed
    both the target and any data following that target.

    */
    virtual void PI(const std::wstring& target, const std::wstring& data)
    {
        (void)target;
        (void)data;
    }

    /*! \brief Element start tag.

    \param elemName The element's name.
    \param elemAttrs Any attributes the element may have (may be empty).

    This is called whenever an element start tag is encountered. It is
    called with the element's name and attributes. If the element tag is
    an empty tag, then closeTag() will be called immediately afterward.

    */
    virtual void element(const std::wstring& elemName,
        const std::map<std::wstring, std::wstring>& elemAttrs)
    {
        (void)elemName;
        (void)elemAttrs;
    }

    /*! \brief Element close tag.

    \param elemName The element's name.

    This is called whenever a close tag is encountered. It is also
    generated after the element() event if an empty element tag is
    encountered.

    */
    virtual void closeTag(const std::wstring& elemName)
    {
        (void)elemName;
    }

    /*! \brief Ignorable whitespace.

    \param ws The actual whitespace encountered.

    This is called whenever whitespace is encountered between element
    tags, but there is no actual content. It is included simply because
    you may wish to transform the input in some way but keep the
    whitespace the same.

    If an element does contain content but is surrounded by whitespace,
    this function will not be called: the whitespace will be included
    in the reported content.

    */
    virtual void whiteSpace(const std::wstring& ws)
    {
        (void)ws;
    }

    /*! \brief Element content.

    \param data The element's content.

    This function is called whenever non-whitespace content is
    encountered within an element. Any whitespace within the content (or
    at the start or end) is reported verbatim.

    */
    virtual void content(const std::wstring& data)
    {
        (void)data;
    }

    /*! \brief Character data.

    \param data The character data.

    This is used to report a chunk of character data enclosed in a CDATA
    marked section. Such data might contain the text "&amp;", which must
    not be expanded as an entity; it is reported separately because,
    if \a expandEntities is \a false, there would otherwise be no way of
    telling it apart from a genuine entity reference.

    The default behaviour is to pass this straight to content().

    */
    virtual void cdata(const std::wstring& data)
    {
        content(data);
    }

    /*! \brief Entity reference.

    \param name The name of the entity being referred to.
    \returns The expanded text.
    \throws UnknownEntity if the entity is unknown.

    This function is called whenever an entity reference is encountered,
    and \a expandEntities is \a true in your parser. The five standard
    entities will be expanded automatically, as will character entities,
    but anything else will be passed to this function.

    The default implementation knows no entities and always throws.

    */
    virtual std::wstring entityRef(const std::wstring& name)
    {
        throw UnknownEntity(name);
    }
};

}

View File

@ -0,0 +1,64 @@
/* libStreamedXML/src/lib/Core/Decoder.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {
// Byte sequence that opens a stream restart marker. It is 11 bytes long, which
// is the count feedData() compares sxmlRestartCount against.
const char* Decoder::sxmlRestartSequence = "<![RESTART[";
// Records the parser to feed and takes ownership of `decoder`; when no decoder
// is supplied (null pointer), a default utf8::Decoder is allocated instead.
// Uses the standard conditional operator rather than the GNU-only `a ?: b`
// form so that the file builds with any conforming compiler.
Decoder::Decoder(Parser* parser, utf8::Decoder* decoder)
    : parser(parser),
      decoder(decoder ? decoder : new utf8::Decoder),
      sxmlRestartCount(0)
{
}
// Frees the character decoder, whether it was passed to the constructor or
// allocated by it. The parser pointer is left untouched.
Decoder::~Decoder()
{
delete decoder;
}
// Scans a chunk of raw bytes for stream restart markers, then decodes whatever
// follows the last marker (or the whole chunk if there is none) and passes the
// resulting characters to the parser.
//
//  bytes   Raw input bytes.
//  amount  Number of bytes available at `bytes` (may be zero).
//
// Exceptions raised by the character decoder or by the parser layer are not
// caught here and propagate to the caller.
void Decoder::feedData(const char* bytes, size_t amount)
{
    /* We parse through the data once looking for restart sequences. If we encounter one, then we
    throw away all data leading up to it and continue from just after the marker. This can happen
    more than once per chunk of data, and can also happen over chunk boundaries (the match
    progress is kept in sxmlRestartCount between calls). */

    // Number of bytes in sxmlRestartSequence ("<![RESTART[").
    const int restartLength = 11;

    size_t pos = 0;
    while(pos < amount) {
        const char byte = bytes[pos++];

        if(byte == sxmlRestartSequence[sxmlRestartCount]) {
            if(++sxmlRestartCount == restartLength) {
                // Complete marker seen: start matching afresh, reset both layers and drop
                // everything up to AND including the marker's final '['. `pos` has already
                // been advanced past that byte.
                sxmlRestartCount = 0;
                parser->streamRestart();
                decoder->reset();
                bytes += pos;
                amount -= pos;
                pos = 0;
            }
        } else {
            // A mismatching byte may itself begin a new marker (e.g. the second '<' of
            // "<<![RESTART["). '<' only occurs at the start of the sequence, so checking the
            // first byte is sufficient.
            sxmlRestartCount = (byte == sxmlRestartSequence[0]) ? 1 : 0;
        }
    }

    decoder->decoded.clear();
    decoder->decode(bytes, amount);
    parser->feedCharData(decoder->decoded);
}
// Resets the character decoder and forgets any partially matched restart
// marker. The parser is not touched by this function.
void Decoder::restart()
{
decoder->reset();
sxmlRestartCount = 0;
}
}

View File

@ -0,0 +1,72 @@
/* libStreamedXML/src/lib/Core/Decoder.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {
/*! \brief Basic UTF-8 character decoder.

This is a simple implementation of the character decoder layer. It takes
in a pure UTF-8 byte stream and outputs characters to the parser layer.
Any exceptions thrown from the above layers will simply be propagated to
the caller of feedData().

The decoder does not interpret the Streamed XML itself; the only stream
state it keeps is how much of a stream restart marker it has matched so
far. It is stateful with respect to the UTF-8 byte stream, and can be
reset with a call to restart().

Objects of this class own their character decoder and therefore cannot be
copied.

*/
class Decoder {
public:
    /*! \brief Constructor.

    \param parser The Streamed XML parser which will receive decoded
        characters.
    \param decoder If you wish to use a more complex decoder, you may pass it here. If you pass 0,
        the default UTF-8 decoder will be used.

    Records the Streamed XML parser in use and sets up the character
    decoder. Doesn't generate any data.

    On deletion, \a parser will not be informed or deleted, but \a decoder will be deleted.

    */
    Decoder(Parser* parser, utf8::Decoder* decoder = 0);

    /// Destructor (frees \a decoder passed to ctor).
    virtual ~Decoder();

    /*! \brief Feed data to decoder.

    \param bytes The raw input bytes.
    \param amount The number of input bytes (may be zero).
    \throws (anything) Any exceptions from the Parser layer.
    \throws utf8::Error on UTF-8 decoding errors.

    This function is called whenever raw input data becomes available.
    It decodes a UTF-8 byte stream into characters, which it then passes
    to the Parser layer.

    */
    virtual void feedData(const char* bytes, size_t amount);

    /// Restart decoder if it is pointed at a new byte stream, etc.
    void restart();

private:
    // Not copyable: the destructor deletes `decoder`, so a member-wise copy
    // would lead to the same object being deleted twice. Declared but
    // intentionally never defined.
    Decoder(const Decoder&);
    Decoder& operator=(const Decoder&);

    // Parser receiving the decoded characters (not owned).
    Parser* parser;
    // Character decoder (owned; deleted in the destructor).
    utf8::Decoder* decoder;

    // Restart marker byte sequence and the number of its bytes matched so far.
    static const char* sxmlRestartSequence;
    int sxmlRestartCount;
};
}

View File

@ -0,0 +1,61 @@
/* libStreamedXML/src/lib/Core/Exceptions.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {
// Stores the reason; the narrow-string form returned by what() is produced
// lazily, so utf8Reason starts out empty.
Exception::Exception(const std::wstring& reason)
: reason(reason)
{
}
// Returns the reason as a narrow string. The conversion through utf8::encode()
// is performed on demand and cached in utf8Reason; an empty cache is
// (re)filled on every call. The returned pointer refers to storage owned by
// this exception object.
const char* Exception::what()
{
    if(!utf8Reason.empty()) return utf8Reason.c_str();

    utf8Reason = utf8::encode(reason, true);
    return utf8Reason.c_str();
}
// Passes the formatted message to the Exception base class and keeps the bare
// error text in `error`.
NotWellFormed::NotWellFormed(const std::wstring& error)
: Exception(format(error)), error(error)
{
}
// Builds the full exception message: a fixed heading line followed by the
// specific error text.
std::wstring NotWellFormed::format(const std::wstring& error)
{
    std::wstring message(L"Streamed XML is not well formed:\n");
    message += error;
    return message;
}
// Passes the formatted message to the Exception base class and keeps the bare
// entity name in `name`.
UnknownEntity::UnknownEntity(const std::wstring& name)
: Exception(format(name)), name(name)
{
}
// Builds the full exception message, quoting the name of the offending entity.
std::wstring UnknownEntity::format(const std::wstring& name)
{
    std::wstring message(L"Encountered an unknown entity named '");
    message += name;
    message += L"' in the Streamed XML.";
    return message;
}
}

View File

@ -0,0 +1,97 @@
/* libStreamedXML/src/lib/Core/Exceptions.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {
/*! \brief Exception base class.
This class is the base class of all libStreamedXML exceptions. It simply provides a mechanism for
storing a message; more specific details must be stored in derived classes.
Note that this class is not derived from std::exception.
*/
class Exception {
public:
/*! \brief Constructor.
\param reason Reason for the exception.
The constructor simply stores the reason for the exception, which may be later accessed for
reporting to the user.
*/
Exception(const std::wstring& reason);
/// Destructor.
virtual ~Exception() throw()
{ }
/*! \brief Find what caused the error.
\returns The \a reason as a narrow (byte) string. The string is owned by the exception object.
The narrow form is produced from \a reason when first needed, which is why this function is not
\c const.
*/
virtual const char* what();
/// Reason for the exception.
const std::wstring reason;
private:
// Narrow-string copy of `reason`, filled in by what().
std::string utf8Reason;
};
/// Thrown when XML is not well formed.
class NotWellFormed : public Exception {
private:
// Builds the message handed to the Exception base class from the error text.
static std::wstring format(const std::wstring& error);
public:
/*! \brief Constructor.
\param error Reason for the error.
The constructor will store the reason for the error. It will also prepare a suitable message
for the Exception base class.
*/
NotWellFormed(const std::wstring& error);
/// Destructor.
virtual ~NotWellFormed() throw()
{ }
/// Reason for the error (without the heading added to Exception::reason).
const std::wstring error;
};
/// Thrown when an unknown entity is referred to.
class UnknownEntity : public Exception {
private:
// Builds the message handed to the Exception base class from the entity name.
static std::wstring format(const std::wstring& name);
public:
/*! \brief Constructor.
\param name Name of the unknown entity.
The constructor will store the name of the unknown entity. It will also prepare a suitable
message for the Exception base class.
*/
UnknownEntity(const std::wstring& name);
/// Destructor.
virtual ~UnknownEntity() throw()
{ }
/// Name of unknown entity (without the surrounding message text).
const std::wstring name;
};
}

View File

@ -0,0 +1,24 @@
/* libStreamedXML/src/lib/ForwardDeclare.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
// This file simply contains forward declarations of all libStreamedXML
// classes, to facilitate header ordering, etc.
/// The libStreamedXML classes all go into this namespace.
namespace lsx {
// Forward declarations only, listed alphabetically; the definitions live in
// the individual headers.
class Callback;
class Decoder;
class Exception;
class NotWellFormed;
class Parser;
class UnknownEntity;
}

View File

@ -0,0 +1,585 @@
/* libStreamedXML/src/lib/Core/Parser.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
//#define DEBUG_STATE_MACHINE
#ifdef DEBUG_STATE_MACHINE
#include <iostream>
#endif
namespace lsx {
// Opening sequence of a CDATA marked section. feedChar() starts comparing at
// index 3, after "<![" has already been consumed.
const wchar_t* Parser::xmlCdataMarker = L"<![CDATA[";
// Opening sequence of a stream restart marker (11 characters long).
const wchar_t* Parser::xmlRestartMarker = L"<![RESTART[";
// Records the callback object and the entity expansion setting, then puts the
// state machine into its initial state via reset(). The callback pointer is
// stored as given; nothing here takes ownership of it.
Parser::Parser(Callback* callback, bool expandEntities)
: callback(callback), expandEntities(expandEntities)
{
reset();
}
// Returns the state machine to its initial condition: no open elements,
// nothing buffered, no partially matched restart marker.
void Parser::reset()
{
    // scalar state
    state = StateNone;
    elementDepth = 0;
    restartCount = 0;
    skipNextNewline = false;

    // anything accumulated so far is discarded
    buffer.clear();
    elemName.clear();
    elemAttrs.clear();
    elemStack.clear();
}
// Called once the opening "<![RESTART[" of a restart marker has been seen.
// All parsing state is discarded and the machine then collects the marker's
// data until the closing "]]>".
void Parser::streamRestart()
{
reset();
state = StateRestartMarker;
}
// Runs one character through the finite state machine.
//
//  ch  The character to process.
//
// Processing happens in three steps:
//   1. progress through a possible "<![RESTART[" marker is tracked, whatever
//      state the machine is in (including the error state);
//   2. the character is classified (and line endings are normalised to '\n');
//   3. the character is handled according to the current state, which may
//      invoke callback functions.
//
// Throws NotWellFormed when the input is not well formed; the machine is then
// left in StateError and ignores input until a restart marker is seen.
// Exceptions thrown by the callback object are not caught.
void Parser::feedChar(wchar_t ch)
{
#ifdef DEBUG_STATE_MACHINE
    std::wcout << L"Character: ``" << ch << L"'' (Unicode 0x"
        << std::hex << ch << std::dec
        << L"), state " << state << L".\n";
#endif

// Enter the error state before throwing, so that subsequent input is ignored.
#define ERROR(_reason) do { \
    state = StateError; \
    throw NotWellFormed(_reason); \
}while(0)

    // Step 1: restart marker detection (11 == length of "<![RESTART[").
    if(ch == xmlRestartMarker[restartCount]) {
        if(++restartCount == 11) {
            streamRestart();
            return;
        }
    } else {
        // A mismatching '<' may itself begin a marker (e.g. "<<![RESTART["). '<' only occurs
        // at the start of the marker, so checking the first character is sufficient.
        restartCount = (ch == xmlRestartMarker[0]) ? 1 : 0;
    }

    // Step 2: classification.
    TokenClass c;
    switch(ch) {
    case L'&':
        c = ClassEntity;
        break;

    case L'<':
        c = ClassOpenTag;
        break;

    case L'\r':
        // Reported as a newline straight away; a directly following LF (or NEL) is dropped.
        skipNextNewline = true;
        ch = L'\n';
        c = ClassWhitespace;
        goto doBuffer;

    case 0x2028:
        ch = L'\n';
        c = ClassWhitespace;
        break;

    case L' ':
    case L'\t':
        c = ClassWhitespace;
        break;

    case 0x85:
    case L'\n':
        if(skipNextNewline) {
            // Second half of a two-character line ending. Clear the flag so that only this
            // one character is swallowed and later newlines are reported normally.
            skipNextNewline = false;
            return;
        }
        ch = L'\n';
        c = ClassWhitespace;
        break;

    case L':':
    case L'_':
    case L'a' ... L'z':
    case L'A' ... L'Z':
    case 0xC0 ... 0xD6:
    case 0xD8 ... 0xF6:
    case 0xF8 ... 0x2FF:
    case 0x370 ... 0x37D:
    case 0x37F ... 0x1FFF:
    case 0x200C ... 0x200D:
    case 0x2070 ... 0x218F:
    case 0x2C00 ... 0x2FEF:
    case 0x3001 ... 0xD7FF:
    case 0xF900 ... 0xFDCF:
    case 0xFDF0 ... 0xFFFD:
    case 0x10000 ... 0xEFFFF:
        c = ClassNameStartChar;
        break;

    case L'-':
    case L'.':
    case L'0' ... L'9':
    case 0xB7:
    case 0x300 ... 0x36F:
    case 0x203F ... 0x2040:
        c = ClassNameChar;
        break;

    default:
        if((ch >= 0x00 && ch <= 0x08) ||
            (ch >= 0x0B && ch <= 0x1F) ||
            (ch >= 0x7F && ch <= 0x9F)
        ) {
            ERROR(L"Restricted character encountered.");
        }
        c = ClassOther;
    }
    skipNextNewline = false;

doBuffer:
    // Step 3: deal with char appropriately, according to state
    switch(state) {
    case StateError:
        return;

    case StateNone:
        switch(c) {
        case ClassWhitespace:
            buffer += ch;
            break;

        case ClassOpenTag:
            if(!buffer.empty()) callback->whiteSpace(buffer);
            state = StateOpen;
            buffer.clear();
            break;

        case ClassEntity:
            if(expandEntities) {
                if(!elementDepth) ERROR(L"Entities cannot appear at stream level.");
                if(!buffer.empty()) callback->whiteSpace(buffer);
                buffer.clear();
                parsingAttr = false;
                state = StateEntity;
                break;
            }
            // fall through

        default:
            if(!elementDepth) ERROR(L"Content cannot appear at stream level.");
            if(!buffer.empty()) callback->whiteSpace(buffer);
            state = StateData;
            buffer = ch;
            break;
        }
        break;

    case StateData:
        switch(c) {
        case ClassOpenTag:
            callback->content(buffer);
            buffer.clear();
            state = StateOpen;
            break;

        case ClassEntity:
            // Entities are only expanded on request, as in StateNone and in attribute
            // values; otherwise the '&' is ordinary content.
            if(expandEntities) {
                callback->content(buffer);
                buffer.clear();
                parsingAttr = false;
                state = StateEntity;
                break;
            }
            // fall through

        default:
            buffer += ch;
            break;
        }
        break;

    case StateCDATA:
        if(ch == L']') state = StateCDATA1;
        else buffer += ch;
        break;

    case StateCDATA1:
        if(ch == L']') state = StateCDATA2;
        else {
            buffer += L']';
            buffer += ch;
            state = StateCDATA;
        }
        break;

    case StateCDATA2:
        if(ch == L'>') {
            callback->cdata(buffer);
            buffer.clear();
            state = StateNone;
        } else if(ch == L']') {
            // "]]]": the oldest ']' is data, the last two may still close the section
            buffer += ch;
        } else {
            buffer += L"]]";
            buffer += ch;
            state = StateCDATA;
        }
        break;

    case StateRestartMarker:
        if(ch == L']') state = StateRestartMarker1;
        else buffer += ch;
        break;

    case StateRestartMarker1:
        if(ch == L']') state = StateRestartMarker2;
        else {
            buffer += L']';
            buffer += ch;
            state = StateRestartMarker;
        }
        break;

    case StateRestartMarker2:
        if(ch == L'>') {
            callback->streamRestart(buffer);
            buffer.clear();
            reset();
        } else if(ch == L']') {
            buffer += L']';
            break;
        } else {
            buffer += L"]]";
            buffer += ch;
            state = StateRestartMarker;
        }
        break;

    case StateOpen:
        switch(c) {
        case ClassNameStartChar:
            state = StateElemName;
            elemAttrs.clear();
            buffer = ch;
            break;

        default:
            if(ch == L'!') state = StateOpenBang;
            else if(ch == L'?') {
                state = StatePI;
                buffer2.clear();
            } else if(ch == L'/') {
                if(!elementDepth) ERROR(L"Encountered a close tag at stream level.");
                state = StateClose;
            } else ERROR(L"Invalid start character for element.");
            break;
        }
        break;

    case StatePI:
        // buffer2 accumulates the target
        if(ch == L'?') state = StatePI2;
        else if(c == ClassWhitespace) {
            state = StatePIData;
            buffer.clear();
        } else buffer2 += ch;
        break;

    case StatePI2:
        if(ch != L'>') ERROR(L"Invalid target for PI");
        else {
            callback->PI(buffer2, L"");
            buffer.clear();
            state = StateNone;
        }
        break;

    case StatePIData:
        if(ch == L'?') state = StatePI3;
        else buffer += ch;
        break;

    case StatePI3:
        if(ch == L'>') {
            callback->PI(buffer2, buffer);
            buffer.clear();
            state = StateNone;
        } else if(ch == L'?') {
            // The previous '?' was data; this one may still start the closing "?>".
            buffer += L'?';
        } else {
            // The '?' was ordinary data: keep it and carry on collecting data.
            buffer += L'?';
            buffer += ch;
            state = StatePIData;
        }
        break;

    case StateOpenBang:
        if(ch == L'[') {
            // restart markers handled by lower layer
            state = StateOpenCdataMarker;
            xmlCount = 3;
            if(!elementDepth) ERROR(L"CDATA sections not valid at stream level.");
        } else if(ch == L'-') state = StateOpenComment;
        else ERROR(L"Invalid special tag.");
        break;

    case StateOpenCdataMarker:
        if(ch != xmlCdataMarker[xmlCount]) ERROR(L"Invalid marked section.");
        if(!xmlCdataMarker[++xmlCount]) state = StateCDATA;
        break;

    case StateOpenComment:
        if(ch != L'-') ERROR(L"Invalid special tag.");
        state = StateComment;
        buffer.clear();
        break;

    case StateComment:
        if(ch == L'-') state = StateComment2;
        else buffer += ch;
        break;

    case StateComment2:
        if(ch == L'-') state = StateComment3;
        else {
            // A single '-' is ordinary comment text: keep it and carry on.
            buffer += L'-';
            buffer += ch;
            state = StateComment;
        }
        break;

    case StateComment3:
        if(ch != L'>') ERROR(L"`--' not valid in comments");
        callback->comment(buffer);
        buffer.clear();
        state = StateNone;
        break;

    case StateElemName:
        switch(c) {
        case ClassWhitespace:
            state = StateElemTag;
            elemName = buffer;
            buffer.clear();
            break;

        case ClassNameStartChar:
        case ClassNameChar:
            buffer += ch;
            break;

        default:
            switch(ch) {
            case L'>':
                elemStack.push_back(buffer);
                callback->element(buffer, elemAttrs);
                state = StateNone;
                ++elementDepth;
                buffer.clear();
                break;

            case L'/':
                // StateNeedClose reports elemName, so record the name collected so far.
                elemName = buffer;
                state = StateNeedClose;
                break;

            default:
                ERROR(L"Invalid character in tag name.");
            }
        }
        break;

    case StateElemTag:
        switch(c) {
        case ClassWhitespace:
            break;

        case ClassNameStartChar:
            state = StateElemAttrName;
            buffer = ch;
            break;

        default:
            switch(ch) {
            case L'>':
                elemStack.push_back(elemName);
                callback->element(elemName, elemAttrs);
                buffer.clear();
                state = StateNone;
                ++elementDepth;
                break;

            case L'/':
                state = StateNeedClose;
                break;

            default:
                ERROR(L"Invalid character in tag.");
            }
        }
        break;

    case StateElemAttrName:
        switch(c) {
        case ClassNameStartChar:
        case ClassNameChar:
            buffer += ch;
            break;

        default:
            if(ch != L'=') ERROR(L"Invalid character in attribute name.");
            state = StateElemAttrEq;
            buffer2 = buffer; // attribute name
            buffer.clear();
            break;
        }
        break;

    case StateElemAttrEq:
        if(ch == L'\'') singleQuote = true;
        else if(ch == L'"') singleQuote = false;
        else ERROR(L"Invalid character in attribute.");
        state = StateElemAttrVal;
        break;

    case StateElemAttrVal:
        if((singleQuote && ch == L'\'') || (!singleQuote && ch == L'"')) {
            elemAttrs[buffer2] = buffer;
            buffer.clear();
            state = StateElemAttrDone;
        } else if(expandEntities && ch == L'&') {
            // park the value collected so far; the entity states reuse `buffer`
            buffer3 = buffer;
            buffer.clear();
            parsingAttr = true;
            state = StateEntity;
        } else buffer += ch;
        break;

    case StateElemAttrDone:
        switch(c) {
        case ClassWhitespace:
            state = StateElemTag;
            break;

        default:
            if(ch == L'/') {
                state = StateNeedClose;
            } else if(ch == L'>') {
                callback->element(elemName, elemAttrs);
                elemStack.push_back(elemName);
                buffer.clear();
                state = StateNone;
                ++elementDepth;
            } else ERROR(L"Invalid character after attribute.");
            break;
        }
        break;

    case StateNeedClose:
        // empty element tag: reported as an element immediately followed by its close tag
        if(ch != L'>') ERROR(L"Stray `/' in open tag.");
        callback->element(elemName, elemAttrs);
        callback->closeTag(elemName);
        buffer.clear();
        state = StateNone;
        break;

    case StateClose:
        if(c != ClassNameStartChar) ERROR(L"Invalid character in close tag name.");
        buffer = ch;
        state = StateClosing;
        break;

    case StateClosing:
        switch(c) {
        case ClassNameStartChar:
        case ClassNameChar:
            buffer += ch;
            break;

        case ClassWhitespace:
            state = StateNeedClose2;
            break;

        default:
            if(ch != L'>') ERROR(L"Invalid character in close tag name.");
            if(elemStack.back() != buffer) ERROR(L"Mismatched close tag.");
            elemStack.pop_back();
            callback->closeTag(buffer);
            buffer.clear();
            state = StateNone;
            --elementDepth;
        }
        break;

    case StateNeedClose2:
        // The close tag's name is still held in `buffer` (collected in StateClosing);
        // elemName belongs to an earlier open tag and must not be used here.
        if(c == ClassWhitespace) break;
        if(ch != L'>') ERROR(L"Invalid data in close tag.");
        if(elemStack.back() != buffer) ERROR(L"Mismatched close tag.");
        elemStack.pop_back();
        callback->closeTag(buffer);
        buffer.clear();
        state = StateNone;
        --elementDepth;
        break;

    case StateEntity:
        if(ch == L'#') {
            state = StateCharEntity;
            entityChar = 0;
        } else if(c == ClassNameStartChar) {
            buffer = ch;
            state = StateEntityName;
        } else ERROR(L"Invalid entity name.");
        break;

    case StateCharEntity:
        // decimal character reference; the result continues the attribute value or content
        if(ch == ';') {
            if(parsingAttr) {
                buffer = buffer3 + entityChar;
                state = StateElemAttrVal;
                break;
            }
            buffer = entityChar;
            state = StateData;
            break;
        } else if(ch >= L'0' && ch <= L'9') {
            // refuse anything that would exceed 2147483647 once this digit is appended
            if(entityChar > 214748364 || (entityChar == 214748364 && ch >= L'8'))
                ERROR(L"Character code too large in character entity.");
            entityChar *= 10;
            entityChar += (ch - L'0');
        } else ERROR(L"Invalid character in character entity.");
        break;

    case StateEntityName:
        if(ch == L';') {
            if(parsingAttr) {
                buffer = buffer3 + entityRef(buffer);
                state = StateElemAttrVal;
                break;
            }
            buffer = entityRef(buffer);
            state = StateData;
            break;
        }
        if(c != ClassNameChar && c != ClassNameStartChar) ERROR(L"Invalid entity name.");
        buffer += ch;
        break;
    }

#undef ERROR
}
// Expands a named entity. The five predefined entities are resolved here;
// every other name is handed to the callback object, whose result (or
// exception) is passed on unchanged.
std::wstring Parser::entityRef(const std::wstring& ent)
{
    static const struct {
        const wchar_t* name;
        const wchar_t* text;
    } predefined[] = {
        { L"quot", L"\"" },
        { L"amp", L"&" },
        { L"apos", L"'" },
        { L"lt", L"<" },
        { L"gt", L">" }
    };

    for(size_t i = 0; i < sizeof(predefined) / sizeof(predefined[0]); ++i) {
        if(ent == predefined[i].name) return predefined[i].text;
    }
    return callback->entityRef(ent);
}
}

223
src/libStreamedXML/Parser.h Normal file
View File

@ -0,0 +1,223 @@
/* libStreamedXML/src/lib/Core/Parser.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
namespace lsx {
/*! \brief Streamed XML parser object.
This object sits between the callback layer (see lsx::Callback) and the
character decoding layer. It is a finite state machine which parses
character data as input and gives Streamed XML events as output. The
FSM design gives both speed and robustness. FSMs are good for dealing
with true data streams since their behaviour can easily be controlled to
avoid becoming unstable, and also because it is easy to provide "reset"
functionality (as in the case of a stream restart marker).
Data must be fed into the parser through either of the feedChar() or
feedCharData() functions.
Any stream restarts detected by the character decoding layer can be
signalled through the streamRestart() function. The parser will accept
stream reset markers, but only if they occur at stream level; if the
data stream became corrupted (e.g. the end of a comment tag was missed),
the Parser class would not be looking for stream restart markers and
would thus ignore them.
A parser is tied to a single lsx::Callback object, which cannot be
changed. You could potentially get around this by providing a
multiplexing Callback object. As many streams as you like may provide
input to a Parser, but this would only make sense if you synchronised
their inputs in some manner. Similarly, as many Parser objects as you
like may share the same callback, but again some form of synchronisation
would be needed.
Should an error occur in processing the XML (i.e. it is not well
formed), a NotWellFormed exception will be thrown. It is also possible
for a callback function to throw any exception, which will propagate
through to the character decoding layer. If this occurs, the character
decoding layer should switch into a state where it searches for a
stream restart marker, and not pass any more data to the Parser until it
encounters one. At this point, the streamRestart() function should be
called and processing can continue.
*/
class Parser {
private:
//BEGIN // Enumerations for finite state machine ///////////////////
// States of the finite state machine. The trailing comment on each
// enumerator records the input that has been seen when the machine is in
// that state. Entries marked NOTE(review) are inferred from the names only
// and should be confirmed against the implementation.
enum State {
StateNone, // at element or stream level
StateRestartMarker, // after <![RESTART[
StateRestartMarker1, // after first ]
StateRestartMarker2, // after second ]
StateOpen, // after <
StateOpenBang, // after <!
StateOpenCdataMarker, // after <![
StateCDATA, // NOTE(review): presumably inside a CDATA section -- confirm
StateCDATA1, // first ]
StateCDATA2, // second ]
StateData, // like StateNone, but don't skip whitespace
StateOpenComment, // after <!-
StateComment, // NOTE(review): presumably inside comment text -- confirm
StateComment2, // NOTE(review): presumably after first '-' in a comment -- confirm
StateComment3, // NOTE(review): presumably after "--" in a comment -- confirm
StatePI, // after <? (we are currently parsing the target)
StatePIData, // after target
StatePI2, // first ? encountered (in target)
StatePI3, // first ? encountered (in data)
StateClose, // "</" encountered
StateClosing, // "</x"
StateNeedClose2, // "</xyz "
StateElemName, // <x encountered
StateElemTag, // first space after "<name" encountered
StateElemAttrName, // NOTE(review): presumably while reading an attribute name -- confirm
StateElemAttrEq, // name=
StateElemAttrVal, // name=' or name="
StateElemAttrDone, // name='value'
StateNeedClose, // <elem/
StateEntity, // &
StateCharEntity, // &#
StateEntityName, // &x
StateError // NOTE(review): presumably entered once a well-formedness error was seen -- confirm
};
// Classes of input character. NOTE(review): presumably used to classify
// each character passed to feedChar() -- confirm against the implementation.
enum TokenClass {
ClassEntity,
ClassNameChar,
ClassNameStartChar,
ClassOpenTag,
ClassWhitespace,
ClassOther
};
//END // Enumerations //////////////////////////////////////////////
// marker strings (declared here, defined out of line); the names suggest
// they hold the CDATA and RESTART marker text -- TODO confirm
static const wchar_t* xmlCdataMarker;
static const wchar_t* xmlRestartMarker;
// the callback object to use (pointer supplied to the constructor)
Callback* callback;
// entity parsing
std::wstring entityRef(const std::wstring& ent);
bool expandEntities;
bool parsingAttr;
// used for parsing elements
std::vector<std::wstring> elemStack;
std::wstring elemName;
std::map<std::wstring, std::wstring> elemAttrs;
// parsing state
State state;
std::wstring buffer, buffer2, buffer3;
wchar_t entityChar;
bool singleQuote;
bool skipNextNewline;
int elementDepth, xmlCount, restartCount;
public:
/*! \brief Constructor.
\param callback The callback object to use.
\param expandEntities \a true if you want entities to be expanded,
\a false to keep them inline.
This sets up the finite state machine and records the callback
object to use. It doesn't generate any events.
*/
Parser(Callback* callback, bool expandEntities);
/*! \brief Feed parser a character.
\param ch The character to parse.
\throws NotWellFormed If the Streamed XML is not well formed.
\throws (anything) Anything from the callback object.
This function will cause the finite state machine to process an
input character. It may generate some callback events. It will
throw a NotWellFormed exception should it encounter any Streamed XML
structure that is not well formed; it will also propagate any
exceptions thrown from the higher callback layer.
*/
void feedChar(wchar_t ch);
/*! \brief Feed parser some character data.
\param data A pointer to the buffer of data.
\param amount The number of characters to parse (may be zero).
\throws NotWellFormed If the Streamed XML is not well formed.
\throws (anything) Anything from the callback object.
This function will cause the finite state machine to process some
input characters. It may generate some callback events. It will
throw a NotWellFormed exception should it encounter any Streamed XML
structure that is not well formed; it will also propagate any
exceptions thrown from the higher callback layer.
The characters are passed to feedChar() one at a time, in order; if
feedChar() throws, the remaining characters are not processed.
*/
inline void feedCharData(const wchar_t* data, size_t amount)
{
for(size_t pos = 0; pos < amount; ++pos) feedChar(data[pos]);
}
/*! \brief Feed parser some character data.
\param dataStr The data, as a string.
\throws NotWellFormed If the Streamed XML is not well formed.
\throws (anything) Anything from the callback object.
This function will cause the finite state machine to process some
input characters. It may generate some callback events. It will
throw a NotWellFormed exception should it encounter any Streamed XML
structure that is not well formed; it will also propagate any
exceptions thrown from the higher callback layer.
The characters are passed to feedChar() one at a time, in order; if
feedChar() throws, the remaining characters are not processed.
*/
inline void feedCharData(const std::wstring& dataStr)
{
// the length is sampled once, before any character is fed
std::wstring::size_type pos = 0, len = dataStr.size();
for(; pos < len; ++pos) feedChar(dataStr[pos]);
}
/*! \brief Reset the stream.
This function should be called to reset the stream. This could be due to e.g. a new file or a
restart marker.
*/
void reset();
/*! \brief Restart marker detected.
This function should be called when a stream restart marker (
\c &lt;![RESTART[ ) has been detected. The next character of
data to be passed to the parser should be the first character of
restart marker data (or a \c ']', if there is no data).
This function will cause the parser to generate a streamRestart()
callback.
*/
void streamRestart();
};
}

View File

@ -0,0 +1,16 @@
/* libStreamedXML/src/libStreamedXML/TopHeader.h
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#ifndef HEADER_libStreamedXML
#define HEADER_libStreamedXML
// standard includes, or includes needed for type declarations
#include <string>
#include <map>
#include <vector>
#include <sstream>
#include <utf8>
#include <stdint.h>

View File

@ -0,0 +1,10 @@
/* libStreamedXML/src/libStreamedXML/TopSource.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#include "StreamedXML"
// Below are all the includes used throughout the library.

View File

@ -0,0 +1 @@
source src/libStreamedXML/build.lib

View File

@ -0,0 +1 @@
source src/libStreamedXML/build.install-lib

View File

@ -0,0 +1,36 @@
# Install script for libStreamedXML: installs the shared library, its
# version symlinks, the public header and the pkg-config file. It is meant
# to be sourced by the build framework, so every failing step aborts with
# 'return 1' instead of carrying on with a half-finished installation.

# the library must have been built before it can be installed
build_target libStreamedXML || return 1

# make paths (this is for Gentoo in particular)
build_dir_tree "${LIBDIR}" || return 1
build_dir_tree "${PKGCONFDIR}" || return 1
build_dir_tree "${INCLUDEDIR}" || return 1

# install library
echo "Installing libraries into '${LIBDIR}'"
install_file "${libStreamedXML}" "${LIBDIR}" 0755 || return 1

# symlink chain: BASE -> MAJOR -> MINOR -> MICRO (the installed file)
BASE="${libStreamedXML_BASE}.so"
MAJOR="${BASE}.${SOMAJOR}"
MINOR="${MAJOR}.${SOMINOR}"
MICRO="${MINOR}.${SOMICRO}"
install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" || return 1
install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" || return 1
install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" || return 1

# install header
echo "Installing header file '${libStreamedXML_HEADER}' into ${INCLUDEDIR}"
install_header "${libStreamedXML_HEADER}" "${INCLUDEDIR}" 0644 || return 1

# install pkgconfig file (generated from the template by substituting the
# @VERSION@, @LIBDIR@ and @INCLUDEDIR@ placeholders)
echo "Installing package config file into ${PKGCONFDIR}"
PKGCONFFILE="${PKGCONFDIR}/libStreamedXML.pc"
do_cmd rm -f "${PKGCONFFILE}" || return 1
do_cmd_redir "${PKGCONFFILE}" sed \
    -e "s,@VERSION@,${VERSION}," \
    -e "s,@LIBDIR@,${FINALLIBDIR}," \
    -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \
    src/libStreamedXML/pkgconf.in || return 1
do_cmd chmod 0644 "${PKGCONFFILE}" || return 1

print_success "Done"
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,51 @@
# Build script for the libStreamedXML shared library. Meant to be sourced
# by the build framework; the libStreamedXML_BUILT guard makes repeated
# sourcing a no-op.
#
# These are external variables, and shouldn't clash with anything else
# libStreamedXML          path of the built shared library
# libStreamedXML_BUILT    set to 1 once this script has run
# libStreamedXML_HEADER   path of the generated public header
# libStreamedXML_BASE     library base name (without .so and version)
if [ -z "${libStreamedXML_BUILT}" ]
then
    libStreamedXML_BASE=libStreamedXML
    source src/libStreamedXML/soversion
    libStreamedXML="obj/${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}"
    SO_EXTRA="$(pkg-config libutf8++ --libs --cflags) -lstdc++ -lc"

    echo "Building library ${libStreamedXML}..."

    # sets SRC, HDR and MONOLITHIC_TESTS
    do_cmd source src/libStreamedXML/build.monolithic || return 1

    # rebuild if any input is newer than the library
    # (the file lists below are deliberately unquoted: they are
    # whitespace-separated lists)
    MODIFIED=0
    for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC}
    do
        if [ "${test}" -nt "${libStreamedXML}" ]
        then
            MODIFIED=1
            break
        fi
    done

    if [ "${MODIFIED}" -ne 0 ]
    then
        echo " Compiling"
        SONAME="${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}"
        # CFLAGS, SRC and SO_EXTRA are flag/file lists and must stay unquoted
        do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libStreamedXML}" \
            -Wl,-soname,"${SONAME}" \
            ${SRC} ${SO_EXTRA} || return 1

        # make tests work
        do_cmd ln -sf "$(basename "${libStreamedXML}")" "obj/${SONAME}" || return 1

        print_success "Library built"
    else
        print_success "Library up to date"
    fi

    libStreamedXML_BUILT=1
    libStreamedXML_HEADER=${HDR}
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,21 @@
# Generates the single-file ("monolithic") header and source for the library.
# SRC, HDR and MONOLITHIC_TESTS are (re)assigned on every run; the generation
# itself is guarded by libStreamedXML_MONOLITHIC so it happens only once.
#
# These are external variables, and shouldn't clash with anything else
# libStreamedXML_MONOLITHIC
# SRC / HDR: paths handed to make_monolithic as the files to produce
SRC="obj/libStreamedXML.cpp"
HDR="obj/StreamedXML"
# build scripts that are added to the library's rebuild check by the caller
MONOLITHIC_TESTS="src/libStreamedXML/build.lib src/libStreamedXML/build.monolithic"
if [ -z "${libStreamedXML_MONOLITHIC}" ]
then
# make_monolithic is defined elsewhere; presumably it combines the files
# listed in MONOLITHIC_SOURCE, in that order, into its first argument
# -- TODO confirm (the meaning of the 'C' argument is not visible here)
MONOLITHIC_SOURCE="$(echo src/libStreamedXML/{TopHeader,ForwardDeclare,Exceptions,Callback,Parser,Decoder,BottomHeader}.h)"
make_monolithic ${HDR} C || return 1
MONOLITHIC_SOURCE="$(echo src/libStreamedXML/{TopSource,Exceptions,Parser,Decoder}.cpp)"
make_monolithic ${SRC} C || return 1
libStreamedXML_MONOLITHIC=1
# append the generated header to the MONOLITHIC_DOC list
MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,21 @@
# libStreamedXML/src/libStreamedXML/pkgconf.in
#
# Metadata file for pkg-config
# ( http://www.freedesktop.org/software/pkgconfig/ )
#
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
# Released under the GNU GPLv2. See file COPYING or
# http://www.gnu.org/copyleft/gpl.html for details.
#
# Name, description
Name: libStreamedXML
Description: C++ Streamed XML parser
Version: @VERSION@
# Requirements
Requires: libutf8++
# Compilation information
Libs: -L@LIBDIR@ -lStreamedXML
Cflags: -I@INCLUDEDIR@

View File

@ -0,0 +1,17 @@
# libStreamedXML/src/libStreamedXML/soversion
#
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
# Released under the GNU GPLv2. See file COPYING or
# http://www.gnu.org/copyleft/gpl.html for details.
#
# SOMAJOR and SOMINOR are included in the library's soname. They need to
# be bumped on a binary-incompatible release. They are both single
# integers.
SOMAJOR=0
SOMINOR=0
# SOMICRO is bumped every time there is a binary-compatible release.
SOMICRO=0

1
src/tests/.params Normal file
View File

@ -0,0 +1 @@
c++ tests tests libStreamedXML

110
src/tests/Callback.cpp Normal file
View File

@ -0,0 +1,110 @@
/* libStreamedXML/src/tests/Callback.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#include "StreamedXML"
#include <iostream>
#include <fstream>
class PrintCallback : public lsx::Callback {
public:
virtual void comment(const std::wstring& data)
{
std::wcout << L"Callback: comment: ``" << data << L"''.\n";
}
virtual void PI(const std::wstring& target, const std::wstring& data)
{
std::wcout << L"Callback: PI: ``" << target << L"'': ``"
<< data << L"''.\n";
}
virtual void element(const std::wstring& elemName,
const std::map<std::wstring, std::wstring>& attrs)
{
std::wcout << L"Callback: element: ``" << elemName << "''\n";
for(std::map<std::wstring, std::wstring>::const_iterator i = attrs.begin(),
end = attrs.end(); i != end; ++i)
{
std::wcout << L"Callback: attribute: "
<< i->first << L"=``" << i->second << L"''\n";
}
std::wcout << L"Callback: end of attributes.\n";
}
virtual void closeTag(const std::wstring& elemName)
{
std::wcout << L"Callback: close element: ``" << elemName << "''.\n";
}
virtual void whiteSpace(const std::wstring& space)
{
(void)space;
std::wcout << L"Callback: whitespace.\n";
}
virtual void content(const std::wstring& data)
{
std::wcout << L"Callback: content: ``" << data << L"''.\n";
}
virtual void cdata(const std::wstring& data)
{
std::wcout << L"Callback: CDATA: ``" << data << "''.\n";
}
virtual std::wstring entityRef(const std::wstring& name)
{
std::wcout << L"Callback: entity: ``" << name
<< L"'' (returning as data).\n";
return name;
}
};
//END // PrintCallback class ///////////////////////////////////////////
/* Test driver: parses the XML file named on the command line and prints
 * every callback event through PrintCallback.
 *
 * Usage:  <prog> --print-summary   prints a one-line description
 *         <prog> FILE              parses FILE
 *
 * Returns 0 on success and 1 on a usage error, an unreadable file or an
 * exception raised while parsing.
 */
int main(int argc, char* argv[])
{
    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        std::wcout << L"Tests the callback functions.\n";
        return 0;
    }
    if(argc != 2) {
        std::wcerr << L"Expecting name of XML file.\n";
        return 1;
    }

    std::ifstream fin(argv[1]);
    if(!fin) {
        std::wcerr << L"Couldn't open file for input.\n";
        return 1;
    }

    int ret = 0;
    try {
        PrintCallback printCallback;
        lsx::Parser xmlParser(&printCallback, true);
        lsx::Decoder xmlDecoder(&xmlParser);

        char data[1024];
        // Loop on the stream state rather than on eof(): a read error sets
        // badbit without ever setting eofbit, which would otherwise spin
        // forever feeding zero bytes.
        while(fin) {
            fin.read(data, sizeof(data));
            xmlDecoder.feedData(data, fin.gcount());
        }
        if(fin.bad()) {
            std::cerr << "Error reading input file." << std::endl;
            ret = 1;
        }
    }
    catch(std::exception& e) {
        std::cerr << e.what() << std::endl;
        ret = 1;
    }
    catch(...) {
        // callbacks may throw anything; report instead of terminating
        std::cerr << "Unknown exception caught." << std::endl;
        ret = 1;
    }
    return ret;
}

479
src/tests/Structure.cpp Normal file
View File

@ -0,0 +1,479 @@
/* libStreamedXML/src/tests/Structure.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#include "StreamedXML"
#include <iostream>
#include <list>
// Exception type thrown when a test expectation is not met. It carries no
// data: the diagnostic is printed to std::wcerr before it is thrown.
class TestFailed { };
//BEGIN // Test results ////////////////////////////////////////////////
// DYN_CAST_CHECK(_type, _name)
//   _type  TestResult subclass the next expected result must have
//   _name  wide string literal naming the event that actually occurred
//
// Pops the next expected result via getExpected() and declares two locals
// in the expanding scope: 'r' (the popped TestResult*) and 'rr' (the same
// object cast to _type*). If the cast fails, a diagnostic is printed, 'r'
// is deleted and TestFailed is thrown. If the cast succeeds 'r' is NOT
// deleted here, so the expanding code is responsible for releasing it.
// Because it declares locals, it can be expanded at most once per scope.
#define DYN_CAST_CHECK(_type, _name) \
TestResult* r = getExpected( _name ); \
_type * rr = dynamic_cast< _type *>(r); \
if(!rr) { \
std::wcerr << L"Expected " << r->name() << L", got " _name L".\n"; \
delete r; \
throw TestFailed(); \
}
// Abstract base class for one expected parser event. Objects are deleted
// through TestResult pointers, hence the virtual destructor.
class TestResult {
public:
virtual ~TestResult() { }
// Human-readable name of the expected event, used in diagnostics.
virtual std::wstring name() const = 0;
};
// Expected result: feeding the next chunk of data raises an exception.
// It carries no data, so there is nothing to verify beyond its type.
class TestResultException : public TestResult {
public:
virtual std::wstring name() const { return L"exception"; }
};
// Expected result: a stream restart marker carrying the given data.
class TestResultStreamRestart : public TestResult {
public:
    std::wstring data; // restart marker data that should be reported

    TestResultStreamRestart(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"stream restart marker"; }

    // Returns true if 'actual' equals the expected data. Unlike the other
    // result classes this neither prints a diagnostic nor throws; the
    // caller has to act on the returned value.
    bool verify(const std::wstring& actual) const
    {
        return actual == data;
    }
};
// Expected result: a comment with the given text.
class TestResultComment : public TestResult {
public:
    std::wstring data; // comment text that should be reported

    TestResultComment(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"comment"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: a close tag for the named element.
class TestResultCloseTag : public TestResult {
public:
    std::wstring data; // element name that should be reported

    TestResultCloseTag(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"close tag"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: character content with the given text.
class TestResultContent : public TestResult {
public:
    std::wstring data; // content that should be reported

    TestResultContent(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"content"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: a run of whitespace with exactly the given characters.
class TestResultWhiteSpace : public TestResult {
public:
    std::wstring data; // whitespace that should be reported

    TestResultWhiteSpace(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"whitespace"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: a CDATA section with the given contents.
class TestResultCDATA : public TestResult {
public:
    std::wstring data; // CDATA contents that should be reported

    TestResultCDATA(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"CDATA marker"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: a reference to the named entity.
class TestResultEntityRef : public TestResult {
public:
    std::wstring data; // entity name that should be reported

    TestResultEntityRef(const std::wstring& expected) : data(expected) { }

    virtual std::wstring name() const { return L"entity reference"; }

    // Prints a diagnostic and throws TestFailed unless 'actual' matches.
    void verify(const std::wstring& actual) const
    {
        if(actual == data) return;

        std::wcerr << name() << L": expecting ``" << data;
        std::wcerr << L"'', got ``" << actual << L"''.\n";
        throw TestFailed();
    }
};
// Expected result: a processing instruction with the given target and data.
class TestResultPI : public TestResult {
public:
    std::wstring target, data; // values that should be reported

    TestResultPI(const std::wstring& expectedTarget, const std::wstring& expectedData)
        : target(expectedTarget), data(expectedData)
    {
    }

    virtual std::wstring name() const { return L"processing instruction"; }

    // Checks the target first, then the data. On the first mismatch a
    // diagnostic is printed and TestFailed is thrown.
    void verify(const std::wstring& actualTarget, const std::wstring& actualData) const
    {
        const bool targetOk = (actualTarget == target);
        if(!targetOk) {
            std::wcerr << name() << L": expecting ``" << target;
            std::wcerr << L"'', got ``" << actualTarget << L"''.\n";
            throw TestFailed();
        }

        const bool dataOk = (actualData == data);
        if(!dataOk) {
            std::wcerr << name() << L": expecting ``" << data;
            std::wcerr << L"'', got ``" << actualData << L"''.\n";
            throw TestFailed();
        }
    }
};
// Expected result: an open tag with the given element name and attributes.
// Attributes are added by the test through the public 'attrs' map.
class TestResultElement : public TestResult {
public:
    std::wstring ename;                         // expected element name
    std::map<std::wstring, std::wstring> attrs; // expected attributes

    TestResultElement(const std::wstring& expectedName) : ename(expectedName) { }

    virtual std::wstring name() const { return L"open tag"; }

    // Compares the element name, then walks both attribute maps in step
    // (both are ordered by key) comparing names and values. Prints a
    // diagnostic and throws TestFailed on the first difference, or if one
    // map has attributes left over after the other is exhausted.
    void verify(const std::wstring& xname, const std::map<std::wstring, std::wstring>& xattrs)
    {
        typedef std::map<std::wstring, std::wstring>::const_iterator AttrIter;

        if(ename != xname) {
            std::wcerr << name() << L": expecting ``" << ename;
            std::wcerr << L"'', got ``" << xname << L"''.\n";
            throw TestFailed();
        }

        AttrIter want = attrs.begin();
        AttrIter got = xattrs.begin();
        const AttrIter wantEnd = attrs.end();
        const AttrIter gotEnd = xattrs.end();

        while(want != wantEnd && got != gotEnd) {
            if(want->first != got->first) {
                std::wcerr << name() << L": attribute name: expecting ``" << want->first;
                std::wcerr << L"'', got ``" << got->first << L"''.\n";
                throw TestFailed();
            }
            if(want->second != got->second) {
                std::wcerr << name() << L": attribute value: expecting ``" << want->second;
                std::wcerr << L"'', got ``" << got->second << L"''.\n";
                throw TestFailed();
            }
            ++want;
            ++got;
        }

        // one of the maps still has entries: the attribute counts differ
        if(want != wantEnd || got != gotEnd) {
            std::wcerr << name() << L": attributes don't match.\n";
            throw TestFailed();
        }
    }
};
//END // Test results //////////////////////////////////////////////////
//BEGIN // Test parser /////////////////////////////////////////////////
class TestParser : private lsx::Callback {
private:
std::list<TestResult*> expected;
TestResult* getExpected(const std::wstring& dataType)
{
if(expected.empty()) {
std::wcerr << L"Further data (" << dataType
<< L") encountered after end of test.\n";
throw TestFailed();
}
TestResult* r = expected.front();
expected.pop_front();
return r;
}
virtual void streamRestart(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultStreamRestart, L"stream restart marker")
rr->verify(data);
}
virtual void comment(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultComment, L"comment")
rr->verify(data);
}
virtual void closeTag(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultCloseTag, L"close tag")
rr->verify(data);
}
virtual void whiteSpace(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultWhiteSpace, L"whitespace")
rr->verify(data);
}
virtual void content(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultContent, L"Content")
rr->verify(data);
}
virtual void cdata(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultCDATA, L"CDATA marker")
rr->verify(data);
}
virtual std::wstring entityRef(const std::wstring& data)
{
DYN_CAST_CHECK(TestResultEntityRef, L"entity reference")
rr->verify(data);
return data;
}
virtual void PI(const std::wstring& target, const std::wstring& data)
{
DYN_CAST_CHECK(TestResultPI, L"processing instruction")
rr->verify(target, data);
}
virtual void element(const std::wstring& name, const std::map<std::wstring, std::wstring>& attrs)
{
DYN_CAST_CHECK(TestResultElement, L"open tag")
rr->verify(name, attrs);
}
// XML parser objects
lsx::Parser xmlParser;
lsx::Decoder xmlDecoder;
public:
TestParser()
: xmlParser(this, true), xmlDecoder(&xmlParser)
{
}
virtual ~TestParser()
{
while(!expected.empty()) {
delete expected.front();
expected.pop_front();
}
}
void finish()
{
if(!expected.empty()) {
std::wcerr << L"Expecting some more data ("
<< expected.front()->name()
<< L").\n";
throw TestFailed();
}
}
void expect(TestResult* r)
{
expected.push_back(r);
}
virtual void feedData(const char* bytes, size_t amt)
{
while(amt) {
try {
xmlDecoder.feedData(bytes, std::min(amt, size_t(6)));
}
catch(lsx::Exception& e) {
DYN_CAST_CHECK(TestResultException, L"exception")
delete r;
}
amt -= std::min(amt, size_t(6));
bytes += 6;
}
}
};
//END // Test parser ///////////////////////////////////////////////////
//BEGIN // Tests ///////////////////////////////////////////////////////
// Test 1: processing instruction, comment, an element with content, then a
// stream restart marker (with an exception expected just before it)
// followed by a second element.
void test1(TestParser* p)
{
    std::wcout << L"Test 1: " << std::flush;

    const std::string input =
        "<?targ data data ?>\n"
        "<!-- comment -->\n"
        "<elemOne> with added content\n"
        "<![RESTART[data]>]]]]>\n"
        "<elemTwo>\n"
        "</elemTwo>\n";

    TestParser& parser = *p;
    parser.expect(new TestResultPI(L"targ", L" data data "));
    parser.expect(new TestResultWhiteSpace(L"\n"));
    parser.expect(new TestResultComment(L" comment "));
    parser.expect(new TestResultWhiteSpace(L"\n"));
    parser.expect(new TestResultElement(L"elemOne"));
    parser.expect(new TestResultWhiteSpace(L" "));
    parser.expect(new TestResultContent(L"with added content\n"));
    parser.expect(new TestResultException());
    parser.expect(new TestResultStreamRestart(L"data]>]]"));
    parser.expect(new TestResultWhiteSpace(L"\n"));
    parser.expect(new TestResultElement(L"elemTwo"));
    parser.expect(new TestResultWhiteSpace(L"\n"));
    parser.expect(new TestResultCloseTag(L"elemTwo"));

    parser.feedData(input.data(), input.size());
    parser.finish();

    std::wcout << L"OK.\n";
}
// Test 2: two consecutive top-level <reading> elements, each with nested
// <type> and <value> children and no whitespace in between.
void test2(TestParser* p)
{
    std::wcout << L"Test 2: " << std::flush;

    const std::string input =
        "<reading><type>integer</type><value>0</value></reading>"
        "<reading><type>integer</type><value>1</value></reading>";

    TestParser& parser = *p;

    // first reading
    parser.expect(new TestResultElement(L"reading"));
    parser.expect(new TestResultElement(L"type"));
    parser.expect(new TestResultContent(L"integer"));
    parser.expect(new TestResultCloseTag(L"type"));
    parser.expect(new TestResultElement(L"value"));
    parser.expect(new TestResultContent(L"0"));
    parser.expect(new TestResultCloseTag(L"value"));
    parser.expect(new TestResultCloseTag(L"reading"));

    // second reading
    parser.expect(new TestResultElement(L"reading"));
    parser.expect(new TestResultElement(L"type"));
    parser.expect(new TestResultContent(L"integer"));
    parser.expect(new TestResultCloseTag(L"type"));
    parser.expect(new TestResultElement(L"value"));
    parser.expect(new TestResultContent(L"1"));
    parser.expect(new TestResultCloseTag(L"value"));
    parser.expect(new TestResultCloseTag(L"reading"));

    parser.feedData(input.data(), input.size());
    parser.finish();

    std::wcout << L"OK.\n";
}
// Test 3: attributes in both quoting styles, predefined entities in
// attribute values and in content, a user-defined entity and a CDATA
// section containing markup characters and ']' runs.
void test3(TestParser* p)
{
    std::wcout << L"Test 3: " << std::flush;

    const std::string input =
        "<elemName attr1='\"value1\"' attr2=\"'value2'\"\n"
        " attr3='&quot;&lt;&amp;&gt;&apos;'\n"
        " >&lt;&amp;&gt;&myEntity;\n"
        " <![CDATA[<&>]]]]]]>\n"
        "</elemName >\n";

    TestResultElement* openTag = new TestResultElement(L"elemName");
    openTag->attrs[L"attr1"] = L"\"value1\"";
    openTag->attrs[L"attr2"] = L"'value2'";
    openTag->attrs[L"attr3"] = L"\"<&>'";

    TestParser& parser = *p;
    parser.expect(openTag);
    parser.expect(new TestResultContent(L"<"));
    parser.expect(new TestResultContent(L"&"));
    parser.expect(new TestResultContent(L">"));
    parser.expect(new TestResultEntityRef(L"myEntity"));
    parser.expect(new TestResultContent(L"myEntity\n "));
    parser.expect(new TestResultCDATA(L"<&>]]]]"));
    parser.expect(new TestResultWhiteSpace(L"\n"));
    parser.expect(new TestResultCloseTag(L"elemName"));

    parser.feedData(input.data(), input.size());
    parser.finish();

    std::wcout << L"OK.\n";
}
//END // Tests /////////////////////////////////////////////////////////
/* XML structure regression test driver.
 *
 * Usage:  <prog> --print-summary   prints a one-line description
 *         <prog>                   runs all tests
 *
 * Every test runs with a fresh TestParser. Returns 0 if all tests pass and
 * 1 if any test fails or an argument is supplied.
 */
int main(int argc, char* argv[])
{
    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        std::wcout << L"XML structure regression test.\n";
        return 0;
    }
    if(argc != 1) {
        std::wcerr << L"No arguments expected.\n";
        return 1;
    }

    int ret = 0;
    TestParser* p = 0;

    // Runs test<_n>() with a new parser. The parser is deleted after the
    // handlers, so it is released whether the test passed or threw. All
    // diagnostics go through std::wcerr because the tests already write
    // wide output to stderr.
#define TEST_n(_n) \
    try { \
        std::wcout << "======== Test " #_n " ========" << std::endl; \
        p = new TestParser(); \
        test ## _n (p); \
    } \
    catch(TestFailed&) { \
        std::wcerr << "Test failed." << std::endl; \
        ret = 1; \
    } \
    catch(std::exception& e) { \
        std::wcerr << e.what() << std::endl; \
        ret = 1; \
    } \
    catch(...) { \
        std::wcerr << "Unknown exception caught." << std::endl; \
        ret = 1; \
    } \
    delete p; \
    p = 0; \
    std::wcout << std::endl;

    TEST_n(1);
    TEST_n(2);
    TEST_n(3);

#undef TEST_n

    return ret;
}

120
src/tests/UTF-8.cpp Normal file
View File

@ -0,0 +1,120 @@
/* libStreamedXML/src/tests/UTF-8.cpp
*
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
#include "StreamedXML"
#include <iostream>
#include <utf8>
// Exception type thrown when a check in this test program fails; it
// carries no data.
class TestFailed { };
// Minimal parser wrapper for the UTF-8 tests. It inherits the callbacks
// from lsx::Callback without overriding any of them, so only the decoder's
// acceptance or rejection of the byte stream is observed.
class MySimpleParser : private lsx::Callback {
private:
    lsx::Parser xmlParser;
    lsx::Decoder xmlDecoder;

public:
    MySimpleParser()
        : xmlParser(this, true), xmlDecoder(&xmlParser)
    {
    }

    // Feeds bytes that must be rejected: succeeds only if the decoder
    // throws utf8::Error, otherwise throws TestFailed. Exceptions of any
    // other type propagate to the caller.
    void feedBadData(const char* bytes, size_t amount)
    {
        bool rejected = false;
        try {
            xmlDecoder.feedData(bytes, amount);
        }
        catch(utf8::Error&) {
            rejected = true;
        }
        if(!rejected) throw TestFailed();
    }

    // Feeds bytes that must be accepted: any std::exception is reported on
    // std::wcerr and converted into TestFailed.
    virtual void feedData(const char* bytes, size_t amount)
    {
        try {
            xmlDecoder.feedData(bytes, amount);
        }
        catch(std::exception& err) {
            std::wcerr << err.what() << std::endl;
            throw TestFailed();
        }
    }
};
// Byte sequences fed to the decoder. Each array is a string literal, so it
// carries a trailing NUL; users pass sizeof(...) - 1 to exclude it.

// Fed by main() after every bad-data case before the next case is tried.
// NOTE(review): presumably a restart marker whose data is the single
// character '[' -- confirm against the Streamed XML syntax.
char restartMarker[] = "<![RESTART[[]]>";
// Well-formed UTF-8: U+0041 U+2262 U+0391 U+002E, then U+D55C U+AD6D
// U+C5B4, then U+65E5 U+672C U+8A9E (one-, two- and three-byte sequences).
char okData[] = "<element>"
"\x41\xE2\x89\xA2\xCE\x91\x2E"
"\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4"
"\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E";
// Two-byte sequence whose payload bits decode to U+0030, which must be
// encoded in a single byte.
char badDataOverlong[] = "<overlong>"
"\xC0\xB0"; // ascii '0', overlong
char badDataMalformed0[] = "<malformed>"
"\xFF"; // never appears
char badDataMalformed2[] = "<malformed>"
"\x8F"; // lone continuation char
// Lead byte of a two-byte sequence followed by '?' (0x3F), which is not a
// continuation byte.
char badDataMalformed3[] = "<malformed>"
"\xC1\x3F"; // missing continuation char
// One bad-data test case. The pointers refer to string literals and static
// arrays, so they are pointers to const: initialising a non-const pointer
// from a string literal is ill-formed since C++11.
struct TestData {
    const wchar_t* desc; // description printed while the case runs; 0 ends the table
    const char* str;     // bytes to feed to the decoder
    size_t len;          // number of bytes in str (no terminating NUL)
};
// Table of bad-data test cases. Each entry holds a description, the byte
// array and its length without the terminating NUL. The all-zero entry
// marks the end of the table.
TestData testData[] = {
    { L"Overlong character encoding", badDataOverlong, sizeof( badDataOverlong ) - 1 },
    { L"Illegal octet (0xFF)", badDataMalformed0, sizeof( badDataMalformed0 ) - 1 },
    { L"Lone continuation char", badDataMalformed2, sizeof( badDataMalformed2 ) - 1 },
    { L"Missing continuation char", badDataMalformed3, sizeof( badDataMalformed3 ) - 1 },
    { 0, 0, 0 }
};
/* Regression test driver for the UTF-8 decoder.
 *
 * Usage:  <prog> --print-summary   prints a one-line description
 *         <prog>                   runs the test
 *
 * Feeds one well-formed sequence, then each entry of testData, which must
 * be rejected. A restart marker is fed after every bad-data case. Returns
 * 0 on success, 1 on failure or if an argument is supplied.
 */
int main(int argc, char* argv[])
{
    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        std::wcout << L"Regression test for the UTF-8 decoder.\n";
        return 0;
    }
    if(argc != 1) {
        // usage errors belong on the error stream
        std::wcerr << L"No arguments expected.\n";
        return 1;
    }

    int ret = 0;
    try {
        MySimpleParser p;

        std::wcout << L"Checking OK data: " << std::flush;
        p.feedData(okData, sizeof(okData) - 1);
        std::wcout << L"success" << std::endl;

        for(TestData* test = testData; test->desc; ++test) {
            std::wcout << L"Checking bad data (" << test->desc
                << L"): " << std::flush;
            p.feedBadData(test->str, test->len);
            std::wcout << "success" << std::endl;
            p.feedData(restartMarker, sizeof(restartMarker) - 1);
        }
    }
    catch(TestFailed&) {
        std::wcerr << L"Test failed.\n";
        ret = 1;
    }
    catch(std::exception& e) {
        // feedBadData() lets exceptions other than utf8::Error through
        std::wcerr << e.what() << std::endl;
        ret = 1;
    }
    catch(...) {
        std::wcerr << L"Unknown exception caught." << std::endl;
        ret = 1;
    }
    return ret;
}

3
src/tests/build.default Normal file
View File

@ -0,0 +1,3 @@
source src/tests/build.tests
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

43
src/tests/build.tests Normal file
View File

@ -0,0 +1,43 @@
# Build script for the test programs: compiles every src/tests/*.cpp into
# obj/tests/<name>, linked against the library. Meant to be sourced by the
# build framework; the tests_BUILT guard makes repeated sourcing a no-op.
#
# These are external variables, and shouldn't clash with anything else
# tests_BUILT
#
build_target libStreamedXML || return 1

if [ -z "${tests_BUILT}" ]
then
    LIBS="${libStreamedXML} "
    EXTRAS=""

    echo "Building test programs..."
    do_cmd mkdir -p obj/tests || return 1

    for SRC in src/tests/*.cpp
    do
        # strip the directory and the literal ".cpp" suffix
        TEST="obj/tests/$(basename "${SRC}" .cpp)"

        # rebuild if the library, the source or this script is newer than
        # the test binary (LIBS is a list and must stay unquoted)
        MODIFIED=0
        for file in ${LIBS} "${SRC}" src/tests/build.tests
        do
            if [ "${file}" -nt "${TEST}" ]
            then
                MODIFIED=1
                break
            fi
        done

        if [ "${MODIFIED}" -ne 0 ]
        then
            # CFLAGS, LIBS and EXTRAS are flag/file lists and must stay unquoted
            do_cmd ${CXX} -Iobj ${CFLAGS} -o "${TEST}" "${SRC}" ${LIBS} ${EXTRAS} || return 1
            print_success "Built ${TEST}"
        else
            print_success "${TEST} is up to date"
        fi
    done

    print_success "All tests built"
    tests_BUILT=1
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

44
src/tests/template Normal file
View File

@ -0,0 +1,44 @@
/* libStreamedXML/src/tests/???.cpp
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#include "StreamedXML"
#include <iostream>
/* Skeleton entry point for a new test program.
 *
 * With the single argument --print-summary it prints a one-line summary
 * and returns 0. Otherwise it runs the test body inside the try block and
 * returns 0 on success, or 1 if any exception was caught.
 */
int main(int argc, char* argv[])
{
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
std::cout << "One line summary.\n";
return 0;
}
// placeholder for argument handling; fill in when writing a real test
if(argc == 1) {
// empty argument list
}
int ret = 0;
try {
// TODO
}
catch(std::exception& e) {
std::cerr << e.what() << std::endl;
ret = 1;
}
// anything not derived from std::exception is reported generically
catch(...) {
std::cerr << "Unknown exception caught." << std::endl;
ret = 1;
}
return ret;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/