Import old subversion tree.
This commit is contained in:
parent
4976710df9
commit
7610e244f4
|
@ -0,0 +1 @@
|
|||
doxygen docs docs
|
|
@ -0,0 +1,146 @@
|
|||
# libStreamedXML/src/docs/Doxyfile.in
|
||||
#
|
||||
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
# Released under the GNU GPLv2. See file COPYING or
|
||||
# http://www.gnu.org/copyleft/gpl.html for details.
|
||||
#
|
||||
|
||||
PROJECT_NAME = libStreamedXML
|
||||
OUTPUT_DIRECTORY =
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
USE_WINDOWS_ENCODING = NO
|
||||
BRIEF_MEMBER_DESC = YES
|
||||
REPEAT_BRIEF = YES
|
||||
ABBREVIATE_BRIEF =
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = YES
|
||||
FULL_PATH_NAMES = NO
|
||||
STRIP_FROM_PATH =
|
||||
STRIP_FROM_INC_PATH =
|
||||
SHORT_NAMES = NO
|
||||
JAVADOC_AUTOBRIEF = NO
|
||||
MULTILINE_CPP_IS_BRIEF = YES
|
||||
DETAILS_AT_TOP = YES
|
||||
INHERIT_DOCS = YES
|
||||
DISTRIBUTE_GROUP_DOC = NO
|
||||
TAB_SIZE = 4
|
||||
ALIASES =
|
||||
OPTIMIZE_OUTPUT_FOR_C = NO
|
||||
OPTIMIZE_OUTPUT_JAVA = NO
|
||||
SUBGROUPING = YES
|
||||
EXTRACT_ALL = NO
|
||||
EXTRACT_PRIVATE = NO
|
||||
EXTRACT_STATIC = NO
|
||||
EXTRACT_LOCAL_CLASSES = NO
|
||||
EXTRACT_LOCAL_METHODS = NO
|
||||
HIDE_UNDOC_MEMBERS = NO
|
||||
HIDE_UNDOC_CLASSES = NO
|
||||
HIDE_FRIEND_COMPOUNDS = YES
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
INTERNAL_DOCS = NO
|
||||
CASE_SENSE_NAMES = YES
|
||||
HIDE_SCOPE_NAMES = NO
|
||||
SHOW_INCLUDE_FILES = NO
|
||||
INLINE_INFO = YES
|
||||
SORT_MEMBER_DOCS = YES
|
||||
SORT_BRIEF_DOCS = NO
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
GENERATE_TODOLIST = YES
|
||||
GENERATE_TESTLIST = YES
|
||||
GENERATE_BUGLIST = YES
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
ENABLED_SECTIONS =
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
SHOW_USED_FILES = NO
|
||||
SHOW_DIRECTORIES = NO
|
||||
FILE_VERSION_FILTER =
|
||||
QUIET = YES
|
||||
WARNINGS = YES
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
WARN_NO_PARAMDOC = YES
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
WARN_LOGFILE =
|
||||
FILE_PATTERNS =
|
||||
RECURSIVE = NO
|
||||
EXCLUDE =
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS =
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS =
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
IMAGE_PATH = src/docs
|
||||
INPUT_FILTER =
|
||||
FILTER_PATTERNS =
|
||||
FILTER_SOURCE_FILES = NO
|
||||
SOURCE_BROWSER = NO
|
||||
INLINE_SOURCES = NO
|
||||
STRIP_CODE_COMMENTS = YES
|
||||
REFERENCED_BY_RELATION = YES
|
||||
REFERENCES_RELATION = YES
|
||||
VERBATIM_HEADERS = NO
|
||||
ALPHABETICAL_INDEX = YES
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
IGNORE_PREFIX =
|
||||
GENERATE_HTML = YES
|
||||
HTML_OUTPUT = html
|
||||
HTML_FILE_EXTENSION = .html
|
||||
HTML_HEADER =
|
||||
HTML_FOOTER =
|
||||
HTML_STYLESHEET =
|
||||
HTML_ALIGN_MEMBERS = YES
|
||||
GENERATE_HTMLHELP = NO
|
||||
CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
GENERATE_CHI = NO
|
||||
BINARY_TOC = NO
|
||||
TOC_EXPAND = NO
|
||||
DISABLE_INDEX = NO
|
||||
ENUM_VALUES_PER_LINE = 4
|
||||
GENERATE_TREEVIEW = NO
|
||||
TREEVIEW_WIDTH = 250
|
||||
GENERATE_LATEX = NO
|
||||
GENERATE_RTF = NO
|
||||
GENERATE_MAN = NO
|
||||
GENERATE_XML = NO
|
||||
GENERATE_AUTOGEN_DEF = NO
|
||||
GENERATE_PERLMOD = NO
|
||||
ENABLE_PREPROCESSING = YES
|
||||
MACRO_EXPANSION = NO
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED = DOXYGEN
|
||||
EXPAND_AS_DEFINED =
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
TAGFILES =
|
||||
GENERATE_TAGFILE =
|
||||
ALLEXTERNALS = NO
|
||||
EXTERNAL_GROUPS = YES
|
||||
PERL_PATH = /usr/bin/perl
|
||||
CLASS_DIAGRAMS = YES
|
||||
HIDE_UNDOC_RELATIONS = YES
|
||||
HAVE_DOT = YES
|
||||
CLASS_GRAPH = YES
|
||||
COLLABORATION_GRAPH = YES
|
||||
GROUP_GRAPHS = NO
|
||||
UML_LOOK = NO
|
||||
TEMPLATE_RELATIONS = NO
|
||||
INCLUDE_GRAPH = NO
|
||||
INCLUDED_BY_GRAPH = NO
|
||||
CALL_GRAPH = NO
|
||||
GRAPHICAL_HIERARCHY = YES
|
||||
DIRECTORY_GRAPH = NO
|
||||
DOT_IMAGE_FORMAT = png
|
||||
DOT_PATH =
|
||||
DOTFILE_DIRS =
|
||||
MAX_DOT_GRAPH_WIDTH = 1024
|
||||
MAX_DOT_GRAPH_HEIGHT = 1024
|
||||
MAX_DOT_GRAPH_DEPTH = 0
|
||||
DOT_TRANSPARENT = YES
|
||||
DOT_MULTI_TARGETS = YES
|
||||
GENERATE_LEGEND = YES
|
||||
DOT_CLEANUP = YES
|
||||
SEARCHENGINE = NO
|
|
@ -0,0 +1,47 @@
|
|||
/* libStreamedXML/src/docs/MainPage.dox
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
/*! \mainpage
|
||||
|
||||
libStreamedXML is a C++ implementation of a
|
||||
<a href="http://www.lwithers.me.uk/projects/StreamedXML/">
|
||||
Streamed XML</a> processor. It is written using an efficient
|
||||
state-machine and outputs UCS-4 data using the built-in std::wstring
|
||||
type. It is written to accept an arbitrary byte stream, which may be
|
||||
interrupted and restarted at any point, as input.
|
||||
|
||||
libStreamedXML uses a three-layer design. The lowest layer is the
|
||||
character decoder layer, which takes raw bytes as input and outputs
|
||||
UCS-4 characters. Its output is pushed to the middle layer, which is the
|
||||
Streamed XML parser. This layer interprets character data into XML
|
||||
structures, and pushes its results onto the top layer, which is the
|
||||
callback layer. This layered design allows for flexibility and
|
||||
conciseness of both design and implementation.
|
||||
|
||||
The callback layer is implemented in the lsx::Callback class. You
|
||||
simply override the functions in a derived class to achieve the
|
||||
behaviour you want. The callback layer is only ever presented with
|
||||
well-formed Streamed XML data; errors are handled at the other end of
|
||||
the stack.
|
||||
|
||||
The parser layer is implemented in the lsx::Parser class. You will have
|
||||
to instantiate this class, but you should never have to derive from it,
|
||||
since its functionality is fixed to the standard.
|
||||
|
||||
The character decoder layer is implemented in the lsx::Decoder class.
|
||||
However, since the lsx::Parser class has no direct link to it, there is
|
||||
no need to use this class (or any class at all). You can supply the
|
||||
input in whatever way you see fit. The lsx::Decoder class provides a
|
||||
UTF-8 decoder and allows exceptions to propagate out of its feedData()
|
||||
method.
|
||||
|
||||
*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1 @@
|
|||
source src/docs/build.docs
|
|
@ -0,0 +1,43 @@
|
|||
# Build fragment: generates the HTML documentation with Doxygen.
#
# These are external variables, and shouldn't clash with anything else
#  docs_BUILT
#
# This fragment uses `return`, so it is expected to be sourced rather than
# executed. build_target, do_cmd and print_success are helpers defined
# elsewhere in the build system.

# Add this module's .dox pages to the list of documentation inputs.
MONOLITHIC_DOC="${MONOLITHIC_DOC} $(echo src/docs/*.dox)"
build_target monolithic

# docs_BUILT guards against running this fragment more than once.
if [ -z "${docs_BUILT}" ]
then
    echo "Building documentation with Doxygen..."

    DOXYFILE=obj/Doxyfile.docs

    # The Doxyfile is only generated when it does not exist yet.
    # NOTE(review): an existing ${DOXYFILE} is reused as-is, so later changes
    # to MONOLITHIC_DOC or VERSION are not picked up until it is deleted.
    if [ ! -e "${DOXYFILE}" ]
    then
        do_cmd cp src/docs/Doxyfile.in "${DOXYFILE}" || return 1
        echo "INPUT = ${MONOLITHIC_DOC}" >> "${DOXYFILE}"
        echo "PROJECT_NUMBER = ${VERSION}" >> "${DOXYFILE}"
    fi

    # Rebuild only if some input is newer than the generated index page.
    MODIFIED=0
    # MONOLITHIC_DOC is a space-separated list: word splitting is intended.
    for file in ${MONOLITHIC_DOC}
    do
        if [ "${file}" -nt html/index.html ]
        then
            MODIFIED=1
            break
        fi
    done

    if [ "${MODIFIED}" -ne 0 ]
    then
        do_cmd doxygen "${DOXYFILE}" || return 1
        print_success "Documentation built"
    else
        print_success "Documentation is up to date"
    fi

    docs_BUILT=1
fi

# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1 @@
|
|||
source src/docs/build.install-docs
|
|
@ -0,0 +1,21 @@
|
|||
# Build fragment: installs the generated documentation under ${DOCSDIR}.
#
# This fragment uses `return`, so it is expected to be sourced rather than
# executed. build_target, build_dir_tree, install_file and print_success are
# helpers defined elsewhere in the build system.

# Make sure the documentation has been built first.
build_target docs

# create documentation directories
echo "Installing documentation into ${DOCSDIR}"
build_dir_tree "${DOCSDIR}/html" || return 1

# copy across the Doxygen-generated documentation
for file in html/*
do
    install_file "${file}" "${DOCSDIR}/html" 0644 || return 1
done

# copy across the generic files
for file in COPYING README
do
    install_file "${file}" "${DOCSDIR}" 0644 || return 1
done

print_success "Documentation installed"
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1 @@
|
|||
c++ lib libStreamedXML StreamedXML
|
|
@ -0,0 +1,7 @@
|
|||
/* libStreamedXML/src/lib/BottomHeader.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#endif
|
|
@ -0,0 +1,197 @@
|
|||
/* libStreamedXML/src/lib/Core/Callback.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
/*! \brief This object is the callback used in lsx::Parser.
|
||||
|
||||
The functions in this callback may be overridden to handle "events"
|
||||
encountered when parsing Streamed XML. For instance, hitting an open
|
||||
tag will generate an element() event. Only well-formed XML will ever
|
||||
get through to the callback object.
|
||||
|
||||
Default implementations are provided for all the functions. These
|
||||
implementations do nothing, so you are free to implement only those
|
||||
functions that you need.
|
||||
|
||||
You may throw any exceptions you like from the callback handlers; these
|
||||
will filter through the parser layer and reach the character decoder
|
||||
layer.
|
||||
|
||||
*/
|
||||
class Callback {
|
||||
public:
|
||||
/// Destructor (does nothing).
|
||||
virtual ~Callback()
|
||||
{ }
|
||||
|
||||
|
||||
|
||||
/*! \brief Stream restart marker.
|
||||
|
||||
\param data Data held in the marker (may be empty).
|
||||
|
||||
This function is called whenever a stream restart marker is
|
||||
encountered. This means that, if you are keeping track of the parsed
|
||||
data in any way, you should now reset that state (e.g. you should
|
||||
clear a stack of which element you are currently in).
|
||||
|
||||
*/
|
||||
virtual void streamRestart(const std::wstring& data)
|
||||
{
|
||||
(void)data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Comment.
|
||||
|
||||
\param data Data held in the comment (may be empty).
|
||||
|
||||
This function is called whenever a comment is encountered. It is
|
||||
mainly used if you want to transform the input in some ways, but
|
||||
leave the comments intact.
|
||||
|
||||
*/
|
||||
virtual void comment(const std::wstring& data)
|
||||
{
|
||||
(void)data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Processing instruction.
|
||||
|
||||
\param target The processing instruction's target.
|
||||
\param data Any data following the target (may be empty).
|
||||
|
||||
This function is called whenever a PI is encountered. It includes
|
||||
both the target and any data following that target.
|
||||
|
||||
*/
|
||||
virtual void PI(const std::wstring& target, const std::wstring& data)
|
||||
{
|
||||
(void)target;
|
||||
(void)data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Element start tag.
|
||||
|
||||
\param elemName The element's name.
|
||||
\param elemAttrs Any attributes the element may have (may be empty).
|
||||
|
||||
This is called whenever an element start tag is encountered. It is
|
||||
called with the element's name and attributes. If the element tag is
|
||||
an empty tag, then closeTag() will be called immediately afterward.
|
||||
|
||||
*/
|
||||
virtual void element(const std::wstring& elemName,
|
||||
const std::map<std::wstring, std::wstring>& elemAttrs)
|
||||
{
|
||||
(void)elemName;
|
||||
(void)elemAttrs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Element close tag.
|
||||
|
||||
\param elemName The element's name.
|
||||
|
||||
This is called whenever a close tag is encountered. It is also
|
||||
generated after the element() event if an empty element tag is
|
||||
encountered.
|
||||
|
||||
*/
|
||||
virtual void closeTag(const std::wstring& elemName)
|
||||
{
|
||||
(void)elemName;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Ignorable whitespace.
|
||||
|
||||
\param ws The actual whitespace encountered.
|
||||
|
||||
This is called whenever whitespace is encountered between element
|
||||
tags, but there is no actual content. It is included simply because
|
||||
you may wish to transform the input in some ways, but keep the
|
||||
whitespace the same.
|
||||
|
||||
If an element does contain content but is surrounded by whitespace,
|
||||
this function will not be called: the whitespace will be included
|
||||
in the reported content.
|
||||
|
||||
*/
|
||||
virtual void whiteSpace(const std::wstring& ws)
|
||||
{
|
||||
(void)ws;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Element content.
|
||||
|
||||
\param data The element's content.
|
||||
|
||||
This function is called whenever non-whitespace content is
|
||||
encountered within an element. Any whitespace within the content (or
|
||||
at the start or end) is reported verbatim.
|
||||
|
||||
*/
|
||||
virtual void content(const std::wstring& data)
|
||||
{
|
||||
(void)data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Character data.
|
||||
|
||||
\param data The character data.
|
||||
|
||||
This is used to report a chunk of character data enclosed in a CDATA
|
||||
marked section. This means it might contain the text "&", but
|
||||
this shouldn't be expanded as an entity, so there would be no way of
|
||||
disambiguating it from an entity reference if \a expandEntities is
|
||||
\a false.
|
||||
|
||||
The default behaviour is to pass this straight to content().
|
||||
|
||||
*/
|
||||
virtual void cdata(const std::wstring& data)
|
||||
{
|
||||
content(data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Entity reference.
|
||||
|
||||
\param name The name of the entity being referred to.
|
||||
\returns The expanded text.
|
||||
\throws UnknownEntity if the entity is unknown.
|
||||
|
||||
This function is called whenever an entity reference is encountered,
|
||||
and \a expandEntities is \a true in your parser. The five standard
|
||||
entities will be expanded automatically, as will character entities,
|
||||
but anything else will be passed to this function.
|
||||
|
||||
*/
|
||||
virtual std::wstring entityRef(const std::wstring& name)
|
||||
{
|
||||
throw UnknownEntity(name);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
};
|
|
@ -0,0 +1,64 @@
|
|||
/* libStreamedXML/src/lib/Core/Decoder.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
const char* Decoder::sxmlRestartSequence = "<![RESTART[";
|
||||
|
||||
|
||||
|
||||
Decoder::Decoder(Parser* parser, utf8::Decoder* decoder)
|
||||
: parser(parser), decoder(decoder ?: new utf8::Decoder), sxmlRestartCount(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
Decoder::~Decoder()
|
||||
{
|
||||
delete decoder;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Decoder::feedData(const char* bytes, size_t amount)
|
||||
{
|
||||
/* We parse through the data once looking for restart sequences. If we encounter one, then we
|
||||
throw away all data leading up to it and continue from just after the marker. This can happen
|
||||
more than once per chunk of data, and can also happen over chunk boundaries. */
|
||||
restart:
|
||||
for(size_t pos = 0; pos < amount; ++pos) {
|
||||
if(bytes[pos] == sxmlRestartSequence[sxmlRestartCount]) {
|
||||
if(++sxmlRestartCount == 11) {
|
||||
parser->streamRestart();
|
||||
decoder->reset();
|
||||
bytes += pos;
|
||||
amount -= pos;
|
||||
goto restart;
|
||||
}
|
||||
} else {
|
||||
sxmlRestartCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
decoder->decoded.clear();
|
||||
decoder->decode(bytes, amount);
|
||||
parser->feedCharData(decoder->decoded);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Decoder::restart()
|
||||
{
|
||||
decoder->reset();
|
||||
sxmlRestartCount = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/* libStreamedXML/src/lib/Core/Decoder.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
/*! \brief Basic UTF-8 character decoder.
|
||||
|
||||
This is a simple implementation of the character decoder layer. It takes
|
||||
in a pure UTF-8 byte stream and outputs characters to the parser layer.
|
||||
Any exceptions thrown from the above layers will simply be propagated to
|
||||
the caller of feedData().
|
||||
|
||||
The decoder is stateless with respect to the Streamed XML stream, but stateful with respect to the
|
||||
UTF-8 byte stream. It can be reset with a call to restart().
|
||||
|
||||
*/
|
||||
class Decoder {
|
||||
public:
|
||||
/*! \brief Constructor.
|
||||
|
||||
\param parser The Streamed XML parser which will receive decoded
|
||||
characters.
|
||||
\param decoder If you wish to use a more complex decoder, you may pass it here. If you pass 0,
|
||||
the default UTF-8 decoder will be used.
|
||||
|
||||
Sets up the finite state machine and records the Streamed XML parser
|
||||
in use. Doesn't generate any data.
|
||||
|
||||
On deletion, \a parser will not be informed or deleted, but \a decoder will be deleted.
|
||||
|
||||
*/
|
||||
Decoder(Parser* parser, utf8::Decoder* decoder = 0);
|
||||
|
||||
/// Destructor (frees \a decoder passed to ctor).
|
||||
virtual ~Decoder();
|
||||
|
||||
|
||||
|
||||
/*! \brief Feed data to decoder.
|
||||
|
||||
\param bytes The raw input bytes.
|
||||
\param amount The number of input bytes (may be zero).
|
||||
\throws (anything) Any exceptions from the Parser layer.
|
||||
\throws utf8::Error on UTF-8 decoding errors.
|
||||
|
||||
This function is called whenever raw input data becomes available.
|
||||
It decodes a UTF-8 byte stream into characters, which it then passes
|
||||
to the Parser layer.
|
||||
|
||||
*/
|
||||
virtual void feedData(const char* bytes, size_t amount);
|
||||
|
||||
|
||||
|
||||
/// Restart encoder if it is pointed at a new byte stream, etc.
|
||||
void restart();
|
||||
|
||||
private:
|
||||
Parser* parser;
|
||||
utf8::Decoder* decoder;
|
||||
static const char* sxmlRestartSequence;
|
||||
int sxmlRestartCount;
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* libStreamedXML/src/lib/Core/Exceptions.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
Exception::Exception(const std::wstring& reason)
|
||||
: reason(reason)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char* Exception::what()
|
||||
{
|
||||
if(utf8Reason.empty()) utf8Reason = utf8::encode(reason, true);
|
||||
return utf8Reason.c_str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
NotWellFormed::NotWellFormed(const std::wstring& error)
|
||||
: Exception(format(error)), error(error)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::wstring NotWellFormed::format(const std::wstring& error)
|
||||
{
|
||||
std::wostringstream ost;
|
||||
ost << L"Streamed XML is not well formed:\n"
|
||||
<< error;
|
||||
return ost.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
UnknownEntity::UnknownEntity(const std::wstring& name)
|
||||
: Exception(format(name)), name(name)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::wstring UnknownEntity::format(const std::wstring& name)
|
||||
{
|
||||
std::wostringstream ost;
|
||||
ost << L"Encountered an unknown entity named '"
|
||||
<< name
|
||||
<< L"' in the Streamed XML.";
|
||||
return ost.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/* libStreamedXML/src/lib/Core/Exceptions.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {



/*! \brief Exception base class.

Common base of all the exceptions declared here. It does nothing more than
hold a message; more specific details must be stored in derived classes.

*/
class Exception {
public:
    /*! \brief Constructor.

    \param reason Reason for the exception.

    Stores the reason for the exception so that it can be reported to the
    user later on.

    */
    Exception(const std::wstring& reason);

    /// Destructor.
    virtual ~Exception() throw() { }

    /// Find what caused the error.
    virtual const char* what();

    /// Reason for the exception.
    const std::wstring reason;

private:
    /// Narrow-string form of \a reason, as returned by what().
    std::string utf8Reason;
};



/// Thrown when XML is not well formed.
class NotWellFormed : public Exception {
public:
    /*! \brief Constructor.

    \param error Reason for the error.

    Stores the reason for the error, and prepares a suitable message for
    the Exception base class.

    */
    NotWellFormed(const std::wstring& error);

    /// Destructor.
    virtual ~NotWellFormed() throw() { }

    /// Reason for the error.
    const std::wstring error;

private:
    /// Prepares the message passed to the Exception base class.
    static std::wstring format(const std::wstring& error);
};



/// Thrown when an unknown entity is referred to.
class UnknownEntity : public Exception {
public:
    /*! \brief Constructor.

    \param name Name of the unknown entity.

    Stores the name of the unknown entity, and prepares a suitable message
    for the Exception base class.

    */
    UnknownEntity(const std::wstring& name);

    /// Destructor.
    virtual ~UnknownEntity() throw() { }

    /// Name of unknown entity.
    const std::wstring name;

private:
    /// Prepares the message passed to the Exception base class.
    static std::wstring format(const std::wstring& name);
};



}
|
|
@ -0,0 +1,24 @@
|
|||
/* libStreamedXML/src/lib/ForwardDeclare.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
// This file simply contains forward declarations of all libStreamedXML
|
||||
// classes, to facilitate header ordering, etc.
|
||||
|
||||
/// The libStreamedXML classes all go into this namespace.
namespace lsx {



// Processing layers.
class Decoder;
class Parser;
class Callback;

// Exception hierarchy.
class Exception;
class NotWellFormed;
class UnknownEntity;



}
|
|
@ -0,0 +1,585 @@
|
|||
/* libStreamedXML/src/lib/Core/Parser.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
//#define DEBUG_STATE_MACHINE
|
||||
|
||||
#ifdef DEBUG_STATE_MACHINE
|
||||
#include <iostream>
|
||||
#endif
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
/// Character sequence which opens a stream restart marker.
const wchar_t* Parser::xmlRestartMarker = L"<![RESTART[";

/// Character sequence which opens a CDATA marked section.
const wchar_t* Parser::xmlCdataMarker = L"<![CDATA[";
|
||||
|
||||
|
||||
|
||||
/// Records the callback object and the entity expansion setting, then puts
/// the state machine into its initial state via reset().
Parser::Parser(Callback* callback, bool expandEntities)
    : callback(callback),
      expandEntities(expandEntities)
{
    this->reset();
}
|
||||
|
||||
|
||||
|
||||
void Parser::reset()
|
||||
{
|
||||
elemStack.clear();
|
||||
elemName.clear();
|
||||
elemAttrs.clear();
|
||||
buffer.clear();
|
||||
elementDepth = 0;
|
||||
skipNextNewline = false;
|
||||
state = StateNone;
|
||||
restartCount = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Parser::streamRestart()
|
||||
{
|
||||
reset();
|
||||
state = StateRestartMarker;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Parser::feedChar(wchar_t ch)
|
||||
{
|
||||
#ifdef DEBUG_STATE_MACHINE
|
||||
std::wcout << L"Character: ``" << ch << L"'' (Unicode 0x"
|
||||
<< std::hex << ch << std::dec
|
||||
<< L"), state " << state << L".\n";
|
||||
#endif
|
||||
|
||||
#define ERROR(_reason) do { \
|
||||
state = StateError; \
|
||||
throw NotWellFormed(_reason); \
|
||||
}while(0)
|
||||
if(ch == xmlRestartMarker[restartCount]) {
|
||||
if(++restartCount == 11) {
|
||||
streamRestart();
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
restartCount = 0;
|
||||
}
|
||||
|
||||
TokenClass c;
|
||||
switch(ch) {
|
||||
case L'&':
|
||||
c = ClassEntity;
|
||||
break;
|
||||
|
||||
case L'<':
|
||||
c = ClassOpenTag;
|
||||
break;
|
||||
|
||||
case L'\r':
|
||||
skipNextNewline = true;
|
||||
ch = L'\n';
|
||||
c = ClassWhitespace;
|
||||
goto doBuffer;
|
||||
|
||||
case 0x2028:
|
||||
ch = L'\n';
|
||||
c = ClassWhitespace;
|
||||
break;
|
||||
|
||||
case L' ':
|
||||
case L'\t':
|
||||
c = ClassWhitespace;
|
||||
break;
|
||||
|
||||
case 0x85:
|
||||
case L'\n':
|
||||
if(skipNextNewline) return;
|
||||
ch = L'\n';
|
||||
c = ClassWhitespace;
|
||||
break;
|
||||
|
||||
case L':':
|
||||
case L'_':
|
||||
case L'a' ... L'z':
|
||||
case L'A' ... L'Z':
|
||||
case 0xC0 ... 0xD6:
|
||||
case 0xD8 ... 0xF6:
|
||||
case 0xF8 ... 0x2FF:
|
||||
case 0x370 ... 0x37D:
|
||||
case 0x37F ... 0x1FFF:
|
||||
case 0x200C ... 0x200D:
|
||||
case 0x2070 ... 0x218F:
|
||||
case 0x2C00 ... 0x2FEF:
|
||||
case 0x3001 ... 0xD7FF:
|
||||
case 0xF900 ... 0xFDCF:
|
||||
case 0xFDF0 ... 0xFFFD:
|
||||
case 0x10000 ... 0xEFFFF:
|
||||
c = ClassNameStartChar;
|
||||
break;
|
||||
|
||||
case L'-':
|
||||
case L'.':
|
||||
case L'0' ... L'9':
|
||||
case 0xB7:
|
||||
case 0x300 ... 0x36F:
|
||||
case 0x203F ... 0x2040:
|
||||
c = ClassNameChar;
|
||||
break;
|
||||
|
||||
default:
|
||||
if((ch >= 0x00 && ch <= 0x08) ||
|
||||
(ch >= 0x0B && ch <= 0x1F) ||
|
||||
(ch >= 0x7F && ch <= 0x9F)
|
||||
) {
|
||||
ERROR(L"Restricted character encountered.");
|
||||
}
|
||||
c = ClassOther;
|
||||
}
|
||||
|
||||
skipNextNewline = false;
|
||||
doBuffer:
|
||||
// deal with char appropriately, according to state
|
||||
switch(state) {
|
||||
case StateError:
|
||||
return;
|
||||
|
||||
case StateNone:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
buffer += ch;
|
||||
break;
|
||||
|
||||
case ClassOpenTag:
|
||||
if(!buffer.empty()) callback->whiteSpace(buffer);
|
||||
state = StateOpen;
|
||||
buffer.clear();
|
||||
break;
|
||||
|
||||
case ClassEntity:
|
||||
if(expandEntities) {
|
||||
if(!elementDepth) ERROR(L"Entities cannot appear at stream level.");
|
||||
if(!buffer.empty()) callback->whiteSpace(buffer);
|
||||
buffer.clear();
|
||||
parsingAttr = false;
|
||||
state = StateEntity;
|
||||
break;
|
||||
}
|
||||
|
||||
// fall through
|
||||
default:
|
||||
if(!elementDepth) ERROR(L"Content cannot appear at stream level.");
|
||||
if(!buffer.empty()) callback->whiteSpace(buffer);
|
||||
state = StateData;
|
||||
buffer = ch;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateData:
|
||||
switch(c) {
|
||||
case ClassOpenTag:
|
||||
callback->content(buffer);
|
||||
buffer.clear();
|
||||
state = StateOpen;
|
||||
break;
|
||||
|
||||
case ClassEntity:
|
||||
callback->content(buffer);
|
||||
buffer.clear();
|
||||
parsingAttr = false;
|
||||
state = StateEntity;
|
||||
break;
|
||||
|
||||
default:
|
||||
buffer += ch;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateCDATA:
|
||||
if(ch == L']') state = StateCDATA1;
|
||||
else buffer += ch;
|
||||
break;
|
||||
|
||||
case StateCDATA1:
|
||||
if(ch == L']') state = StateCDATA2;
|
||||
else {
|
||||
buffer += L']';
|
||||
buffer += ch;
|
||||
state = StateCDATA;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateCDATA2:
|
||||
if(ch == L'>') {
|
||||
callback->cdata(buffer);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
} else if(ch == L']') {
|
||||
buffer += ch;
|
||||
} else {
|
||||
buffer += L"]]";
|
||||
buffer += ch;
|
||||
state = StateCDATA;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateRestartMarker:
|
||||
if(ch == L']') state = StateRestartMarker1;
|
||||
else buffer += ch;
|
||||
break;
|
||||
|
||||
case StateRestartMarker1:
|
||||
if(ch == L']') state = StateRestartMarker2;
|
||||
else {
|
||||
buffer += L']';
|
||||
buffer += ch;
|
||||
state = StateRestartMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateRestartMarker2:
|
||||
if(ch == L'>') {
|
||||
callback->streamRestart(buffer);
|
||||
buffer.clear();
|
||||
reset();
|
||||
} else if(ch == L']') {
|
||||
buffer += L']';
|
||||
break;
|
||||
} else {
|
||||
buffer += L"]]";
|
||||
buffer += ch;
|
||||
state = StateRestartMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateOpen:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
state = StateElemName;
|
||||
elemAttrs.clear();
|
||||
buffer = ch;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch == L'!') state = StateOpenBang;
|
||||
else if(ch == L'?') {
|
||||
state = StatePI;
|
||||
buffer2.clear();
|
||||
} else if(ch == L'/') {
|
||||
if(!elementDepth) ERROR(L"Encountered a close tag at stream level.");
|
||||
state = StateClose;
|
||||
} else ERROR(L"Invalid start character for element.");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StatePI:
|
||||
if(ch == L'?') state = StatePI2;
|
||||
else if(c == ClassWhitespace) {
|
||||
state = StatePIData;
|
||||
buffer.clear();
|
||||
} else buffer2 += ch;
|
||||
break;
|
||||
|
||||
case StatePI2:
|
||||
if(ch != L'>') ERROR(L"Invalid target for PI");
|
||||
else {
|
||||
callback->PI(buffer2, L"");
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
}
|
||||
break;
|
||||
|
||||
case StatePIData:
|
||||
if(ch == L'?') state = StatePI3;
|
||||
else buffer += ch;
|
||||
break;
|
||||
|
||||
case StatePI3:
|
||||
if(ch == L'>') {
|
||||
callback->PI(buffer2, buffer);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
} else {
|
||||
buffer += L'?';
|
||||
buffer += ch;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateOpenBang:
|
||||
if(ch == L'[') {
|
||||
// restart markers handled by lower layer
|
||||
state = StateOpenCdataMarker;
|
||||
xmlCount = 3;
|
||||
if(!elementDepth) ERROR(L"CDATA sections not valid at stream level.");
|
||||
} else if(ch == L'-') state = StateOpenComment;
|
||||
else ERROR(L"Invalid special tag.");
|
||||
break;
|
||||
|
||||
case StateOpenCdataMarker:
|
||||
if(ch != xmlCdataMarker[xmlCount]) ERROR(L"Invalid marked section.");
|
||||
if(!xmlCdataMarker[++xmlCount]) state = StateCDATA;
|
||||
break;
|
||||
|
||||
case StateOpenComment:
|
||||
if(ch != L'-') ERROR(L"Invalid special tag.");
|
||||
state = StateComment;
|
||||
buffer.clear();
|
||||
break;
|
||||
|
||||
case StateComment:
|
||||
if(ch == L'-') state = StateComment2;
|
||||
else buffer += ch;
|
||||
break;
|
||||
|
||||
case StateComment2:
|
||||
if(ch == L'-') state = StateComment3;
|
||||
else {
|
||||
buffer += L'-';
|
||||
buffer += ch;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateComment3:
|
||||
if(ch != L'>') ERROR(L"`--' not valid in comments");
|
||||
callback->comment(buffer);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
break;
|
||||
|
||||
case StateElemName:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
state = StateElemTag;
|
||||
elemName = buffer;
|
||||
buffer.clear();
|
||||
break;
|
||||
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
buffer += ch;
|
||||
break;
|
||||
|
||||
default:
|
||||
switch(ch) {
|
||||
case L'>':
|
||||
elemStack.push_back(buffer);
|
||||
callback->element(buffer, elemAttrs);
|
||||
state = StateNone;
|
||||
++elementDepth;
|
||||
buffer.clear();
|
||||
break;
|
||||
|
||||
case L'/':
|
||||
state = StateNeedClose;
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR(L"Invalid character in tag name.");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemTag:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
break;
|
||||
|
||||
case ClassNameStartChar:
|
||||
state = StateElemAttrName;
|
||||
buffer = ch;
|
||||
break;
|
||||
|
||||
default:
|
||||
switch(ch) {
|
||||
case L'>':
|
||||
elemStack.push_back(elemName);
|
||||
callback->element(elemName, elemAttrs);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
++elementDepth;
|
||||
break;
|
||||
|
||||
case L'/':
|
||||
state = StateNeedClose;
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR(L"Invalid character in tag.");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemAttrName:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
buffer += ch;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch != L'=') ERROR(L"Invalid character in attribute name.");
|
||||
state = StateElemAttrEq;
|
||||
buffer2 = buffer;
|
||||
buffer.clear();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemAttrEq:
|
||||
if(ch == L'\'') singleQuote = true;
|
||||
else if(ch == L'"') singleQuote = false;
|
||||
else ERROR(L"Invalid character in attribute.");
|
||||
state = StateElemAttrVal;
|
||||
break;
|
||||
|
||||
case StateElemAttrVal:
|
||||
if((singleQuote && ch == L'\'') || (!singleQuote && ch == L'"')) {
|
||||
elemAttrs[buffer2] = buffer;
|
||||
buffer.clear();
|
||||
state = StateElemAttrDone;
|
||||
} else if(expandEntities && ch == L'&') {
|
||||
buffer3 = buffer;
|
||||
buffer.clear();
|
||||
parsingAttr = true;
|
||||
state = StateEntity;
|
||||
} else buffer += ch;
|
||||
break;
|
||||
|
||||
case StateElemAttrDone:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
state = StateElemTag;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch == L'/') {
|
||||
state = StateNeedClose;
|
||||
} else if(ch == L'>') {
|
||||
callback->element(elemName, elemAttrs);
|
||||
elemStack.push_back(elemName);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
++elementDepth;
|
||||
} else ERROR(L"Invalid character after attribute.");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateNeedClose:
|
||||
if(ch != L'>') ERROR(L"Stray `/' in open tag.");
|
||||
callback->element(elemName, elemAttrs);
|
||||
callback->closeTag(elemName);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
break;
|
||||
|
||||
case StateClose:
|
||||
if(c != ClassNameStartChar) ERROR(L"Invalid character in close tag name.");
|
||||
buffer = ch;
|
||||
state = StateClosing;
|
||||
break;
|
||||
|
||||
case StateClosing:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
buffer += ch;
|
||||
break;
|
||||
|
||||
case ClassWhitespace:
|
||||
state = StateNeedClose2;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch != L'>') ERROR(L"Invalid character in close tag name.");
|
||||
if(elemStack.back() != buffer) ERROR(L"Mismatched close tag.");
|
||||
elemStack.pop_back();
|
||||
callback->closeTag(buffer);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
--elementDepth;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateNeedClose2:
|
||||
if(c == ClassWhitespace) break;
|
||||
if(ch != L'>') ERROR(L"Invalid data in close tag.");
|
||||
if(elemStack.back() != elemName) ERROR(L"Mismatched close tag.");
|
||||
elemStack.pop_back();
|
||||
callback->closeTag(elemName);
|
||||
buffer.clear();
|
||||
state = StateNone;
|
||||
--elementDepth;
|
||||
break;
|
||||
|
||||
case StateEntity:
|
||||
if(ch == L'#') {
|
||||
state = StateCharEntity;
|
||||
entityChar = 0;
|
||||
} else if(c == ClassNameStartChar) {
|
||||
buffer = ch;
|
||||
state = StateEntityName;
|
||||
} else ERROR(L"Invalid entity name.");
|
||||
break;
|
||||
|
||||
case StateCharEntity:
|
||||
if(ch == ';') {
|
||||
if(parsingAttr) {
|
||||
buffer = buffer3 + entityChar;
|
||||
state = StateElemAttrVal;
|
||||
break;
|
||||
}
|
||||
|
||||
buffer = entityChar;
|
||||
state = StateData;
|
||||
break;
|
||||
|
||||
} else if(ch >= L'0' && ch <= L'9') {
|
||||
if(entityChar > 214748364 || (entityChar == 214748364 && ch >= L'8'))
|
||||
ERROR(L"Character code too large in character entity.");
|
||||
entityChar *= 10;
|
||||
entityChar += (ch - L'0');
|
||||
} else ERROR(L"Invalid character in character entity.");
|
||||
break;
|
||||
|
||||
case StateEntityName:
|
||||
if(ch == L';') {
|
||||
if(parsingAttr) {
|
||||
buffer = buffer3 + entityRef(buffer);
|
||||
state = StateElemAttrVal;
|
||||
break;
|
||||
}
|
||||
|
||||
buffer = entityRef(buffer);
|
||||
state = StateData;
|
||||
break;
|
||||
}
|
||||
if(c != ClassNameChar && c != ClassNameStartChar) ERROR(L"Invalid entity name.");
|
||||
buffer += ch;
|
||||
break;
|
||||
}
|
||||
#undef ERROR
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::wstring Parser::entityRef(const std::wstring& ent)
|
||||
{
|
||||
if(ent == L"quot") return L"\"";
|
||||
if(ent == L"amp") return L"&";
|
||||
if(ent == L"apos") return L"'";
|
||||
if(ent == L"lt") return L"<";
|
||||
if(ent == L"gt") return L">";
|
||||
return callback->entityRef(ent);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,223 @@
|
|||
/* libStreamedXML/src/lib/Core/Parser.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
namespace lsx {
|
||||
|
||||
|
||||
|
||||
/*! \brief Streamed XML parser object.
|
||||
|
||||
This object sits between the callback layer (see lsx::Callback) and the
|
||||
character decoding layer. It is a finite state machine which parses
|
||||
character data as input and gives Streamed XML events as output. The
|
||||
FSM design gives both speed and robustness. FSMs are good for dealing
|
||||
with true data streams since their behaviour can easily be controlled to
|
||||
avoid becoming unstable, and also because it is easy to provide "reset"
|
||||
functionality (as in the case of a stream restart marker).
|
||||
|
||||
Data must be fed into the parser through either of the feedChar() or
|
||||
feedCharData() functions.
|
||||
Any stream restarts detected by the character decoding layer can be
|
||||
signalled through the streamRestart() function. The parser will accept
|
||||
stream reset markers, but only if they occur at stream level; if the
|
||||
data stream became corrupted (e.g. the end of a comment tag was missed),
|
||||
the Parser class would not be looking for stream restart markers and
|
||||
would thus ignore them.
|
||||
|
||||
A parser is tied to a single lsx::Callback object, which cannot be
|
||||
changed. You could potentially get around this by providing a
|
||||
multiplexing Callback object. As many streams as you like may provide
|
||||
input to a Parser, but this would only make sense if you synchronised
|
||||
their inputs in some manner. Similarly, as many Parser objects as you
|
||||
like may share the same callback, but again some form of synchronisation
|
||||
would be needed.
|
||||
|
||||
Should an error occur in processing the XML (i.e. it is not well
|
||||
formed), a NotWellFormed exception will be thrown. It is also possible
|
||||
for a callback function to throw any exception, which will propagate
|
||||
through to the character decoding layer. If this occurs, the character
|
||||
decoding layer should switch into a state where it searches for a
|
||||
stream restart marker, and not pass any more data to the Parser until it
|
||||
encounters one. At this point, the streamRestart() function should be
|
||||
called and processing can continue.
|
||||
|
||||
*/
|
||||
class Parser {
|
||||
private:
|
||||
//BEGIN // Enumerations for finite state machine ///////////////////
|
||||
enum State {
|
||||
StateNone, // at element or stream level
|
||||
StateRestartMarker, // after <![RESTART[
|
||||
StateRestartMarker1, // after first ]
|
||||
StateRestartMarker2, // after second ]
|
||||
StateOpen, // after <
|
||||
StateOpenBang, // after <!
|
||||
StateOpenCdataMarker, // after <![
|
||||
StateCDATA,
|
||||
StateCDATA1, // first ]
|
||||
StateCDATA2, // second ]
|
||||
StateData, // like StateNone, but don't skip whitespace
|
||||
StateOpenComment, // after <!-
|
||||
StateComment,
|
||||
StateComment2,
|
||||
StateComment3,
|
||||
StatePI, // after <? (we are currently parsing the target)
|
||||
StatePIData, // after target
|
||||
StatePI2, // first ? encountered (in target)
|
||||
StatePI3, // first ? encountered (in data)
|
||||
StateClose, // "</" encountered
|
||||
StateClosing, // "</x"
|
||||
StateNeedClose2, // "</xyz "
|
||||
StateElemName, // <x encountered
|
||||
StateElemTag, // first space after "<name" encountered
|
||||
StateElemAttrName,
|
||||
StateElemAttrEq, // name=
|
||||
StateElemAttrVal, // name=' or name="
|
||||
StateElemAttrDone, // name='value'
|
||||
StateNeedClose, // <elem/
|
||||
StateEntity, // &
|
||||
StateCharEntity, // &#
|
||||
StateEntityName, // &x
|
||||
StateError
|
||||
};
|
||||
|
||||
|
||||
|
||||
enum TokenClass {
|
||||
ClassEntity,
|
||||
ClassNameChar,
|
||||
ClassNameStartChar,
|
||||
ClassOpenTag,
|
||||
ClassWhitespace,
|
||||
ClassOther
|
||||
};
|
||||
//END // Enumerations //////////////////////////////////////////////
|
||||
|
||||
static const wchar_t* xmlCdataMarker;
|
||||
static const wchar_t* xmlRestartMarker;
|
||||
|
||||
// the callback object to use
|
||||
Callback* callback;
|
||||
|
||||
// entity parsing
|
||||
std::wstring entityRef(const std::wstring& ent);
|
||||
bool expandEntities;
|
||||
bool parsingAttr;
|
||||
|
||||
// used for parsing elements
|
||||
std::vector<std::wstring> elemStack;
|
||||
std::wstring elemName;
|
||||
std::map<std::wstring, std::wstring> elemAttrs;
|
||||
|
||||
// parsing state
|
||||
State state;
|
||||
std::wstring buffer, buffer2, buffer3;
|
||||
wchar_t entityChar;
|
||||
bool singleQuote;
|
||||
bool skipNextNewline;
|
||||
int elementDepth, xmlCount, restartCount;
|
||||
|
||||
public:
|
||||
/*! \brief Constructor.
|
||||
|
||||
\param callback The callback object to use.
|
||||
\param expandEntities \a true if you want entities to be expanded,
|
||||
\a false to keep them inline.
|
||||
|
||||
This sets up the finite state machine and records the callback
|
||||
object to use. It doesn't generate any events.
|
||||
|
||||
*/
|
||||
Parser(Callback* callback, bool expandEntities);
|
||||
|
||||
|
||||
|
||||
/*! \brief Feed parser a character.
|
||||
|
||||
\param ch The character to parse.
|
||||
\throws NotWellFormed If the Streamed XML is not well formed.
|
||||
\throws (anything) Anything from the callback object.
|
||||
|
||||
This function will cause the finite state machine to process an
|
||||
input character. It may generate some callback events. It will
|
||||
throw a NotWellFormed exception should it encounter any Streamed XML
|
||||
structure that is not well formed; it will also propagate any
|
||||
exceptions thrown from the higher callback layer.
|
||||
|
||||
*/
|
||||
void feedChar(wchar_t ch);
|
||||
|
||||
|
||||
|
||||
/*! \brief Feed parser some character data.
|
||||
|
||||
\param data A pointer to the buffer of data.
|
||||
\param amount The number of characters to parse (may be zero).
|
||||
\throws NotWellFormed If the Streamed XML is not well formed.
|
||||
\throws (anything) Anything from the callback object.
|
||||
|
||||
This function will cause the finite state machine to process some
|
||||
input characters. It may generate some callback events. It will
|
||||
throw a NotWellFormed exception should it encounter any Streamed XML
|
||||
structure that is not well formed; it will also propagate any
|
||||
exceptions thrown from the higher callback layer.
|
||||
|
||||
*/
|
||||
inline void feedCharData(const wchar_t* data, size_t amount)
|
||||
{
|
||||
for(size_t pos = 0; pos < amount; ++pos) feedChar(data[pos]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Feed parser some character data.
|
||||
|
||||
\param dataStr The data, as a string.
|
||||
\throws NotWellFormed If the Streamed XML is not well formed.
|
||||
\throws (anything) Anything from the callback object.
|
||||
|
||||
This function will cause the finite state machine to process some
|
||||
input characters. It may generate some callback events. It will
|
||||
throw a NotWellFormed exception should it encounter any Streamed XML
|
||||
structure that is not well formed; it will also propagate any
|
||||
exceptions thrown from the higher callback layer.
|
||||
|
||||
*/
|
||||
inline void feedCharData(const std::wstring& dataStr)
|
||||
{
|
||||
std::wstring::size_type pos = 0, len = dataStr.size();
|
||||
for(; pos < len; ++pos) feedChar(dataStr[pos]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief Reset the stream.
|
||||
|
||||
This function should be called to reset the stream. This could be due to e.g. a new file or a
|
||||
restart marker.
|
||||
|
||||
*/
|
||||
void reset();
|
||||
|
||||
|
||||
|
||||
/*! \brief Restart marker detected.
|
||||
|
||||
This function should be called when a stream restart marker (
|
||||
\c <![RESTART[ ) has been detected. The next character of
|
||||
data to be passed to the parser should be the first character of
|
||||
restart marker data (or a \c ']', if there is no data).
|
||||
|
||||
This function will cause the parser to generate a streamRestart()
|
||||
callback.
|
||||
|
||||
*/
|
||||
void streamRestart();
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/* libStreamedXML/src/lib/TopHeader.h
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#ifndef HEADER_libStreamedXML
|
||||
#define HEADER_libStreamedXML
|
||||
|
||||
// standard includes, or includes needed for type declarations
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <utf8>
|
||||
#include <stdint.h>
|
|
@ -0,0 +1,10 @@
|
|||
/* libStreamedXML/src/lib/TopSource.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#include "StreamedXML"
|
||||
|
||||
// Below are all the includes used throughout the library.
|
||||
|
|
@ -0,0 +1 @@
|
|||
source src/libStreamedXML/build.lib
|
|
@ -0,0 +1 @@
|
|||
source src/libStreamedXML/build.install-lib
|
|
@ -0,0 +1,36 @@
|
|||
# Installs the libStreamedXML shared library, its .so symlink chain, the
# monolithic header and the generated pkg-config file. The fragment is meant
# to be sourced; every installation step aborts it with `return 1' on failure
# so that "Done" is only reported when all steps succeeded.
build_target libStreamedXML

# make paths (this is for Gentoo in particular)
build_dir_tree "${LIBDIR}" || return 1
build_dir_tree "${PKGCONFDIR}" || return 1
build_dir_tree "${INCLUDEDIR}" || return 1

# install library
echo "Installing libraries into '${LIBDIR}'"
install_file ${libStreamedXML} ${LIBDIR} 0755 || return 1
BASE="${libStreamedXML_BASE}.so"
MAJOR="${BASE}.${SOMAJOR}"
MINOR="${MAJOR}.${SOMINOR}"
MICRO="${MINOR}.${SOMICRO}"
# symlink chain: base -> major -> minor -> micro (the real file)
# NOTE(review): assumes install_symlink returns non-zero on failure, like
# install_file / install_header do -- confirm against the build helpers.
install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" || return 1
install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" || return 1
install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" || return 1

# install header
echo "Installing header file '${libStreamedXML_HEADER}' into ${INCLUDEDIR}"
install_header ${libStreamedXML_HEADER} ${INCLUDEDIR} 0644 || return 1

# install pkgconfig file
echo "Installing package config file into ${PKGCONFDIR}"
PKGCONFFILE=${PKGCONFDIR}/libStreamedXML.pc
do_cmd rm -f ${PKGCONFFILE} || return 1
# fill in the @...@ placeholders of the template
do_cmd_redir ${PKGCONFFILE} sed \
    -e "s,@VERSION@,${VERSION}," \
    -e "s,@LIBDIR@,${FINALLIBDIR}," \
    -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \
    src/libStreamedXML/pkgconf.in || return 1
do_cmd chmod 0644 ${PKGCONFFILE} || return 1
print_success "Done"

# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,51 @@
|
|||
# Builds the libStreamedXML shared library from the monolithic source file.
# Meant to be sourced; the work is done at most once per build run, guarded
# by libStreamedXML_BUILT.
#
# These are external variables, and shouldn't clash with anything else
#  libStreamedXML
#  libStreamedXML_BUILT
#  libStreamedXML_HEADER
#  libStreamedXML_BASE

# Expansions inside [ ... ] are quoted so that an empty value or one with
# spaces cannot change the shape of the test.
if [ -z "${libStreamedXML_BUILT}" ]
then
    libStreamedXML_BASE=libStreamedXML
    source src/libStreamedXML/soversion

    libStreamedXML="obj/${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}"
    SO_EXTRA="$(pkg-config libutf8++ --libs --cflags) -lstdc++ -lc"

    echo "Building library ${libStreamedXML}..."

    do_cmd source src/libStreamedXML/build.monolithic || return 1

    # rebuild when any input is newer than the library (or it is missing)
    MODIFIED=0
    for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC}
    do
        if [ "${test}" -nt "${libStreamedXML}" ]
        then
            MODIFIED=1
            break
        fi
    done

    if [ "${MODIFIED}" -ne 0 ]
    then
        echo " Compiling"

        SONAME="${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}"
        do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libStreamedXML}" \
            -Wl,-soname,${SONAME} \
            ${SRC} ${SO_EXTRA} || return 1

        # make tests work
        do_cmd ln -sf $(basename ${libStreamedXML}) obj/${SONAME} || return 1

        print_success "Library built"
    else
        print_success "Library up to date"
    fi

    libStreamedXML_BUILT=1
    libStreamedXML_HEADER=${HDR}

fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,21 @@
|
|||
# Generates the monolithic header and source file of libStreamedXML from
# their parts. Meant to be sourced; generation happens at most once per
# build run, guarded by libStreamedXML_MONOLITHIC.
#
# These are external variables, and shouldn't clash with anything else
#  libStreamedXML_MONOLITHIC

SRC="obj/libStreamedXML.cpp"
HDR="obj/StreamedXML"

MONOLITHIC_TESTS="src/libStreamedXML/build.lib src/libStreamedXML/build.monolithic"

if test -z "${libStreamedXML_MONOLITHIC}"; then
    # header parts, in the order they are concatenated
    MONOLITHIC_SOURCE="src/libStreamedXML/TopHeader.h src/libStreamedXML/ForwardDeclare.h src/libStreamedXML/Exceptions.h src/libStreamedXML/Callback.h src/libStreamedXML/Parser.h src/libStreamedXML/Decoder.h src/libStreamedXML/BottomHeader.h"
    make_monolithic ${HDR} C || return 1

    # source parts, in the order they are concatenated
    MONOLITHIC_SOURCE="src/libStreamedXML/TopSource.cpp src/libStreamedXML/Exceptions.cpp src/libStreamedXML/Parser.cpp src/libStreamedXML/Decoder.cpp"
    make_monolithic ${SRC} C || return 1

    libStreamedXML_MONOLITHIC=1
    MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,21 @@
|
|||
# libStreamedXML/src/lib/libStreamedXML/pkgconf.in
|
||||
#
|
||||
# Metadata file for pkg-config
|
||||
# ( http://www.freedesktop.org/software/pkgconfig/ )
|
||||
#
|
||||
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
# Released under the GNU GPLv2. See file COPYING or
|
||||
# http://www.gnu.org/copyleft/gpl.html for details.
|
||||
#
|
||||
|
||||
# Name, description
|
||||
Name: libStreamedXML
|
||||
Description: C++ Streamed XML parser
|
||||
Version: @VERSION@
|
||||
|
||||
# Requirements
|
||||
Requires: libutf8++
|
||||
|
||||
# Compilation information
|
||||
Libs: -L@LIBDIR@ -lStreamedXML
|
||||
Cflags: -I@INCLUDEDIR@
|
|
@ -0,0 +1,17 @@
|
|||
# libStreamedXML/src/libStreamedXML/soversion
|
||||
#
|
||||
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
# Released under the GNU GPLv2. See file COPYING or
|
||||
# http://www.gnu.org/copyleft/gpl.html for details.
|
||||
#
|
||||
|
||||
|
||||
|
||||
# SOMAJOR and SOMINOR are included in the library's soname. They need to
|
||||
# be bumped on a binary-incompatible release. They are both single
|
||||
# integers.
|
||||
SOMAJOR=0
|
||||
SOMINOR=0
|
||||
|
||||
# SOMICRO is bumped every time there is a binary-compatible release.
|
||||
SOMICRO=0
|
|
@ -0,0 +1 @@
|
|||
c++ tests tests libStreamedXML
|
|
@ -0,0 +1,110 @@
|
|||
/* libStreamedXML/src/tests/Callback.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#include "StreamedXML"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
|
||||
class PrintCallback : public lsx::Callback {
|
||||
public:
|
||||
virtual void comment(const std::wstring& data)
|
||||
{
|
||||
std::wcout << L"Callback: comment: ``" << data << L"''.\n";
|
||||
}
|
||||
|
||||
virtual void PI(const std::wstring& target, const std::wstring& data)
|
||||
{
|
||||
std::wcout << L"Callback: PI: ``" << target << L"'': ``"
|
||||
<< data << L"''.\n";
|
||||
}
|
||||
|
||||
virtual void element(const std::wstring& elemName,
|
||||
const std::map<std::wstring, std::wstring>& attrs)
|
||||
{
|
||||
std::wcout << L"Callback: element: ``" << elemName << "''\n";
|
||||
for(std::map<std::wstring, std::wstring>::const_iterator i = attrs.begin(),
|
||||
end = attrs.end(); i != end; ++i)
|
||||
{
|
||||
std::wcout << L"Callback: attribute: "
|
||||
<< i->first << L"=``" << i->second << L"''\n";
|
||||
}
|
||||
std::wcout << L"Callback: end of attributes.\n";
|
||||
}
|
||||
|
||||
virtual void closeTag(const std::wstring& elemName)
|
||||
{
|
||||
std::wcout << L"Callback: close element: ``" << elemName << "''.\n";
|
||||
}
|
||||
|
||||
virtual void whiteSpace(const std::wstring& space)
|
||||
{
|
||||
(void)space;
|
||||
std::wcout << L"Callback: whitespace.\n";
|
||||
}
|
||||
|
||||
virtual void content(const std::wstring& data)
|
||||
{
|
||||
std::wcout << L"Callback: content: ``" << data << L"''.\n";
|
||||
}
|
||||
|
||||
virtual void cdata(const std::wstring& data)
|
||||
{
|
||||
std::wcout << L"Callback: CDATA: ``" << data << "''.\n";
|
||||
}
|
||||
|
||||
virtual std::wstring entityRef(const std::wstring& name)
|
||||
{
|
||||
std::wcout << L"Callback: entity: ``" << name
|
||||
<< L"'' (returning as data).\n";
|
||||
return name;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
//END // Callback class ////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
|
||||
std::wcout << L"Tests the callback functions.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(argc != 2) {
|
||||
std::wcerr << L"Expecting name of XML file.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::ifstream fin(argv[1]);
|
||||
if(!fin) {
|
||||
std::wcerr << L"Couldn't open file for input.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
try {
|
||||
PrintCallback printCallback;
|
||||
lsx::Parser xmlParser(&printCallback, true);
|
||||
lsx::Decoder xmlDecoder(&xmlParser);
|
||||
char data[1024];
|
||||
|
||||
while(!fin.eof()) {
|
||||
fin.read(data, sizeof(data));
|
||||
xmlDecoder.feedData(data, fin.gcount());
|
||||
}
|
||||
}
|
||||
catch(std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,479 @@
|
|||
/* libStreamedXML/src/tests/Structure.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#include "StreamedXML"
|
||||
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
|
||||
|
||||
|
||||
/* Empty exception type thrown when a test check fails (see the verify()
 * methods and DYN_CAST_CHECK below). */
class TestFailed { };
|
||||
|
||||
|
||||
|
||||
//BEGIN // Test results ////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
/* Fetches the next expected result with getExpected(_name) and checks that
 * its dynamic type is _type. Two locals are declared in the enclosing scope:
 *   r  - the fetched TestResult*
 *   rr - the same object as a _type*
 * If the type does not match, a diagnostic is written to std::wcerr and
 * TestFailed is thrown.
 *
 * The fetched object is owned by a small scope guard, so it is deleted when
 * the enclosing scope is left, whether normally or through an exception
 * (previously it was only deleted on a type mismatch and leaked otherwise).
 * Code using this macro must therefore not delete r or rr itself.
 * _name must be a wide string literal, since it is pasted into the message.
 */
#define DYN_CAST_CHECK(_type, _name) \
    TestResult* r = getExpected( _name ); \
    struct DynCastCheckOwner_ { \
        TestResult* held; \
        ~DynCastCheckOwner_() { delete held; } \
    } dynCastCheckOwner_ = { r }; \
    (void)dynCastCheckOwner_; \
    _type * rr = dynamic_cast< _type *>(r); \
    if(!rr) { \
        std::wcerr << L"Expected " << r->name() << L", got " _name L".\n"; \
        throw TestFailed(); \
    }
|
||||
|
||||
|
||||
|
||||
/* Abstract base class of all expected test results. */
class TestResult {
public:
    // Label for this kind of result, used in diagnostics.
    virtual std::wstring name() const = 0;

    // Virtual so that results can be deleted through a TestResult*.
    virtual ~TestResult()
    {
    }
};
|
||||
|
||||
/* Expected result whose name() is "exception"; it carries no data. */
class TestResultException : public TestResult {
public:
    virtual std::wstring name() const
    {
        return std::wstring(L"exception");
    }
};
|
||||
|
||||
class TestResultStreamRestart : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultStreamRestart(const std::wstring& data) : data(data) { }
|
||||
bool verify(const std::wstring& data) const { return data == this->data; }
|
||||
virtual std::wstring name() const { return L"stream restart marker"; }
|
||||
};
|
||||
|
||||
class TestResultComment : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultComment(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"comment"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultCloseTag : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultCloseTag(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"close tag"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultContent : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultContent(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"content"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultWhiteSpace : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultWhiteSpace(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"whitespace"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultCDATA : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultCDATA(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"CDATA marker"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultEntityRef : public TestResult {
|
||||
public:
|
||||
std::wstring data;
|
||||
TestResultEntityRef(const std::wstring& data) : data(data) { }
|
||||
virtual std::wstring name() const { return L"entity reference"; }
|
||||
|
||||
void verify(const std::wstring& data) const
|
||||
{
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultPI : public TestResult {
|
||||
public:
|
||||
std::wstring target, data;
|
||||
TestResultPI(const std::wstring& target, const std::wstring& data)
|
||||
: target(target), data(data) { }
|
||||
virtual std::wstring name() const { return L"processing instruction"; }
|
||||
|
||||
void verify(const std::wstring& target, const std::wstring& data) const
|
||||
{
|
||||
if(target != this->target) {
|
||||
std::wcerr << name() << L": expecting ``" << this->target
|
||||
<< L"'', got ``" << target << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
if(data != this->data) {
|
||||
std::wcerr << name() << L": expecting ``" << this->data
|
||||
<< L"'', got ``" << data << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class TestResultElement : public TestResult {
|
||||
public:
|
||||
std::wstring ename;
|
||||
std::map<std::wstring, std::wstring> attrs;
|
||||
TestResultElement(const std::wstring& ename) : ename(ename) { }
|
||||
virtual std::wstring name() const { return L"open tag"; }
|
||||
|
||||
void verify(const std::wstring& xname, const std::map<std::wstring, std::wstring>& xattrs)
|
||||
{
|
||||
if(ename != xname) {
|
||||
std::wcerr << name() << L": expecting ``" << ename
|
||||
<< L"'', got ``" << xname << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
std::map<std::wstring, std::wstring>::const_iterator i1, i2, end1, end2;
|
||||
i1 = attrs.begin(); end1 = attrs.end();
|
||||
i2 = xattrs.begin(); end2 = xattrs.end();
|
||||
|
||||
for(; (i1 != end1) && (i2 != end2); ++i1, ++i2) {
|
||||
if(i1->first != i2->first) {
|
||||
std::wcerr << name() << L": attribute name: expecting ``" << i1->first
|
||||
<< L"'', got ``" << i2->first << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
if(i1->second != i2->second) {
|
||||
std::wcerr << name() << L": attribute value: expecting ``" << i1->second
|
||||
<< L"'', got ``" << i2->second << L"''.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
|
||||
if((i1 != end1) || (i2 != end2)) {
|
||||
std::wcerr << name() << L": attributes don't match.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
//END // Test results //////////////////////////////////////////////////
|
||||
//BEGIN // Test parser /////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
class TestParser : private lsx::Callback {
|
||||
private:
|
||||
std::list<TestResult*> expected;
|
||||
TestResult* getExpected(const std::wstring& dataType)
|
||||
{
|
||||
if(expected.empty()) {
|
||||
std::wcerr << L"Further data (" << dataType
|
||||
<< L") encountered after end of test.\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
TestResult* r = expected.front();
|
||||
expected.pop_front();
|
||||
return r;
|
||||
}
|
||||
|
||||
virtual void streamRestart(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultStreamRestart, L"stream restart marker")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual void comment(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultComment, L"comment")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual void closeTag(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultCloseTag, L"close tag")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual void whiteSpace(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultWhiteSpace, L"whitespace")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual void content(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultContent, L"Content")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual void cdata(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultCDATA, L"CDATA marker")
|
||||
rr->verify(data);
|
||||
}
|
||||
|
||||
virtual std::wstring entityRef(const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultEntityRef, L"entity reference")
|
||||
rr->verify(data);
|
||||
return data;
|
||||
}
|
||||
|
||||
virtual void PI(const std::wstring& target, const std::wstring& data)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultPI, L"processing instruction")
|
||||
rr->verify(target, data);
|
||||
}
|
||||
|
||||
virtual void element(const std::wstring& name, const std::map<std::wstring, std::wstring>& attrs)
|
||||
{
|
||||
DYN_CAST_CHECK(TestResultElement, L"open tag")
|
||||
rr->verify(name, attrs);
|
||||
}
|
||||
|
||||
// XML parser objects
|
||||
lsx::Parser xmlParser;
|
||||
lsx::Decoder xmlDecoder;
|
||||
|
||||
public:
|
||||
TestParser()
|
||||
: xmlParser(this, true), xmlDecoder(&xmlParser)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~TestParser()
|
||||
{
|
||||
while(!expected.empty()) {
|
||||
delete expected.front();
|
||||
expected.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
void finish()
|
||||
{
|
||||
if(!expected.empty()) {
|
||||
std::wcerr << L"Expecting some more data ("
|
||||
<< expected.front()->name()
|
||||
<< L").\n";
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
|
||||
void expect(TestResult* r)
|
||||
{
|
||||
expected.push_back(r);
|
||||
}
|
||||
|
||||
virtual void feedData(const char* bytes, size_t amt)
|
||||
{
|
||||
while(amt) {
|
||||
try {
|
||||
xmlDecoder.feedData(bytes, std::min(amt, size_t(6)));
|
||||
}
|
||||
catch(lsx::Exception& e) {
|
||||
DYN_CAST_CHECK(TestResultException, L"exception")
|
||||
delete r;
|
||||
}
|
||||
amt -= std::min(amt, size_t(6));
|
||||
bytes += 6;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
//END // Test parser ///////////////////////////////////////////////////
|
||||
//BEGIN // Tests ///////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// Test 1: processing instruction, comment, elements and content, with a
// parser exception in the middle followed by a stream restart marker.
void test1(TestParser* p)
{
    std::wcout << L"Test 1: " << std::flush;

    // Input document.
    const std::string td(
        "<?targ data data ?>\n"
        "<!-- comment -->\n"
        "<elemOne> with added content\n"
        "<![RESTART[data]>]]]]>\n"
        "<elemTwo>\n"
        "</elemTwo>\n");

    // Expected callbacks, in the order they must arrive.
    p->expect(new TestResultPI(L"targ", L" data data "));
    p->expect(new TestResultWhiteSpace(L"\n"));
    p->expect(new TestResultComment(L" comment "));
    p->expect(new TestResultWhiteSpace(L"\n"));
    p->expect(new TestResultElement(L"elemOne"));
    p->expect(new TestResultWhiteSpace(L" "));
    p->expect(new TestResultContent(L"with added content\n"));
    p->expect(new TestResultException());
    p->expect(new TestResultStreamRestart(L"data]>]]"));
    p->expect(new TestResultWhiteSpace(L"\n"));
    p->expect(new TestResultElement(L"elemTwo"));
    p->expect(new TestResultWhiteSpace(L"\n"));
    p->expect(new TestResultCloseTag(L"elemTwo"));

    p->feedData(td.data(), td.size());
    p->finish();

    std::wcout << L"OK.\n";
}
|
||||
|
||||
|
||||
|
||||
// Test 2: two consecutive <reading> elements with no whitespace between
// tags; the readings differ only in the content of <value>.
void test2(TestParser* p)
{
    std::wcout << L"Test 2: " << std::flush;

    // Queue the same sequence of expectations once per reading.
    static const wchar_t* const readingValues[] = { L"0", L"1" };
    const size_t readingCount = sizeof(readingValues) / sizeof(readingValues[0]);
    for(size_t idx = 0; idx < readingCount; ++idx) {
        p->expect(new TestResultElement(L"reading"));
        p->expect(new TestResultElement(L"type"));
        p->expect(new TestResultContent(L"integer"));
        p->expect(new TestResultCloseTag(L"type"));
        p->expect(new TestResultElement(L"value"));
        p->expect(new TestResultContent(readingValues[idx]));
        p->expect(new TestResultCloseTag(L"value"));
        p->expect(new TestResultCloseTag(L"reading"));
    }

    const std::string td(
        "<reading><type>integer</type><value>0</value></reading>"
        "<reading><type>integer</type><value>1</value></reading>");
    p->feedData(td.data(), td.size());
    p->finish();

    std::wcout << L"OK.\n";
}
|
||||
|
||||
|
||||
|
||||
// Test 3: attribute quoting, predefined entity references, a custom entity
// reference and a CDATA section.
void test3(TestParser* p)
{
    std::wcout << L"Test 3: " << std::flush;

    // Expected open tag; attribute values are listed in their decoded form.
    TestResultElement* el = new TestResultElement(L"elemName");
    el->attrs[L"attr1"] = L"\"value1\"";
    el->attrs[L"attr2"] = L"'value2'";
    el->attrs[L"attr3"] = L"\"<&>'";
    p->expect(el);

    p->expect(new TestResultContent(L"<"));
    p->expect(new TestResultContent(L"&"));
    p->expect(new TestResultContent(L">"));
    p->expect(new TestResultEntityRef(L"myEntity"));
    p->expect(new TestResultContent(L"myEntity\n "));
    p->expect(new TestResultCDATA(L"<&>]]]]"));
    p->expect(new TestResultWhiteSpace(L"\n"));
    p->expect(new TestResultCloseTag(L"elemName"));

    // Markup characters outside the CDATA section must be written as entity
    // references: a raw '"' would end the C++ string literal early, and raw
    // '<' / '&' would not yield the three separate content events expected
    // above. Inside CDATA the characters stay literal.
    std::string td = "<elemName attr1='\"value1\"' attr2=\"'value2'\"\n"
        " attr3='&quot;&lt;&amp;&gt;&apos;'\n"
        " >&lt;&amp;&gt;&myEntity;\n"
        " <![CDATA[<&>]]]]]]>\n"
        "</elemName >\n";
    p->feedData(td.data(), td.size());
    p->finish();

    std::wcout << L"OK.\n";
}
|
||||
|
||||
|
||||
|
||||
//END // Tests /////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
|
||||
std::wcout << L"XML structure regression test.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(argc != 1) {
|
||||
std::wcerr << L"No arguments expected.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
TestParser* p = 0;
|
||||
|
||||
#define TEST_n(_n) \
|
||||
try { \
|
||||
std::wcout << "======== Test " #_n " ========" << std::endl; \
|
||||
p = new TestParser(); \
|
||||
test ## _n (p); \
|
||||
delete p; \
|
||||
p = 0; \
|
||||
} \
|
||||
catch(TestFailed&) { \
|
||||
std::wcerr << "Test failed." << std::endl; \
|
||||
ret = 1; \
|
||||
} \
|
||||
catch(std::exception& e) { \
|
||||
std::cerr << e.what() << std::endl; \
|
||||
ret = 1; \
|
||||
} \
|
||||
std::wcout << std::endl;
|
||||
|
||||
TEST_n(1);
|
||||
TEST_n(2);
|
||||
TEST_n(3);
|
||||
|
||||
delete p;
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
/* libStreamedXML/src/tests/UTF-8.cpp
|
||||
*
|
||||
* (c)2005, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
#include "StreamedXML"
|
||||
#include <iostream>
|
||||
#include <utf8>
|
||||
|
||||
|
||||
|
||||
// Empty tag type thrown to signal that a check in this test program failed.
struct TestFailed
{
};
|
||||
|
||||
|
||||
|
||||
class MySimpleParser : private lsx::Callback {
|
||||
private:
|
||||
lsx::Parser xmlParser;
|
||||
lsx::Decoder xmlDecoder;
|
||||
|
||||
public:
|
||||
MySimpleParser()
|
||||
: xmlParser(this, true), xmlDecoder(&xmlParser)
|
||||
{
|
||||
}
|
||||
|
||||
void feedBadData(const char* bytes, size_t amount)
|
||||
{
|
||||
try {
|
||||
xmlDecoder.feedData(bytes, amount);
|
||||
}
|
||||
catch(utf8::Error&) {
|
||||
return;
|
||||
}
|
||||
|
||||
throw TestFailed();
|
||||
}
|
||||
|
||||
virtual void feedData(const char* bytes, size_t amount)
|
||||
{
|
||||
try {
|
||||
xmlDecoder.feedData(bytes, amount);
|
||||
}
|
||||
catch(std::exception& e) {
|
||||
std::wcerr << e.what() << std::endl;
|
||||
throw TestFailed();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Restart marker fed after each bad-data case before the next one is tried.
const char restartMarker[] = "<![RESTART[[]]>";

// Well-formed UTF-8: an open tag followed by 1-, 2- and 3-byte sequences.
// The hex groups are kept as separate literals so that no escape sequence
// runs into the characters of the next group.
const char okData[] = "<element>"
    "\x41\xE2\x89\xA2\xCE\x91\x2E"           // U+0041 U+2262 U+0391 U+002E
    "\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4"   // U+D55C U+AD6D U+C5B4
    "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E";  // U+65E5 U+672C U+8A9E

// Byte sequences the decoder is expected to reject.
const char badDataOverlong[] = "<overlong>"
    "\xC0\xB0"; // ascii '0', overlong
const char badDataMalformed0[] = "<malformed>"
    "\xFF"; // never appears
const char badDataMalformed2[] = "<malformed>"
    "\x8F"; // lone continuation char
const char badDataMalformed3[] = "<malformed>"
    "\xC1\x3F"; // missing continuation char

// One bad-data test case. The pointers are const because they refer to
// string literals and read-only arrays.
struct TestData {
    const wchar_t* desc; // description printed while the case runs; 0 ends the table
    const char* str;     // bytes to feed
    size_t len;          // number of bytes in str, excluding the terminating NUL
};

// Builds a table entry from a description and a char array, using sizeof so
// the length cannot drift from the data.
#define P(D, Q) { D, Q, sizeof( Q ) - 1 }
const TestData testData[] = {
    P(L"Overlong character encoding", badDataOverlong),
    P(L"Illegal octet (0xFF)", badDataMalformed0),
    P(L"Lone continuation char", badDataMalformed2),
    P(L"Missing continuation char", badDataMalformed3),
    { 0, 0, 0 }
};
#undef P
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
|
||||
std::wcout << L"Regression test for the UTF-8 decoder.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(argc != 1) {
|
||||
std::wcout << L"No arguments expected.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
try {
|
||||
MySimpleParser p;
|
||||
|
||||
std::wcout << L"Checking OK data: " << std::flush;
|
||||
p.feedData(okData, sizeof(okData) - 1);
|
||||
std::wcout << L"success" << std::endl;
|
||||
|
||||
for(TestData* test = testData; test->desc; ++test) {
|
||||
std::wcout << L"Checking bad data (" << test->desc
|
||||
<< L"): " << std::flush;
|
||||
p.feedBadData(test->str, test->len);
|
||||
std::wcout << "success" << std::endl;
|
||||
p.feedData(restartMarker, sizeof(restartMarker) - 1);
|
||||
}
|
||||
}
|
||||
catch(TestFailed&) {
|
||||
std::wcerr << L"Test failed.\n";
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
# Pull in the shell fragment src/tests/build.tests so it runs in this shell.
source src/tests/build.tests
|
||||
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,43 @@
|
|||
# These are external variables, and shouldn't clash with anything else
|
||||
# tests_BUILT
|
||||
#
|
||||
|
||||
build_target libStreamedXML || return 1

# Build the test programs only once; tests_BUILT is set at the end of the
# block. The expansion is quoted so the test is well-formed when it is unset.
if [ -z "${tests_BUILT}" ]
then
    LIBS="${libStreamedXML} "
    EXTRAS=""

    echo "Building test programs..."
    do_cmd mkdir -p obj/tests || return 1

    for SRC in src/tests/*.cpp
    do
        # Strip the directory and the literal ".cpp" suffix.
        TEST="obj/tests/$(basename "${SRC}" .cpp)"

        # Rebuild when the library, the source or this script is newer than
        # the test binary. ${LIBS} is left unquoted on purpose: it is a
        # space-separated list.
        MODIFIED=0
        for file in ${LIBS} "${SRC}" src/tests/build.tests
        do
            if [ "${file}" -nt "${TEST}" ]
            then
                MODIFIED=1
                break
            fi
        done

        if [ "${MODIFIED}" -ne 0 ]
        then
            # ${CXX}, ${CFLAGS}, ${LIBS} and ${EXTRAS} are word-split on purpose.
            do_cmd ${CXX} -Iobj ${CFLAGS} -o "${TEST}" "${SRC}" ${LIBS} ${EXTRAS} || return 1
            print_success "Built ${TEST}"
        else
            print_success "${TEST} is up to date"
        fi
    done

    print_success "All tests built"

    tests_BUILT=1
fi
|
||||
|
||||
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,44 @@
|
|||
/* libStreamedXML/src/tests/???.cpp
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#include "StreamedXML"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
|
||||
// Skeleton entry point for a new test program.
// With "--print-summary" it prints a one-line description and exits with 0;
// otherwise it runs the (still to be written) test body and returns 1 if an
// exception was caught, 0 if not.
int main(int argc, char* argv[])
{
    const bool wantSummary = (argc == 2) && (strcmp(argv[1], "--print-summary") == 0);
    if(wantSummary) {
        std::cout << "One line summary.\n";
        return 0;
    }

    if(argc == 1) {
        // empty argument list
    }

    int status = 0;
    try {
        // TODO
    }
    catch(std::exception& failure) {
        std::cerr << failure.what() << std::endl;
        status = 1;
    }
    catch(...) {
        std::cerr << "Unknown exception caught." << std::endl;
        status = 1;
    }

    return status;
}
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
Loading…
Reference in New Issue