diff --git a/src/ctests/.params b/src/ctests/.params new file mode 100644 index 0000000..5dcd0de --- /dev/null +++ b/src/ctests/.params @@ -0,0 +1 @@ +c tests ctests libCStreamedXML diff --git a/src/ctests/build.default b/src/ctests/build.default new file mode 100644 index 0000000..183fd69 --- /dev/null +++ b/src/ctests/build.default @@ -0,0 +1,3 @@ +source src/ctests/build.tests +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/ctests/build.tests b/src/ctests/build.tests new file mode 100644 index 0000000..ebf1a82 --- /dev/null +++ b/src/ctests/build.tests @@ -0,0 +1,43 @@ +# These are external variables, and shouldn't clash with anything else +# ctests_BUILT +# + +build_target libCStreamedXML || return 1 + +if [ -z ${ctests_BUILT} ] +then + LIBS="${libCStreamedXML} " + EXTRAS="" + + echo "Building test programs..." + do_cmd mkdir -p obj/tests || return 1 + + for SRC in src/ctests/*.c + do + TEST="obj/tests/$(basename ${SRC} | sed -e 's,.c$,,')" + MODIFIED=0 + for file in ${LIBS} ${SRC} src/ctests/build.tests + do + if [ ${file} -nt ${TEST} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + do_cmd ${CC} -Iobj ${CFLAGS} -o ${TEST} ${SRC} ${LIBS} ${EXTRAS} || return 1 + print_success "Built ${TEST}" + else + print_success "${TEST} is up to date" + fi + done + + print_success "All tests built" + + ctests_BUILT=1 +fi + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/ctests/cCallback.c b/src/ctests/cCallback.c new file mode 100644 index 0000000..0540df3 --- /dev/null +++ b/src/ctests/cCallback.c @@ -0,0 +1,137 @@ +/* libStreamedXML/src/ctests/cCallback.c + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. 
+*/
+
+#include "StreamedXML.h"
+
+#include <stdio.h>
+#include <string.h>
+
+
+
+/* Each echo_* function below is installed as a parser callback by main().
+ * They print what they are given to stdout and return 0 so that parsing
+ * carries on. */
+
+/* Report a run of whitespace. */
+int echo_whiteSpace(const struct csxml* ctx, const char* ws)
+{
+    (void)ctx;
+    printf("Whitespace: ``%s''\n", ws);
+    return 0;
+}
+
+/* Report character content. */
+int echo_content(const struct csxml* ctx, const char* data)
+{
+    (void)ctx;
+    printf("Content: ``%s''\n", data);
+    return 0;
+}
+
+/* Report the body of a CDATA section. */
+int echo_cdata(const struct csxml* ctx, const char* data)
+{
+    (void)ctx;
+    printf("CDATA: ``%s''\n", data);
+    return 0;
+}
+
+/* Report the text carried by a stream restart marker. */
+int echo_streamRestart(const struct csxml* ctx, const char* data)
+{
+    (void)ctx;
+    printf("Stream restart marker: ``%s''\n", data);
+    return 0;
+}
+
+/* Report a processing instruction; `data' is 0 when the PI has no data. */
+int echo_PI(const struct csxml* ctx, const char* target, const char* data)
+{
+    (void)ctx;
+    printf("PI: target ``%s'', data ", target);
+    if(data) printf("``%s''\n", data);
+    else printf("not specified\n");
+    return 0;
+}
+
+/* Report the body of a comment. */
+int echo_comment(const struct csxml* ctx, const char* data)
+{
+    (void)ctx;
+    printf("Comment: ``%s''\n", data);
+    return 0;
+}
+
+/* Report an open tag, followed by one line per attribute. Attribute names
+ * and values are read from the parser's elemAttrNames / elemAttrVals lists. */
+int echo_element(const struct csxml* ctx, const char* elemName, int numAttrs)
+{
+    int i;
+
+    printf("Open tag: <%s> (%d attributes)\n", elemName, numAttrs);
+    for(i = 0; i < numAttrs; ++i) {
+        printf(" %s='%s'\n", ctx->elemAttrNames.data[i].data, ctx->elemAttrVals.data[i].data);
+    }
+    return 0;
+}
+
+/* Report a close tag. The format string previously had no conversion for
+ * elemName, so the name was never printed. */
+int echo_closeTag(const struct csxml* ctx, const char* elemName)
+{
+    (void)ctx;
+    printf("Close tag: </%s>\n", elemName);
+    return 0;
+}
+
+/* Report an entity reference. Always returns 0 (no expansion available), so
+ * csxml_entityRef() goes on to call the unknownEntity callback. */
+const char* echo_entityRef(const struct csxml* ctx, const char* ent)
+{
+    (void)ctx;
+    printf("Entity reference: ``%s''\n", ent);
+    return 0;
+}
+
+
+
+/* Test driver: parses the file named in argv[1], echoing every callback.
+ *
+ * With the single argument --print-summary it prints a one-line description
+ * and exits. Returns 0 on success, 1 on usage, open, allocation or read
+ * errors. */
+int main(int argc, char* argv[])
+{
+    FILE* fp = 0;
+    char buf[1024];
+    size_t amt;
+    struct csxml* ctx = 0;
+    int ret = 0;
+
+    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
+        printf("{C library} Tests the callback functions.\n");
+        return 0;
+    }
+
+    if(argc != 2) {
+        fprintf(stderr, "Expecting filename.\n");
+        return 1;
+    }
+
+    fp = fopen(argv[1], "r");
+    if(!fp) {
+        fprintf(stderr, "Error opening '%s' for input.\n", argv[1]);
+        return 1;
+    }
+
+    ctx = csxml_newParser();
+    if(!ctx) {
+        fprintf(stderr, "Couldn't allocate a new parser.\n");
+        fclose(fp); /* don't leak the input stream on this path */
+        return 1;
+    }
+
+    ctx->whiteSpace = echo_whiteSpace;
+    ctx->content = echo_content;
+    ctx->cdata = echo_cdata;
+    ctx->streamRestart = echo_streamRestart;
+    ctx->PI = echo_PI;
+    ctx->comment = echo_comment;
+    ctx->element = echo_element;
+    ctx->closeTag = echo_closeTag;
+    ctx->entityRef = echo_entityRef;
+
+    /* Loop on fread()'s result rather than feof(): after a read error feof()
+     * stays false, which used to spin forever feeding zero-length blocks. */
+    while((amt = fread(buf, 1, sizeof(buf), fp)) > 0) {
+        csxml_feedData(ctx, buf, amt);
+    }
+    if(ferror(fp)) {
+        fprintf(stderr, "Error reading '%s'.\n", argv[1]);
+        ret = 1;
+    }
+
+    csxml_freeParser(ctx);
+    fclose(fp);
+
+    return ret;
+}
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4
+*/
diff --git a/src/ctests/template b/src/ctests/template
new file mode 100644
index 0000000..a829314
--- /dev/null
+++ b/src/ctests/template
@@ -0,0 +1,35 @@
+/* libStreamedXML/src/ctests/???.c
+ *
+ * (c)2006, Laurence Withers, .
+ * Released under the GNU GPLv2. See file COPYING or
+ * http://www.gnu.org/copyleft/gpl.html for details.
+*/ + +#include "StreamedXML.h" + +#include + + + +int main(int argc, char* argv[]) +{ + if(argc == 2 && !strcmp(argv[1], "--print-summary")) { + printf("One line summary.\n"); + return 0; + } + + if(argc == 1) { + // empty argument list + } + + int ret = 0; + + // TODO + + return ret; +} + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/.functions.h.swp b/src/libCStreamedXML/.functions.h.swp new file mode 100644 index 0000000..9dd2f80 Binary files /dev/null and b/src/libCStreamedXML/.functions.h.swp differ diff --git a/src/libCStreamedXML/.params b/src/libCStreamedXML/.params new file mode 100644 index 0000000..eca3504 --- /dev/null +++ b/src/libCStreamedXML/.params @@ -0,0 +1 @@ +c lib libCStreamedXML StreamedXML.h diff --git a/src/libCStreamedXML/BottomHeader.h b/src/libCStreamedXML/BottomHeader.h new file mode 100644 index 0000000..072d08e --- /dev/null +++ b/src/libCStreamedXML/BottomHeader.h @@ -0,0 +1,14 @@ +/* libCStreamedXML/src/clib/BottomHeader.h + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +#endif +/*!@}*/ + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/TopHeader.h b/src/libCStreamedXML/TopHeader.h new file mode 100644 index 0000000..729d936 --- /dev/null +++ b/src/libCStreamedXML/TopHeader.h @@ -0,0 +1,32 @@ +/* libCStreamedXML/src/libCStreamedXML/TopHeader.h + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +#ifndef HEADER_libCStreamedXML +#define HEADER_libCStreamedXML + +// standard includes, or includes needed for type declarations +#include + +/*! \defgroup libCStreamedXML libCStreamedXML interface. 
+ +libCStreamedXML is a C library for parsing Streamed XML data. It is somewhat simpler than the C++ +library (it only parses ASCII data, not Unicode data). In future, the API could be extended to +use only fixed-size buffers, and not to call malloc() at all. This would be advantageous for small +embedded systems (e.g. PICs). + +To parse Streamed XML, you must first create a parser context object with csxml_newParser(). Then +you set up the returned object by changing options and callback functions. Once set up correctly, +pass in data using the csxml_feedChar() and csxml_feedData() functions. Finally, free the parser +object with csxml_freeParser(). + +*/ +/*!@{*/ + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/TopSource.c b/src/libCStreamedXML/TopSource.c new file mode 100644 index 0000000..90d401c --- /dev/null +++ b/src/libCStreamedXML/TopSource.c @@ -0,0 +1,20 @@ +/* libCStreamedXML/src/libCStreamedXMLlibCStreamedXML/TopSource.c + * + * (c)2006, Laurence Withers, . + * Released under the GNU GPLv2. See file COPYING or + * http://www.gnu.org/copyleft/gpl.html for details. +*/ + +#include "StreamedXML.h" + +// Below are all the includes used throughout the library. +#include +#include +#include + +//#define DEBUG_STATE_MACHINE + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/buffer.c b/src/libCStreamedXML/buffer.c new file mode 100644 index 0000000..10cab8a --- /dev/null +++ b/src/libCStreamedXML/buffer.c @@ -0,0 +1,100 @@ +/* libCStreamedXML/src/libCStreamedXML/buffer.c + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. 
+*/
+
+/* Doubles the capacity of `buf'.
+ *
+ * Returns 0 on success. On allocation failure the outOfMemory callback is
+ * invoked with the requested size, -1 is returned and `buf' is unchanged. */
+static int do_realloc(struct csxml* ctx, struct csxml_buf* buf)
+{
+    char* n = realloc(buf->data, buf->size << 1);
+    if(!n) {
+        ctx->outOfMemory(ctx, buf->size << 1);
+        return -1;
+    }
+
+    buf->size <<= 1;
+    buf->data = n;
+    return 0;
+}
+
+/* Replaces the contents of `dest' with a copy of `src', growing `dest' until
+ * it can hold src->len characters plus the terminator.
+ *
+ * Returns 0 on success, -1 if growing `dest' failed. */
+static int buffer_copy(struct csxml* ctx, struct csxml_buf* dest, const struct csxml_buf* src)
+{
+    while(dest->size <= src->len) if(do_realloc(ctx, dest)) return -1;
+    strcpy(dest->data, src->data);
+    dest->len = src->len;
+    return 0;
+}
+
+/* Appends the null-terminated string `src' to `dest', growing it as needed.
+ *
+ * Returns 0 on success, -1 if growing `dest' failed. */
+static int buffer_strcat(struct csxml* ctx, struct csxml_buf* dest, const char* src)
+{
+    size_t srclen = strlen(src);
+    size_t req = srclen + dest->len;
+
+    while(req >= dest->size) if(do_realloc(ctx, dest)) return -1;
+
+    /* Copy to the recorded end of the data (including the terminator) and
+     * record the new length. The length used to be left stale, so the next
+     * character appended overwrote the text just added. */
+    memcpy(dest->data + dest->len, src, srclen + 1);
+    dest->len = req;
+    return 0;
+}
+
+/* Initialises `buf' as an empty buffer with 256 bytes of storage.
+ *
+ * Returns 0 on success. On allocation failure the outOfMemory callback is
+ * invoked and -1 is returned. */
+static int buffer_init(struct csxml* ctx, struct csxml_buf* buf)
+{
+    /* sizeof(*buf), not sizeof(buf): the latter is the size of a pointer and
+     * left part of the structure uninitialised. */
+    memset(buf, 0, sizeof(*buf));
+    buf->size = 256;
+    buf->data = malloc(buf->size);
+    if(!buf->data) {
+        ctx->outOfMemory(ctx, buf->size);
+        return -1;
+    }
+    return 0;
+}
+
+/* Releases the storage of `buf' and zeroes the structure. */
+static void buffer_free(struct csxml_buf* buf)
+{
+    free(buf->data);
+    memset(buf, 0, sizeof(struct csxml_buf));
+}
+
+/* Grows `list' to twice its capacity (4 entries when empty) and initialises
+ * every newly added entry.
+ *
+ * Returns 0 on success, -1 on allocation failure (outOfMemory has then been
+ * called). On failure the list stays consistent: list->data is valid and
+ * list->size counts exactly the entries that were initialised. */
+static int do_realloc2(struct csxml* ctx, struct csxml_list* list)
+{
+    size_t newlen = list->size ? (list->size << 1) : 4;
+    struct csxml_buf* n = realloc(list->data, newlen * sizeof(struct csxml_buf));
+    if(!n) {
+        ctx->outOfMemory(ctx, newlen * sizeof(struct csxml_buf));
+        return -1;
+    }
+
+    /* Adopt the block straight away: realloc() may have moved it, and an
+     * early return below must not leave list->data pointing at freed memory. */
+    list->data = n;
+
+    /* Clear and initialise only the new tail. `n' is a struct pointer, so the
+     * offset is counted in elements; the old code scaled it by sizeof a
+     * second time and also walked `newlen' entries past the old end. */
+    memset(n + list->size, 0, (newlen - list->size) * sizeof(struct csxml_buf));
+    while(list->size < newlen) {
+        if(buffer_init(ctx, n + list->size)) return -1;
+        ++list->size;
+    }
+
+    return 0;
+}
+
+/* Pushes a copy of `buf' onto the end of `list', growing the list if full.
+ *
+ * Returns 0 on success, -1 on allocation failure. */
+static int list_push(struct csxml* ctx, struct csxml_list* list, struct csxml_buf* buf)
+{
+    if((list->size == list->len) && do_realloc2(ctx, list)) return -1;
+    if(buffer_copy(ctx, list->data + list->len, buf)) return -1;
+    ++list->len;
+    return 0;
+}
+
+/* Removes the last entry of `list' and returns a pointer to it. The entry
+ * stays owned by the list. There is no check for an empty list here. */
+static struct csxml_buf* list_pop(struct csxml_list* list)
+{
+    return list->data + --list->len;
+}
+
+/* Frees every buffer held by `list', then the list storage, and zeroes the
+ * structure. */
+static void list_free(struct csxml_list* list)
+{
+    size_t i;
+
+    for(i = 0; i < list->size; ++i) buffer_free(list->data + i);
+    free(list->data);
+    memset(list, 0, sizeof(struct csxml_list));
+}
+
+
+
+/* options for text editors
+kate: replace-trailing-space-save true; space-indent true; tab-width 4;
+vim: expandtab:ts=4:sw=4
+*/
diff --git a/src/libCStreamedXML/build.default b/src/libCStreamedXML/build.default
new file mode 100644
index 0000000..a24840e
--- /dev/null
+++ b/src/libCStreamedXML/build.default
@@ -0,0 +1 @@
+source src/libCStreamedXML/build.lib
diff --git a/src/libCStreamedXML/build.install b/src/libCStreamedXML/build.install
new file mode 100644
index 0000000..a8df5b5
--- /dev/null
+++ b/src/libCStreamedXML/build.install
@@ -0,0 +1 @@
+source src/libCStreamedXML/build.install-lib
diff --git a/src/libCStreamedXML/build.install-lib b/src/libCStreamedXML/build.install-lib
new file mode 100644
index 0000000..beec81b
--- /dev/null
+++ b/src/libCStreamedXML/build.install-lib
@@ -0,0 +1,36 @@
+build_target libCStreamedXML
+
+# make paths (this is for Gentoo in particular)
+build_dir_tree "${LIBDIR}" || return 1
+build_dir_tree "${PKGCONFDIR}" || return 1
+build_dir_tree
"${INCLUDEDIR}" || return 1 + +# install library +echo "Installing libraries into '${LIBDIR}'" +install_file ${libCStreamedXML} ${LIBDIR} 0755 || return 1 +BASE="${libCStreamedXML_BASE}.so" +MAJOR="${BASE}.${SOMAJOR}" +MINOR="${MAJOR}.${SOMINOR}" +MICRO="${MINOR}.${SOMICRO}" +install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" +install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" +install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" + +# install header +echo "Installing header file '${libCStreamedXML_HEADER}' into ${INCLUDEDIR}" +install_header ${libCStreamedXML_HEADER} ${INCLUDEDIR} 0644 || return 1 + +# install pkgconfig file +echo "Installing package config file into ${PKGCONFDIR}" +PKGCONFFILE=${PKGCONFDIR}/libCStreamedXML.pc +do_cmd rm -f ${PKGCONFFILE} +do_cmd_redir ${PKGCONFFILE} sed \ + -e "s,@VERSION@,${VERSION}," \ + -e "s,@LIBDIR@,${FINALLIBDIR}," \ + -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \ + src/libCStreamedXML/pkgconf.in +do_cmd chmod 0644 ${PKGCONFFILE} +print_success "Done" + +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libCStreamedXML/build.lib b/src/libCStreamedXML/build.lib new file mode 100644 index 0000000..5d316e8 --- /dev/null +++ b/src/libCStreamedXML/build.lib @@ -0,0 +1,51 @@ +# These are external variables, and shouldn't clash with anything else +# libCStreamedXML +# libCStreamedXML_BUILT +# libCStreamedXML_HEADER +# libCStreamedXML_BASE + +if [ -z ${libCStreamedXML_BUILT} ] +then + libCStreamedXML_BASE=libCStreamedXML + source src/libCStreamedXML/soversion + + libCStreamedXML="obj/${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}" + SO_EXTRA="-lc" # @TODO@ libs, cflags + + echo "Building library ${libCStreamedXML}..." 
+ + do_cmd source src/libCStreamedXML/build.monolithic || return 1 + + MODIFIED=0 + for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC} + do + if [ ${test} -nt ${libCStreamedXML} ] + then + MODIFIED=1 + break + fi + done + + if [ ${MODIFIED} -ne 0 ] + then + echo " Compiling" + + SONAME="${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}" + do_cmd ${CC} ${CFLAGS} -shared -fpic -o "${libCStreamedXML}" \ + -Wl,-soname,${SONAME} \ + ${SRC} ${SO_EXTRA} || return 1 + + # make tests work + do_cmd ln -sf $(basename ${libCStreamedXML}) obj/${SONAME} || return 1 + + print_success "Library built" + else + print_success "Library up to date" + fi + + libCStreamedXML_BUILT=1 + libCStreamedXML_HEADER=${HDR} + +fi +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libCStreamedXML/build.monolithic b/src/libCStreamedXML/build.monolithic new file mode 100644 index 0000000..a6027c4 --- /dev/null +++ b/src/libCStreamedXML/build.monolithic @@ -0,0 +1,21 @@ +# These are external variables, and shouldn't clash with anything else +# libCStreamedXML_MONOLITHIC + +SRC="obj/libCStreamedXML.c" +HDR="obj/StreamedXML.h" + +MONOLITHIC_TESTS="src/libCStreamedXML/build.lib src/libCStreamedXML/build.monolithic" + +if [ -z "${libCStreamedXML_MONOLITHIC}" ] +then + MONOLITHIC_SOURCE="$(echo src/libCStreamedXML/{TopHeader,types,functions,BottomHeader}.h)" + make_monolithic ${HDR} C || return 1 + + MONOLITHIC_SOURCE="$(echo src/libCStreamedXML/{TopSource,buffer,defaults,parser}.c)" + make_monolithic ${SRC} C || return 1 + + libCStreamedXML_MONOLITHIC=1 + MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}" +fi +# kate: replace-trailing-space-save true; space-indent true; tab-width 4; +# vim: expandtab:ts=4:sw=4 diff --git a/src/libCStreamedXML/defaults.c b/src/libCStreamedXML/defaults.c new file mode 100644 index 0000000..b4d147e --- /dev/null +++ b/src/libCStreamedXML/defaults.c @@ -0,0 +1,79 @@ +/* libCStreamedXML/src/libCStreamedXML/defaults.c + * + 
* (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +static void default_notWellFormed(const struct csxml* ctx, const char* reason) +{ + fprintf(stderr, "Streamed XML is not well formed.\n Line : %d\n Col : %d\n Reason: %s\n", + ctx->line + 1, ctx->col + 1, reason); +} + + + +static void default_outOfMemory(const struct csxml* ctx, size_t amount) +{ + (void)ctx; + fprintf(stderr, "Streamed XML parser: out of memory allocating %lu bytes\n", amount); +} + + + +static void default_unknownEntity(const struct csxml* ctx, const char* ent) +{ + fprintf(stderr, "Unknown entity referenced in Streamed XML.\n Line: %d\n Col : %d\n Name: %s\n", + ctx->line + 1, ctx->col + 1, ent); +} + + + +static int default_discard(const struct csxml* ctx, const char* x) +{ + (void)ctx; + (void)x; + return 0; +} + + + +static int default_cdata(const struct csxml* ctx, const char* data) +{ + return ctx->content(ctx, data); +} + + + +static int default_discardPI(const struct csxml* ctx, const char* target, const char* data) +{ + (void)ctx; + (void)target; + (void)data; + return 0; +} + + + +static int default_discardElem(const struct csxml* ctx, const char* elemName, int numAttrs) +{ + (void)ctx; + (void)elemName; + (void)numAttrs; + return 0; +} + + + +static const char* default_discardEnt(const struct csxml* ctx, const char* ent) +{ + (void)ctx; + (void)ent; + return 0; +} + + + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/functions.h b/src/libCStreamedXML/functions.h new file mode 100644 index 0000000..9075ba9 --- /dev/null +++ b/src/libCStreamedXML/functions.h @@ -0,0 +1,102 @@ +/* libCStreamedXML/src/libCStreamedXML/types.h + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + + + +/*! 
\brief Allocate and set up a new parser object. + +\retval 0 on error (see \a errno). +\returns Pointer to newly-allocated parser object. + +Call this function to obtain a parser context object. The object is initially populated with +sensible defaults (\a expandEntities is 1, the error callback functions print messages to stderr, +the data callback functions discard the data, etc.). + +*/ +struct csxml* csxml_newParser(); + + + +/*! \brief Free an existing parser object. + +\param ctx The object to free (may be 0). + +Frees a parser object and all of its associated data structures (strings, lists, etc.). + +*/ +void csxml_freeParser(struct csxml* ctx); + + + +/*! \brief Parse a character. + +\param ctx Parser context. +\param ch Character to parse. +\retval 0 on success. +\retval -1 on XML error. +\retval (non-0) if any callback function signals an error. + +This function parses a single character. It will generate callbacks as appropriate. If one of the +callback functions returns a non-zero value, that value is returned from this function and the +object is put into an error state. If the streamed XML is not well formed, or some other parsing +error occurs, -1 will be returned and the object will be put into an error state. + +Once in an error state, the function will simply discard data passed to it and return 0. However, +a lower-level parser is still running and is capable of matching stream restart markers, allowing +the parser to be reset. + +*/ +int csxml_feedChar(struct csxml* ctx, char ch); + + + +/*! \brief Look up an entity. + +\param ctx Parser context. +\param ent Entity name. +\retval 0 on error. +\returns Pointer to null-terminated string of expanded entity text. + +This function is called to dereference entities. It handles the standard entities itself, and chains +on to the entityRef callback of the parser if needed. If the entity is not found, it calls the +unknownEntity callback and returns 0. 
+ +*/ +const char* csxml_entityRef(struct csxml* ctx, const char* ent); + + + +/*! \brief Reset parser. + +\param ctx Parser context. + +This function will reset the parser into its initial state, as though no data had yet been +encountered. This is called after stream restart markers for example. + +*/ +void csxml_reset(struct csxml* ctx); + + + + +/*! \brief Parse a block of data. + +\param ctx Parser context. +\param data Pointer to block of data. +\param amt Number of bytes to parse. + +This function will parse each character in the block of data. If an error is encountered, the usual +process (callback, error state) is followed, but this function will continue to feed in data. The +idea is that you continue to feed in data from your data source, which might have a restart marker +(at which point the parsing will once again continue). + +*/ +void csxml_feedData(struct csxml* ctx, const char* data, size_t amt); + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/parser.c b/src/libCStreamedXML/parser.c new file mode 100644 index 0000000..cf1fcfb --- /dev/null +++ b/src/libCStreamedXML/parser.c @@ -0,0 +1,692 @@ +/* libCStreamedXML/src/libCStreamedXML/parser.c + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + +//#define DEBUG_STATE_MACHINE + +#ifdef DEBUG_STATE_MACHINE +#include +#endif + +#include +#include +#include + + + +enum csxml_State { + StateNone, // at element or stream level + StateRestartMarker, // after ', '?' 
+    ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '@', 'A'--'C'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'D'--'G'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'H'--'K'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'L'--'O'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'P'--'S'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'T'--'W'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'X'--'Z', '['
+    ClassOther, ClassOther, ClassOther, ClassNameStartChar, // '\\', ']', '^', '_'
+    ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '`', 'a'--'c'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'd'--'g'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'h'--'k'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'l'--'o'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'p'--'s'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 't'--'w'
+    ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'x'--'z', '{'
+    ClassOther, ClassOther, ClassOther, ClassInvalid // '|', '}', '~', control 7F
+};
+
+
+
+/* Feeds one character through the parser state machine.
+ *
+ * Returns 0 on success (or when the character was discarded because the
+ * parser is in the error state), -1 on a well-formedness error or when a
+ * buffer could not be grown, and otherwise the non-zero value returned by a
+ * failing callback. Every failure except a failed buffer growth leaves the
+ * parser in StateError.
+ *
+ * Buffer usage: `buffer' collects whitespace, content, attribute names and
+ * values and close tag names; `buffer2' holds PI targets and entity names;
+ * `elemName' holds the name of the tag being parsed. */
+int csxml_feedChar(struct csxml* ctx, char ch)
+{
+    enum TokenClass c;
+    int try;
+
+#ifdef DEBUG_STATE_MACHINE
+    printf("Character '%c' (%02X), state %d\n", ch, ch, ctx->state);
+#endif
+
+// Enter the error state, report the reason and fail.
+#define ERROR(_reason) do { \
+    ctx->state = StateError; \
+    ctx->notWellFormed(ctx, _reason); \
+    return -1; \
+}while(0)
+
+// Append one character to a context buffer, keeping it null-terminated and
+// growing it when full.
+#define APPEND_CH(_whichbuf, _ch) do { \
+    ctx-> _whichbuf .data[ctx-> _whichbuf .len++] = _ch; \
+    if((ctx-> _whichbuf .len == ctx-> _whichbuf .size) && \
+        do_realloc(ctx, &ctx-> _whichbuf)) return -1; \
+    ctx-> _whichbuf .data[ctx-> _whichbuf .len] = 0; \
+}while(0)
+
+// Make a context buffer the empty string.
+#define CLEAR_BUFFER(_whichbuf) do { \
+    ctx-> _whichbuf .data[0] = 0; \
+    ctx-> _whichbuf .len = 0; \
+}while(0)
+
+// Evaluate a call; on a non-zero result enter the error state and return it.
+#define TRY(_x) do { \
+    try = (_x); \
+    if(try) { \
+        ctx->state = StateError; \
+        return try; \
+    } \
+}while(0)
+
+    // Lower layer: match the stream restart marker in every state, including
+    // the error state.
+    if(ch == xmlRestartMarker[ctx->restartCount]) {
+        if(++ctx->restartCount == 11) {
+            csxml_reset(ctx);
+            ctx->state = StateRestartMarker;
+            return 0;
+        }
+    } else {
+        // The mismatching character may itself begin a marker; don't drop it.
+        ctx->restartCount = (ch == xmlRestartMarker[0]) ? 1 : 0;
+    }
+
+    if(ch & ~0x7F) c = ClassInvalid;
+    else c = token_classes[(int)ch];
+
+    // Newline handling: CR, LF and CR LF each count as one '\n'.
+    if(ch == '\r') {
+        ctx->skipNextNewline = 1;
+        ch = '\n';
+        ++ctx->line;
+        ctx->col = 0;
+    } else if(ch == '\n') {
+        if(ctx->skipNextNewline) {
+            // Second half of CR LF. Clear the flag so that a following
+            // newline is not swallowed as well.
+            ctx->skipNextNewline = 0;
+            return 0;
+        }
+        ++ctx->line;
+        ctx->col = 0;
+    } else {
+        ctx->skipNextNewline = 0;
+    }
+
+    if(c == ClassInvalid) ERROR("Restricted character encountered.");
+
+    // deal with char appropriately, according to state
+    switch(ctx->state) {
+    case StateError:
+        return 0;
+
+    case StateNone:
+        switch(c) {
+        case ClassWhitespace:
+            APPEND_CH(buffer, ch);
+            break;
+
+        case ClassOpenTag:
+            if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
+            ctx->state = StateOpen;
+            CLEAR_BUFFER(buffer);
+            break;
+
+        case ClassEntity:
+            if(ctx->expandEntities) {
+                if(!ctx->elementDepth) ERROR("Entities cannot appear at stream level.");
+                if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
+                CLEAR_BUFFER(buffer);
+                ctx->parsingAttr = 0;
+                ctx->state = StateEntity;
+                break;
+            }
+
+            // fall through
+        default:
+            if(!ctx->elementDepth) ERROR("Content cannot appear at stream level.");
+            if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
+            ctx->state = StateData;
+            CLEAR_BUFFER(buffer);
+            APPEND_CH(buffer, ch);
+            break;
+        }
+        break;
+
+    case StateData:
+        switch(c) {
+        case ClassOpenTag:
+            TRY(ctx->content(ctx, ctx->buffer.data));
+            CLEAR_BUFFER(buffer);
+            ctx->state = StateOpen;
+            break;
+
+        case ClassEntity:
+            // As in StateNone and StateElemAttrVal: '&' is only special when
+            // entity expansion is switched on.
+            if(ctx->expandEntities) {
+                ctx->parsingAttr = 0;
+                ctx->state = StateEntity;
+                break;
+            }
+
+            // fall through
+        default:
+            APPEND_CH(buffer, ch);
+            break;
+        }
+        break;
+
+    case StateCDATA:
+        if(ch == ']') ctx->state = StateCDATA1;
+        else APPEND_CH(buffer, ch);
+        break;
+
+    case StateCDATA1:
+        if(ch == ']') ctx->state = StateCDATA2;
+        else {
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ch);
+            ctx->state = StateCDATA;
+        }
+        break;
+
+    case StateCDATA2:
+        if(ch == '>') {
+            TRY(ctx->cdata(ctx, ctx->buffer.data));
+            CLEAR_BUFFER(buffer);
+            ctx->state = StateNone;
+        } else if(ch == ']') {
+            APPEND_CH(buffer, ']');
+        } else {
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ch);
+            ctx->state = StateCDATA;
+        }
+        break;
+
+    case StateRestartMarker:
+        if(ch == ']') ctx->state = StateRestartMarker1;
+        else APPEND_CH(buffer, ch);
+        break;
+
+    case StateRestartMarker1:
+        if(ch == ']') ctx->state = StateRestartMarker2;
+        else {
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ch);
+            ctx->state = StateRestartMarker;
+        }
+        break;
+
+    case StateRestartMarker2:
+        if(ch == '>') {
+            TRY(ctx->streamRestart(ctx, ctx->buffer.data));
+            csxml_reset(ctx);
+
+        } else if(ch == ']') {
+            APPEND_CH(buffer, ']');
+
+        } else {
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ']');
+            APPEND_CH(buffer, ch);
+            ctx->state = StateRestartMarker;
+        }
+        break;
+
+    case StateOpen:
+        switch(c) {
+        case ClassNameStartChar:
+            ctx->state = StateElemName;
+            ctx->elemAttrNames.len = 0;
+            ctx->elemAttrVals.len = 0;
+            CLEAR_BUFFER(elemName);
+            APPEND_CH(elemName, ch);
+            break;
+
+        default:
+            if(ch == '!') ctx->state = StateOpenBang;
+            else if(ch == '?') {
+                ctx->state = StatePI;
+                CLEAR_BUFFER(buffer2);
+            } else if(ch == '/') {
+                if(!ctx->elementDepth) ERROR("Encountered a close tag at stream level.");
+                ctx->state = StateClose;
+            } else ERROR("Invalid start character for element.");
+            break;
+        }
+        break;
+
+    case StatePI:
+        if(ch == '?') ctx->state = StatePI2;
+        else if(c == ClassWhitespace) {
+            ctx->state = StatePIData;
+            CLEAR_BUFFER(buffer);
+        } else APPEND_CH(buffer2, ch);
+        break;
+
+    case StatePI2:
+        if(ch != '>') ERROR("Invalid target for PI");
+        else {
+            TRY(ctx->PI(ctx, ctx->buffer2.data, 0));
+            ctx->state = StateNone;
+        }
+        break;
+
+    case StatePIData:
+        if(ch == '?') ctx->state = StatePI3;
+        else APPEND_CH(buffer, ch);
+        break;
+
+    case StatePI3:
+        if(ch == '>') {
+            TRY(ctx->PI(ctx, ctx->buffer2.data, ctx->buffer.data));
+            CLEAR_BUFFER(buffer);
+            ctx->state = StateNone;
+        } else if(ch == '?') {
+            APPEND_CH(buffer, '?');
+        } else {
+            APPEND_CH(buffer, '?');
+            APPEND_CH(buffer, ch);
+            ctx->state = StatePIData;
+        }
+        break;
+
+    case StateOpenBang:
+        if(ch == '[') {
+            // restart markers handled by lower layer
+            ctx->state = StateOpenCdataMarker;
+            ctx->xmlCount = 3;
+            if(!ctx->elementDepth) ERROR("CDATA sections not valid at stream level.");
+        } else if(ch == '-') ctx->state = StateOpenComment;
+        else ERROR("Invalid special tag.");
+        break;
+
+    case StateOpenCdataMarker:
+        if(ch != xmlCdataMarker[ctx->xmlCount]) ERROR("Invalid marked section.");
+        if(!xmlCdataMarker[++ctx->xmlCount]) ctx->state = StateCDATA;
+        break;
+
+    case StateOpenComment:
+        if(ch != '-') ERROR("Invalid special tag.");
+        ctx->state = StateComment;
+        CLEAR_BUFFER(buffer);
+        break;
+
+    case StateComment:
+        if(ch == '-') ctx->state = StateComment2;
+        else APPEND_CH(buffer, ch);
+        break;
+
+    case StateComment2:
+        if(ch == '-') ctx->state = StateComment3;
+        else {
+            APPEND_CH(buffer, '-');
+            APPEND_CH(buffer, ch);
+            // A lone '-' is ordinary comment text. Go back to StateComment,
+            // otherwise every following character gets a spurious '-'.
+            ctx->state = StateComment;
+        }
+        break;
+
+    case StateComment3:
+        if(ch != '>') ERROR("`--' not valid in comments");
+        TRY(ctx->comment(ctx, ctx->buffer.data));
+        CLEAR_BUFFER(buffer);
+        ctx->state = StateNone;
+        break;
+
+    case StateElemName:
+        switch(c) {
+        case ClassWhitespace:
+            ctx->state = StateElemTag;
+            CLEAR_BUFFER(buffer);
+            break;
+
+        case ClassNameStartChar:
+        case ClassNameChar:
+            APPEND_CH(elemName, ch);
+            break;
+
+        default:
+            switch(ch) {
+            case '>':
+                TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
+                TRY(ctx->element(ctx, ctx->elemName.data, 0));
+                ctx->state = StateNone;
+                ++ctx->elementDepth;
+                CLEAR_BUFFER(buffer);
+                break;
+
+            case '/':
+                ctx->state = StateNeedClose;
+                break;
+
+            default:
+                ERROR("Invalid character in tag name.");
+            }
+        }
+        break;
+
+    case StateElemTag:
+        switch(c) {
+        case ClassWhitespace:
+            break;
+
+        case ClassNameStartChar:
+            ctx->state = StateElemAttrName;
+            CLEAR_BUFFER(buffer);
+            APPEND_CH(buffer, ch);
+            break;
+
+        default:
+            switch(ch) {
+            case '>':
+                TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
+                TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrNames.len));
+                ctx->state = StateNone;
+                ++ctx->elementDepth;
+                // `buffer' may still hold the last attribute value; without
+                // this it would later be reported as whitespace.
+                CLEAR_BUFFER(buffer);
+                break;
+
+            case '/':
+                ctx->state = StateNeedClose;
+                break;
+
+            default:
+                ERROR("Invalid character in tag.");
+            }
+        }
+        break;
+
+    case StateElemAttrName:
+        switch(c) {
+        case ClassNameStartChar:
+        case ClassNameChar:
+            APPEND_CH(buffer, ch);
+            break;
+
+        default:
+            if(ch != '=') ERROR("Invalid character in attribute name.");
+            TRY(list_push(ctx, &ctx->elemAttrNames, &ctx->buffer));
+            CLEAR_BUFFER(buffer);
+            ctx->state = StateElemAttrEq;
+            break;
+        }
+        break;
+
+    case StateElemAttrEq:
+        if(ch == '\'') ctx->singleQuote = 1;
+        else if(ch == '"') ctx->singleQuote = 0;
+        else ERROR("Invalid character in attribute.");
+        ctx->state = StateElemAttrVal;
+        break;
+
+    case StateElemAttrVal:
+        if((ctx->singleQuote && ch == '\'') || (!ctx->singleQuote && ch == '"')) {
+            TRY(list_push(ctx, &ctx->elemAttrVals, &ctx->buffer));
+            ctx->state = StateElemAttrDone;
+        } else if(ctx->expandEntities && ch == '&') {
+            ctx->parsingAttr = 1;
+            ctx->state = StateEntity;
+        } else APPEND_CH(buffer, ch);
+        break;
+
+    case StateElemAttrDone:
+        switch(c) {
+        case ClassWhitespace:
+            ctx->state = StateElemTag;
+            break;
+
+        default:
+            if(ch == '/') {
+                ctx->state = StateNeedClose;
+            } else if(ch == '>') {
+                TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
+                TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
+                CLEAR_BUFFER(buffer);
+                ctx->state = StateNone;
+                ++ctx->elementDepth;
+            } else ERROR("Invalid character after attribute.");
+            break;
+        }
+        break;
+
+    case StateNeedClose:
+        if(ch != '>') ERROR("Stray `/' in open tag.");
+        TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
+        TRY(ctx->closeTag(ctx, ctx->elemName.data));
+        CLEAR_BUFFER(buffer);
+        ctx->state = StateNone;
+        break;
+
+    case StateClose:
+        if(c != ClassNameStartChar) ERROR("Invalid character in close tag name.");
+        APPEND_CH(buffer, ch);
+        ctx->state = StateClosing;
+        break;
+
+    case StateClosing:
+        switch(c) {
+        case ClassNameStartChar:
+        case ClassNameChar:
+            APPEND_CH(buffer, ch);
+            break;
+
+        case ClassWhitespace:
+            ctx->state = StateNeedClose2;
+            break;
+
+        default:
+            if(ch != '>') ERROR("Invalid character in close tag name.");
+            TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
+            if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
+            TRY(ctx->closeTag(ctx, ctx->elemName.data));
+            ctx->state = StateNone;
+            --ctx->elementDepth;
+            CLEAR_BUFFER(buffer);
+
+        }
+        break;
+
+    case StateNeedClose2:
+        if(c == ClassWhitespace) break;
+        if(ch != '>') ERROR("Invalid data in close tag.");
+        TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
+        if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
+        TRY(ctx->closeTag(ctx, ctx->elemName.data));
+        ctx->state = StateNone;
+        --ctx->elementDepth;
+        CLEAR_BUFFER(buffer);
+        break;
+
+    case StateEntity:
+        if(ch == '#') {
+            ctx->state = StateCharEntity;
+            ctx->entityChar = 0;
+        } else if(c == ClassNameStartChar) {
+            // The entity name is collected in buffer2, which is what
+            // StateEntityName appends to and looks up. The first character
+            // used to go into `buffer' (the text being built) and buffer2
+            // was never cleared, so no named entity could be resolved.
+            CLEAR_BUFFER(buffer2);
+            APPEND_CH(buffer2, ch);
+            ctx->state = StateEntityName;
+        } else ERROR("Invalid entity name.");
+        break;
+
+    case StateCharEntity:
+        if(ch == ';') {
+            // "&#;" has no digits; don't append a NUL to the text.
+            if(!ctx->entityChar) ERROR("Character code missing in character entity.");
+            APPEND_CH(buffer, ctx->entityChar);
+            ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
+            break;
+
+        } else if(ch >= '0' && ch <= '9') {
+            ctx->entityChar *= 10;
+            ctx->entityChar += (ch - '0');
+            if(!ctx->entityChar || ctx->entityChar > 126) ERROR("Character code out of range in character entity.");
+        } else ERROR("Invalid character in character entity.");
+        break;
+
+    case StateEntityName:
+        if(ch == ';') {
+            const char* e = csxml_entityRef(ctx, ctx->buffer2.data);
+            TRY(!e || buffer_strcat(ctx, &ctx->buffer, e));
+            ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
+            break;
+        }
+        if(c != ClassNameChar && c != ClassNameStartChar) ERROR("Invalid entity name.");
+        APPEND_CH(buffer2, ch);
+        break;
+    }
+    ++ctx->col;
+    return 0;
+}
+
+
+
+/* Resolves an entity name to its replacement text.
+ *
+ * The five predefined entities are handled here; any other name is passed to
+ * the entityRef callback. If that returns 0 the unknownEntity callback is
+ * called, the parser enters the error state and 0 is returned. */
+const char* csxml_entityRef(struct csxml* ctx, const char* ent)
+{
+    const char* q = 0;
+
+    if(!strcmp(ent, "quot")) return "\"";
+    if(!strcmp(ent, "amp")) return "&";
+    if(!strcmp(ent, "apos")) return "'";
+    if(!strcmp(ent, "lt")) return "<";
+    if(!strcmp(ent, "gt")) return ">";
+
+    q = ctx->entityRef(ctx, ent);
+    if(!q) {
+        ctx->unknownEntity(ctx, ent);
+        ctx->state = StateError;
+        return 0;
+    }
+
+    return q;
+}
+
+
+
+/* Returns the parser to its initial state: buffers emptied, counters,
+ * position and lists reset. Callbacks and options are left untouched. */
+void csxml_reset(struct csxml* ctx)
+{
+    CLEAR_BUFFER(buffer);
+    CLEAR_BUFFER(buffer2);
+    CLEAR_BUFFER(elemName);
+    ctx->state = StateNone;
+    ctx->xmlCount = 0;
+    ctx->elementDepth = 0;
+    ctx->restartCount = 0;
+    ctx->skipNextNewline = 0;
+    ctx->parsingAttr = 0;
+    ctx->line = 0;
+    ctx->col = 0;
+    ctx->elemStack.len = 0;
+    ctx->elemAttrNames.len = 0;
+    ctx->elemAttrVals.len = 0;
+}
+
+
+
+#undef ERROR
+#undef APPEND_CH
+#undef CLEAR_BUFFER
+#undef TRY
+
+
+
+/* Frees a parser object (may be 0) together with its buffers and lists. */
+void csxml_freeParser(struct csxml* ctx)
+{
+    if(!ctx) return;
+    buffer_free(&ctx->buffer);
+    buffer_free(&ctx->buffer2);
+    buffer_free(&ctx->elemName);
+    list_free(&ctx->elemStack);
+    list_free(&ctx->elemAttrNames);
+    list_free(&ctx->elemAttrVals);
+    // The context itself comes from malloc() in csxml_newParser(); it used
+    // to be leaked here.
+    free(ctx);
+}
+
+
+
+struct csxml* csxml_newParser()
+{
+    struct csxml* ctx = 0;
+
+    ctx = malloc(sizeof(struct csxml));
+    if(!ctx) return 0;
+    memset(ctx, 0, sizeof(struct csxml));
+
+
if(buffer_init(ctx, &ctx->buffer) || buffer_init(ctx, &ctx->buffer2) || buffer_init(ctx, &ctx->elemName)) { + csxml_freeParser(ctx); + return 0; + } + + ctx->expandEntities = 1; + + ctx->notWellFormed = default_notWellFormed; + ctx->outOfMemory = default_outOfMemory; + ctx->unknownEntity = default_unknownEntity; + ctx->whiteSpace = default_discard; + ctx->content = default_discard; + ctx->cdata = default_cdata; + ctx->streamRestart = default_discard; + ctx->PI = default_discardPI; + ctx->comment = default_discard; + ctx->element = default_discardElem; + ctx->closeTag = default_discard; + ctx->entityRef = default_discardEnt; + + return ctx; +} + + + +void csxml_feedData(struct csxml* ctx, const char* data, size_t amt) +{ + while(amt--) csxml_feedChar(ctx, *data++); +} + + + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/ diff --git a/src/libCStreamedXML/pkgconf.in b/src/libCStreamedXML/pkgconf.in new file mode 100644 index 0000000..ba36045 --- /dev/null +++ b/src/libCStreamedXML/pkgconf.in @@ -0,0 +1,21 @@ +# libCStreamedXML/src/lib/clib/pkgconf.in +# +# Metadata file for pkg-config +# ( http://www.freedesktop.org/software/pkgconfig/ ) +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. +# + +# Name, description +Name: @TODO@ +Description: @TODO@ +Version: @VERSION@ + +# Requirements +Requires: + +# Compilation information +Libs: -L@LIBDIR@ -lCStreamedXML +Cflags: -I@INCLUDEDIR@ diff --git a/src/libCStreamedXML/soversion b/src/libCStreamedXML/soversion new file mode 100644 index 0000000..af384de --- /dev/null +++ b/src/libCStreamedXML/soversion @@ -0,0 +1,17 @@ +# libCStreamedXML/src/libCStreamedXML/soversion +# +# (c)2006, Laurence Withers, . +# Released under the GNU GPLv2. See file COPYING or +# http://www.gnu.org/copyleft/gpl.html for details. 
+# + + + +# SOMAJOR and SOMINOR are included in the library's soname. They need to +# be bumped on a binary-incompatible release. They are both single +# integers. +SOMAJOR=0 +SOMINOR=0 + +# SOMICRO is bumped every time there is a binary-compatible release. +SOMICRO=0 diff --git a/src/libCStreamedXML/types.h b/src/libCStreamedXML/types.h new file mode 100644 index 0000000..95442b6 --- /dev/null +++ b/src/libCStreamedXML/types.h @@ -0,0 +1,320 @@ +/* libCStreamedXML/src/libCStreamedXML/types.h + * + * (c)2006, Laurence Withers. Released under the GNU GPL. See file + * COPYING for more information / terms of license. +*/ + + + +/*! \brief String buffer. + +This type implements a simple string buffer. The string should be kept null-terminated. The \a data +pointer is reallocated as necessary (i.e. when \a len becomes equal to \a size). \a len is the +string length (excluding terminating null) and \a size is the size of the allocated buffer. It is +never valid for \a data or \a size to be 0. + +*/ +struct csxml_buf { + /// Pointer to string (null terminated). + char* data; + + /// Length of string (excluding null). + size_t len; + + /// Size of allocated buffer. + size_t size; +}; + + + +/*! \brief String list. + +This type implements a dynamic array of strings (using the struct csxml_buf type). The \a data +pointer is reallocated as necessary (i.e. when \a len becomes equal to \a size). \a len is the +number of valid elements in the array and \a size is the size of the allocated buffer. It is valid +for \a data and \a size to be zero. + +*/ +struct csxml_list { + /// Array of buffers (all allocated elements are valid and initialised). + struct csxml_buf* data; + + /// Number of elements in use. + size_t len; + + /// Number of allocated elements. + size_t size; +}; + + + +/*! \brief Streamed XML parser context. + +This structure contains all the details of a Streamed XML parsing operation. 
Most fields are +internal to the parser and should not be touched; those are briefly documented here. You may want to +change the \a expandEntities option, examine the \a line and \a col variables, and change the +callback functions. + +Those members marked Internal: are subject to change (either in semantics, type or existence). + +*/ +struct csxml { + /// Option: change "&lt;" to "<", etc. if non-zero (the default). Change to 0 to leave + /// entities inline. + int expandEntities; + + /// Current line (from 0, starts from last restart marker). + int line; + /// Current column (from 0, starts from last restart marker). + int col; + + /// List of attribute names for current element. + struct csxml_list elemAttrNames; + /// List of attribute values for current element. + struct csxml_list elemAttrVals; + + + + /*! \brief Error callback: Streamed XML is not well formed. + + \param ctx Parsing context. + \param reason Human-readable description of error. + + This function is called whenever badly-formed Streamed XML is encountered (e.g. if content is + encountered at stream-level). + + The default implementation prints a formatted message to \a stderr. + + */ + void (*notWellFormed)(const struct csxml* ctx, const char* reason); + + + + /*! \brief Error callback: out of memory. + + \param ctx Parsing context. + \param amount Number of bytes we tried to allocate. + + This function is called whenever a dynamic string resizing operation (or similar) fails. It is + called with the number of bytes the library attempted to allocate. + + The default implementation prints a formatted message to \a stderr. + + \todo When parsing extremely long content sections, we could just call the content() callback + when memory is low rather than failing altogether. + + */ + void (*outOfMemory)(const struct csxml* ctx, size_t amount); + + + + /*! \brief Error callback: reference to unknown entity. + + \param ctx Parsing context. + \param ent Name of the referenced entity. 
+ + If an entity is referenced by name (e.g. "&myEntity;" is encountered) but the lookup in the + default entity list and through the entityRef() callback fails to resolve the content, this + function is called. + + The default implementation prints a formatted message to \a stderr. + + */ + void (*unknownEntity)(const struct csxml* ctx, const char* ent); + + + + /*! \brief Callback: whitespace. + + \param ctx Parsing context. + \param ws Pointer to string of whitespace. + \returns Zero on success, non-zero on error. + + This function is called whenever a block of whitespace is encountered at stream level or + immediately after some structural element (i.e. on leading whitespace within elements). It is + generally safe to discard it, unless you need to reproduce the file verbatim. Once content is + encountered within an element, any further whitespace will be reported through the content() + callback, up to the next structural element. + + The default implementation discards the data. + + */ + int (*whiteSpace)(const struct csxml* ctx, const char* ws); + + + + /*! \brief Callback: content. + + \param ctx Parsing context. + \param content Pointer to string of cdata. + \returns Zero on success, non-zero on error. + + This function is called whenever textual content (cdata) is encountered inside an element. It + may be called multiple times in succession, in which case the data from each call should be + concatenated. If \a expandEntities is non-zero, then this function will also be called with the + expanded text from any entities encountered, rather than the entities themselves. + + The default implementation discards the data. + + */ + int (*content)(const struct csxml* ctx, const char* content); + + + + /*! \brief Callback: cdata block. + + \param ctx Parsing context. + \param cdata Pointer to string of cdata. + \returns Zero on success, non-zero on error. + + This function is called for all data inside a CDATA marked section.
It is useful to + differentiate between cdata encoded in this manner and normal cdata if you are trying to + reproduce the original file verbatim. + + The default implementation calls content() with the same data. + + */ + int (*cdata)(const struct csxml* ctx, const char* cdata); + + + + /*! \brief Callback: stream restart marker. + + \param ctx Parsing context. + \param marker Pointer to marker string. + \returns Zero on success, non-zero on error. + + Whenever a stream restart marker is encountered, this function is called with the data found + inside the marker. + + The default implementation ignores the data (but note that the parser state is always reset, + regardless of what the callback does). + + */ + int (*streamRestart)(const struct csxml* ctx, const char* marker); + + + + /*! \brief Callback: processing instruction. + + \param ctx Parsing context. + \param target PI target. + \param data PI data (may be 0). + \returns Zero on success, non-zero on error. + + This callback is used to report processing instructions (PIs). If the PI has no data, then the + \a data variable can be set to 0. + + The default implementation ignores the data. + + */ + int (*PI)(const struct csxml* ctx, const char* target, const char* data); + + + + /*! \brief Callback: comment. + + \param ctx Parsing context. + \param comment Pointer to comment string data. + \returns Zero on success, non-zero on error. + + This callback is used to report comments. It is only useful if you wish to reproduce the source + file verbatim (for instance, you want to alter some data in a file but preserve user comments). + + The default implementation ignores the data. + + */ + int (*comment)(const struct csxml* ctx, const char* comment); + + + + /*! \brief Callback: element. + + \param ctx Parsing context. + \param elemName Pointer to element name string. + \param numAttrs Number of attributes. + \returns Zero on success, non-zero on error.
+ + This callback is used whenever an element open tag is encountered. To parse attributes, use the + numAttrs variable to determine how many there are, and then access them with: + +
        // get attribute name
+        const char* attrName = ctx->elemAttrNames.data[i].data;
+
+        // get attribute value
+        const char* attrValue = ctx->elemAttrVals.data[i].data;
+ + In the case of an empty element, the closeTag() callback will be called immediately afterwards. + + The default implementation ignores the data. + + */ + int (*element)(const struct csxml* ctx, const char* elemName, int numAttrs); + + + + /*! \brief Callback: close tag. + + \param ctx Parsing context. + \param elemName Element name. + \returns Zero on success, non-zero on error. + + This function is called whenever an element close tag is encountered. The element name will + always match the open tag element name (if not, the Streamed XML is not well formed, and an + error will be signalled before this callback is triggered). + + The default implementation ignores the data. + + */ + int (*closeTag)(const struct csxml* ctx, const char* elemName); + + + + /*! \brief Callback: entity reference. + + \param ctx Parsing context. + \param ent Entity name. + \retval 0 if the entity name does not match any known entity. + \returns Pointer to null-terminated string data for expanding entity. + + This function is called whenever \a expandEntities is true and an entity is encountered in cdata + or an attribute value. It is only called if the entity does not match one of the five built-in + defaults. You only need to provide an implementation if you know about some additional entities. + + The default implementation returns 0 ("no such entity"). + + */ + const char* (*entityRef)(const struct csxml* ctx, const char* ent); + + + + /// Internal: string buffer. + struct csxml_buf buffer; + /// Internal: string buffer. + struct csxml_buf buffer2; + /// Internal: string buffer holding the current element's name. + struct csxml_buf elemName; + /// Internal: stack of element names, used to match open/close tags. + struct csxml_list elemStack; + /// Internal: state machine state. + int state; + /// Internal: for matching strings. + int xmlCount; + /// Internal: depth in element tree (0 = stream level). + int elementDepth; + /// Internal: for matching against restart marker string.
+ int restartCount; + /// Internal: for stripping windows line endings. + int skipNextNewline; + /// Internal: flag to check if we are currently parsing an attribute while expanding an entity. + int parsingAttr; + /// Internal: flag to record if current attribute was quoted with ' or " char. + int singleQuote; + /// Internal: used to expand character entities. + int entityChar; +}; + +/* options for text editors +kate: replace-trailing-space-save true; space-indent true; tab-width 4; +vim: expandtab:ts=4:sw=4 +*/