Add in libCStreamedXML and a test program for it.

This commit is contained in:
Laurence Withers 2006-08-08 12:19:08 +01:00
parent dc6a4cf430
commit 4533df2947
22 changed files with 1727 additions and 0 deletions

1
src/ctests/.params Normal file
View File

@ -0,0 +1 @@
c tests ctests libCStreamedXML

3
src/ctests/build.default Normal file
View File

@ -0,0 +1,3 @@
# Delegates to the test-program build script; everything it defines or builds
# happens in the current shell because the script is sourced, not executed.
source src/ctests/build.tests
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

43
src/ctests/build.tests Normal file
View File

@ -0,0 +1,43 @@
# These are external variables, and shouldn't clash with anything else
#  ctests_BUILT
#
# Builds each src/ctests/*.c into obj/tests/<name>, linking against the
# library produced by the libCStreamedXML target. A test is rebuilt only when
# the library, its own source, or this script is newer than the binary.
# ctests_BUILT guards against running the build twice in one session.
build_target libCStreamedXML || return 1

# Quoted so that an unset/empty variable is still a well-formed test.
if [ -z "${ctests_BUILT}" ]
then
    # LIBS is deliberately expanded unquoted below: it is a word list.
    LIBS="${libCStreamedXML} "
    EXTRAS=""

    echo "Building test programs..."
    do_cmd mkdir -p obj/tests || return 1

    for SRC in src/ctests/*.c
    do
        # basename strips the literal ".c" suffix (no regex involved)
        TEST="obj/tests/$(basename "${SRC}" .c)"
        MODIFIED=0
        for file in ${LIBS} "${SRC}" src/ctests/build.tests
        do
            if [ "${file}" -nt "${TEST}" ]
            then
                MODIFIED=1
                break
            fi
        done

        if [ ${MODIFIED} -ne 0 ]
        then
            do_cmd ${CC} -Iobj ${CFLAGS} -o "${TEST}" "${SRC}" ${LIBS} ${EXTRAS} || return 1
            print_success "Built ${TEST}"
        else
            print_success "${TEST} is up to date"
        fi
    done

    print_success "All tests built"
    ctests_BUILT=1
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

137
src/ctests/cCallback.c Normal file
View File

@ -0,0 +1,137 @@
/* libStreamedXML/src/ctests/cCallback.c
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#include "StreamedXML.h"
#include <stdio.h>
#include <string.h>
/* whiteSpace callback: prints the whitespace run to stdout and reports
 * success (0) so parsing continues. The parser context is not consulted. */
int echo_whiteSpace(const struct csxml* ctx, const char* ws)
{
    printf("Whitespace: ``%s''\n", ws);

    (void)ctx;
    return 0;
}
/* content callback: prints the character data to stdout and returns 0.
 * The parser context is not consulted. */
int echo_content(const struct csxml* ctx, const char* data)
{
    printf("Content: ``%s''\n", data);

    (void)ctx;
    return 0;
}
/* cdata callback: prints the CDATA section text to stdout and returns 0.
 * The parser context is not consulted. */
int echo_cdata(const struct csxml* ctx, const char* data)
{
    printf("CDATA: ``%s''\n", data);

    (void)ctx;
    return 0;
}
/* streamRestart callback: prints the text carried by the restart marker to
 * stdout and returns 0. The parser context is not consulted. */
int echo_streamRestart(const struct csxml* ctx, const char* data)
{
    printf("Stream restart marker: ``%s''\n", data);

    (void)ctx;
    return 0;
}
/* PI callback: prints the processing-instruction target and, when present,
 * its data. A null `data` is reported as "not specified". Always returns 0. */
int echo_PI(const struct csxml* ctx, const char* target, const char* data)
{
    (void)ctx;

    printf("PI: target ``%s'', data ", target);
    if(!data) {
        printf("not specified\n");
    } else {
        printf("``%s''\n", data);
    }
    return 0;
}
/* comment callback: prints the comment body to stdout and returns 0.
 * The parser context is not consulted. */
int echo_comment(const struct csxml* ctx, const char* data)
{
    printf("Comment: ``%s''\n", data);

    (void)ctx;
    return 0;
}
/* element callback: prints the opening tag name and attribute count, then one
 * line per attribute. Attribute names and values are read pairwise from the
 * context's elemAttrNames / elemAttrVals lists. Always returns 0. */
int echo_element(const struct csxml* ctx, const char* elemName, int numAttrs)
{
    int idx = 0;

    printf("Open tag: <%s> (%d attributes)\n", elemName, numAttrs);
    while(idx < numAttrs) {
        printf(" %s='%s'\n", ctx->elemAttrNames.data[idx].data, ctx->elemAttrVals.data[idx].data);
        ++idx;
    }
    return 0;
}
/* closeTag callback: prints the closing tag to stdout and returns 0.
 * The parser context is not consulted. */
int echo_closeTag(const struct csxml* ctx, const char* elemName)
{
    printf("Close tag: </%s>\n", elemName);

    (void)ctx;
    return 0;
}
/* entityRef callback: prints the referenced entity name and returns a null
 * pointer, i.e. it never supplies an expansion of its own. */
const char* echo_entityRef(const struct csxml* ctx, const char* ent)
{
    printf("Entity reference: ``%s''\n", ent);

    (void)ctx;
    return NULL;
}
/* Test driver: parses the file named on the command line and echoes every
 * parser callback to stdout.
 *
 * Usage: cCallback <filename>
 *        cCallback --print-summary   (prints a one-line description)
 *
 * Returns 0 on success, 1 on usage, open, allocation or read errors.
 */
int main(int argc, char* argv[])
{
    FILE* fp = 0;
    char buf[1024];
    size_t amt;
    struct csxml* ctx = 0;
    int ret = 0;

    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("{C library} Tests the callback functions.\n");
        return 0;
    }

    if(argc != 2) {
        fprintf(stderr, "Expecting filename.\n");
        return 1;
    }

    fp = fopen(argv[1], "r");
    if(!fp) {
        fprintf(stderr, "Error opening '%s' for input.\n", argv[1]);
        return 1;
    }

    ctx = csxml_newParser();
    if(!ctx) {
        fprintf(stderr, "Couldn't allocate a new parser.\n");
        fclose(fp);
        return 1;
    }

    ctx->whiteSpace = echo_whiteSpace;
    ctx->content = echo_content;
    ctx->cdata = echo_cdata;
    ctx->streamRestart = echo_streamRestart;
    ctx->PI = echo_PI;
    ctx->comment = echo_comment;
    ctx->element = echo_element;
    ctx->closeTag = echo_closeTag;
    ctx->entityRef = echo_entityRef;

    /* Loop on the fread() result rather than feof(): a read error never sets
     * the EOF indicator, so testing feof() alone would spin forever. */
    while((amt = fread(buf, 1, sizeof(buf), fp)) > 0) {
        csxml_feedData(ctx, buf, amt);
    }
    if(ferror(fp)) {
        fprintf(stderr, "Error reading '%s'.\n", argv[1]);
        ret = 1;
    }

    csxml_freeParser(ctx);
    fclose(fp);
    return ret;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

35
src/ctests/template Normal file
View File

@ -0,0 +1,35 @@
/* libStreamedXML/src/ctests/???.c
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#include "StreamedXML.h"
#include <stdio.h>
#include <string.h>
/* Skeleton entry point for a new test program.
 *
 * When invoked with the single argument "--print-summary" it prints a one-line
 * description and exits with 0. Otherwise it falls through to the (not yet
 * written) test body and returns `ret`.
 *
 * Note: strcmp() requires <string.h>.
 */
int main(int argc, char* argv[])
{
    int ret = 0;

    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("One line summary.\n");
        return 0;
    }

    if(argc == 1) {
        // empty argument list
    }

    // TODO
    return ret;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

Binary file not shown.

View File

@ -0,0 +1 @@
c lib libCStreamedXML StreamedXML.h

View File

@ -0,0 +1,14 @@
/* libCStreamedXML/src/libCStreamedXML/BottomHeader.h
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#endif
/*!@}*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,32 @@
/* libCStreamedXML/src/libCStreamedXML/TopHeader.h
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#ifndef HEADER_libCStreamedXML
#define HEADER_libCStreamedXML
// standard includes, or includes needed for type declarations
#include <sys/types.h>
/*! \defgroup libCStreamedXML libCStreamedXML interface.
libCStreamedXML is a C library for parsing Streamed XML data. It is somewhat simpler than the C++
library (it only parses ASCII data, not Unicode data). In future, the API could be extended to
use only fixed-size buffers, and not to call malloc() at all. This would be advantageous for small
embedded systems (e.g. PICs).
To parse Streamed XML, you must first create a parser context object with csxml_newParser(). Then
you set up the returned object by changing options and callback functions. Once set up correctly,
pass in data using the csxml_feedChar() and csxml_feedData() functions. Finally, free the parser
object with csxml_freeParser().
*/
/*!@{*/
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,20 @@
/* libCStreamedXML/src/libCStreamedXML/TopSource.c
*
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
* Released under the GNU GPLv2. See file COPYING or
* http://www.gnu.org/copyleft/gpl.html for details.
*/
#include "StreamedXML.h"
// Below are all the includes used throughout the library.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
//#define DEBUG_STATE_MACHINE
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,100 @@
/* libCStreamedXML/src/libCStreamedXML/buffer.c
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
/* Doubles the capacity of `buf`.
 *
 * On success the buffer's data pointer and size are updated and 0 is returned.
 * On allocation failure the buffer is left untouched, the context's
 * outOfMemory callback is told the size that was requested, and -1 is
 * returned.
 */
static int do_realloc(struct csxml* ctx, struct csxml_buf* buf)
{
    char* grown = realloc(buf->data, buf->size << 1);

    if(grown) {
        buf->data = grown;
        buf->size <<= 1;
        return 0;
    }

    ctx->outOfMemory(ctx, buf->size << 1);
    return -1;
}
/* Replaces the contents of `dest` with a copy of the string held in `src`.
 *
 * `dest` is grown (by repeated doubling) until it can hold src->len characters
 * plus the terminator. Returns 0 on success, -1 if growing fails.
 */
static int buffer_copy(struct csxml* ctx, struct csxml_buf* dest, const struct csxml_buf* src)
{
    while(src->len >= dest->size) {
        if(do_realloc(ctx, dest)) return -1;
    }

    strcpy(dest->data, src->data);
    dest->len = src->len;
    return 0;
}
/* Appends the null-terminated string `src` to `dest`.
 *
 * `dest` is grown (by repeated doubling) until the combined text plus the
 * terminator fits. The text is written at offset dest->len -- the same
 * position the parser's character-append macro uses -- and dest->len is
 * advanced, so later appends continue after the concatenated text instead of
 * overwriting it.
 *
 * Returns 0 on success, -1 if growing fails (dest is then unchanged apart
 * from any capacity growth that did succeed).
 */
static int buffer_strcat(struct csxml* ctx, struct csxml_buf* dest, const char* src)
{
    size_t srclen = strlen(src);
    size_t req = srclen + dest->len;

    while(req >= dest->size) if(do_realloc(ctx, dest)) return -1;

    /* srclen + 1 copies the terminator as well */
    memcpy(dest->data + dest->len, src, srclen + 1);
    dest->len = req;
    return 0;
}
static int buffer_init(struct csxml* ctx, struct csxml_buf* buf)
{
memset(buf, 0, sizeof(buf));
buf->size = 256;
buf->data = malloc(buf->size);
if(!buf->data) {
ctx->outOfMemory(ctx, buf->size);
return -1;
}
return 0;
}
/* Releases the storage owned by `buf` and zeroes the structure, so that the
 * data pointer is null afterwards. */
static void buffer_free(struct csxml_buf* buf)
{
    free(buf->data);
    memset(buf, 0, sizeof(*buf));
}
static int do_realloc2(struct csxml* ctx, struct csxml_list* list)
{
size_t i, newlen = list->size ? (list->size << 1) : 4;
struct csxml_buf* n = realloc(list->data, newlen * sizeof(struct csxml_buf));
if(!n) {
ctx->outOfMemory(ctx, newlen * sizeof(struct csxml_buf));
return -1;
}
memset(n + list->size * sizeof(struct csxml_buf), 0, newlen * sizeof(struct csxml_buf));
for(i = 0; i < newlen; ++i) {
if(buffer_init(ctx, n + list->size + i)) return -1;
}
list->size = newlen;
list->data = n;
return 0;
}
/* Appends a copy of `buf` to the end of `list`, growing the list first when
 * it is full. Returns 0 on success, -1 if growing or copying fails (the
 * list length is then unchanged). */
static int list_push(struct csxml* ctx, struct csxml_list* list, struct csxml_buf* buf)
{
    if(list->size == list->len) {
        if(do_realloc2(ctx, list)) return -1;
    }

    if(buffer_copy(ctx, &list->data[list->len], buf)) return -1;

    list->len++;
    return 0;
}
/* Removes the last entry from `list` and returns a pointer to it. The entry's
 * storage stays owned by the list. No check is made for an empty list. */
static struct csxml_buf* list_pop(struct csxml_list* list)
{
    list->len -= 1;
    return &list->data[list->len];
}
/* Frees every buffer slot in `list` (all `size` slots, not just the `len` in
 * use), then the slot array itself, and zeroes the list structure. */
static void list_free(struct csxml_list* list)
{
    size_t idx = 0;

    while(idx < list->size) {
        buffer_free(&list->data[idx]);
        ++idx;
    }

    free(list->data);
    memset(list, 0, sizeof(*list));
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1 @@
# Delegates to the library build script (sourced into the current shell).
source src/libCStreamedXML/build.lib

View File

@ -0,0 +1 @@
# Delegates to the library install script (sourced into the current shell).
source src/libCStreamedXML/build.install-lib

View File

@ -0,0 +1,36 @@
# Installs the shared library, its version symlinks, the public header and
# the pkg-config file. Every step aborts the script on failure so that a
# partial install is never reported as "Done".
build_target libCStreamedXML || return 1

# make paths (this is for Gentoo in particular)
build_dir_tree "${LIBDIR}" || return 1
build_dir_tree "${PKGCONFDIR}" || return 1
build_dir_tree "${INCLUDEDIR}" || return 1

# install library
echo "Installing libraries into '${LIBDIR}'"
install_file ${libCStreamedXML} ${LIBDIR} 0755 || return 1

BASE="${libCStreamedXML_BASE}.so"
MAJOR="${BASE}.${SOMAJOR}"
MINOR="${MAJOR}.${SOMINOR}"
MICRO="${MINOR}.${SOMICRO}"
# NOTE(review): assumes install_symlink follows the same non-zero-on-failure
# convention as install_file / install_header used in this script -- confirm.
install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" || return 1
install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" || return 1
install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" || return 1

# install header
echo "Installing header file '${libCStreamedXML_HEADER}' into ${INCLUDEDIR}"
install_header ${libCStreamedXML_HEADER} ${INCLUDEDIR} 0644 || return 1

# install pkgconfig file
echo "Installing package config file into ${PKGCONFDIR}"
PKGCONFFILE=${PKGCONFDIR}/libCStreamedXML.pc
do_cmd rm -f "${PKGCONFFILE}" || return 1
do_cmd_redir "${PKGCONFFILE}" sed \
    -e "s,@VERSION@,${VERSION}," \
    -e "s,@LIBDIR@,${FINALLIBDIR}," \
    -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \
    src/libCStreamedXML/pkgconf.in || return 1
do_cmd chmod 0644 "${PKGCONFFILE}" || return 1

print_success "Done"
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,51 @@
# These are external variables, and shouldn't clash with anything else
#  libCStreamedXML
#  libCStreamedXML_BUILT
#  libCStreamedXML_HEADER
#  libCStreamedXML_BASE
#
# Builds the shared library obj/libCStreamedXML.so.<major>.<minor>.<micro>
# from the monolithic source, but only when the build scripts, the monolithic
# header or the monolithic source are newer than the existing library.
# libCStreamedXML_BUILT guards against running the build twice in one session.

# Quoted so that an unset/empty variable is still a well-formed test.
if [ -z "${libCStreamedXML_BUILT}" ]
then
    libCStreamedXML_BASE=libCStreamedXML
    source src/libCStreamedXML/soversion

    libCStreamedXML="obj/${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}"
    SO_EXTRA="-lc" # @TODO@ libs, cflags

    echo "Building library ${libCStreamedXML}..."

    do_cmd source src/libCStreamedXML/build.monolithic || return 1

    MODIFIED=0
    for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC}
    do
        if [ "${test}" -nt "${libCStreamedXML}" ]
        then
            MODIFIED=1
            break
        fi
    done

    if [ ${MODIFIED} -ne 0 ]
    then
        echo " Compiling"

        SONAME="${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}"
        do_cmd ${CC} ${CFLAGS} -shared -fpic -o "${libCStreamedXML}" \
            -Wl,-soname,${SONAME} \
            ${SRC} ${SO_EXTRA} || return 1

        # make tests work
        do_cmd ln -sf "$(basename "${libCStreamedXML}")" "obj/${SONAME}" || return 1

        print_success "Library built"
    else
        print_success "Library up to date"
    fi

    libCStreamedXML_BUILT=1
    libCStreamedXML_HEADER=${HDR}
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,21 @@
# These are external variables, and shouldn't clash with anything else
# libCStreamedXML_MONOLITHIC
#
# Defines the monolithic header/source paths (HDR, SRC) and the list of build
# scripts whose modification should trigger a rebuild (MONOLITHIC_TESTS), then
# generates the monolithic files once per session.
SRC="obj/libCStreamedXML.c"
HDR="obj/StreamedXML.h"
MONOLITHIC_TESTS="src/libCStreamedXML/build.lib src/libCStreamedXML/build.monolithic"
if [ -z "${libCStreamedXML_MONOLITHIC}" ]
then
# make_monolithic presumably reads the input list from MONOLITHIC_SOURCE and
# combines those files, in the order given, into its first argument -- the
# helper is defined outside this script, so confirm there.
MONOLITHIC_SOURCE="$(echo src/libCStreamedXML/{TopHeader,types,functions,BottomHeader}.h)"
make_monolithic ${HDR} C || return 1
MONOLITHIC_SOURCE="$(echo src/libCStreamedXML/{TopSource,buffer,defaults,parser}.c)"
make_monolithic ${SRC} C || return 1
libCStreamedXML_MONOLITHIC=1
# append the generated header to the accumulated MONOLITHIC_DOC list
MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4

View File

@ -0,0 +1,79 @@
/* libCStreamedXML/src/libCStreamedXML/defaults.c
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
/* Default notWellFormed callback: writes a diagnostic to stderr giving the
 * position (the context's line and column, each printed plus one) and the
 * reason supplied by the parser. */
static void default_notWellFormed(const struct csxml* ctx, const char* reason)
{
    fprintf(
        stderr,
        "Streamed XML is not well formed.\n Line : %d\n Col : %d\n Reason: %s\n",
        ctx->line + 1,
        ctx->col + 1,
        reason
    );
}
/* Default outOfMemory callback: writes a diagnostic to stderr naming the
 * number of bytes that could not be allocated. The context is not consulted.
 *
 * `amount` is a size_t; it is converted explicitly because "%lu" requires an
 * unsigned long argument and the two types are not the same on every
 * platform.
 */
static void default_outOfMemory(const struct csxml* ctx, size_t amount)
{
    (void)ctx;
    fprintf(stderr, "Streamed XML parser: out of memory allocating %lu bytes\n", (unsigned long)amount);
}
/* Default unknownEntity callback: writes a diagnostic to stderr giving the
 * position (the context's line and column, each printed plus one) and the
 * name of the entity that could not be resolved. */
static void default_unknownEntity(const struct csxml* ctx, const char* ent)
{
    fprintf(
        stderr,
        "Unknown entity referenced in Streamed XML.\n Line: %d\n Col : %d\n Name: %s\n",
        ctx->line + 1,
        ctx->col + 1,
        ent
    );
}
/* Default single-string data callback: ignores both arguments and reports
 * success (0), so the data is silently dropped. */
static int default_discard(const struct csxml* ctx, const char* x)
{
    (void)x;
    (void)ctx;

    return 0;
}
/* Default cdata callback: forwards the CDATA text to the context's content
 * callback and returns whatever that callback returns. */
static int default_cdata(const struct csxml* ctx, const char* data)
{
    const int rc = ctx->content(ctx, data);

    return rc;
}
/* Default PI callback: ignores the processing instruction entirely and
 * reports success (0). */
static int default_discardPI(const struct csxml* ctx, const char* target, const char* data)
{
    (void)data;
    (void)target;
    (void)ctx;

    return 0;
}
/* Default element callback: ignores the opening tag and its attributes and
 * reports success (0). */
static int default_discardElem(const struct csxml* ctx, const char* elemName, int numAttrs)
{
    (void)numAttrs;
    (void)elemName;
    (void)ctx;

    return 0;
}
/* Default entityRef callback: knows no entities, so it always returns a null
 * pointer regardless of the name asked for. */
static const char* default_discardEnt(const struct csxml* ctx, const char* ent)
{
    (void)ent;
    (void)ctx;

    return NULL;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,102 @@
/* libCStreamedXML/src/libCStreamedXML/functions.h
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
/*! \brief Allocate and set up a new parser object.
\retval 0 on error (see \a errno).
\returns Pointer to newly-allocated parser object.
Call this function to obtain a parser context object. The object is initially populated with
sensible defaults (\a expandEntities is 1, the error callback functions print messages to stderr,
the data callback functions discard the data, etc.). Release the object with csxml_freeParser().
*/
struct csxml* csxml_newParser(void);
/*! \brief Free an existing parser object.
\param ctx The object to free (may be 0).
Frees a parser object and all of its associated data structures (strings, lists, etc.).
*/
void csxml_freeParser(struct csxml* ctx);
/*! \brief Parse a character.
\param ctx Parser context.
\param ch Character to parse.
\retval 0 on success.
\retval -1 on XML error.
\retval (non-0) if any callback function signals an error.
This function parses a single character. It will generate callbacks as appropriate. If one of the
callback functions returns a non-zero value, that value is returned from this function and the
object is put into an error state. If the streamed XML is not well formed, or some other parsing
error occurs, -1 will be returned and the object will be put into an error state.
Once in an error state, the function will simply discard data passed to it and return 0. However,
a lower-level parser is still running and is capable of matching stream restart markers, allowing
the parser to be reset.
*/
int csxml_feedChar(struct csxml* ctx, char ch);
/*! \brief Look up an entity.
\param ctx Parser context.
\param ent Entity name.
\retval 0 on error.
\returns Pointer to null-terminated string of expanded entity text.
This function is called to dereference entities. It handles the standard entities itself, and chains
on to the entityRef callback of the parser if needed. If the entity is not found, it calls the
unknownEntity callback and returns 0.
*/
const char* csxml_entityRef(struct csxml* ctx, const char* ent);
/*! \brief Reset parser.
\param ctx Parser context.
This function will reset the parser into its initial state, as though no data had yet been
encountered. This is called after stream restart markers for example.
*/
void csxml_reset(struct csxml* ctx);
/*! \brief Parse a block of data.
\param ctx Parser context.
\param data Pointer to block of data.
\param amt Number of bytes to parse.
This function will parse each character in the block of data. If an error is encountered, the usual
process (callback, error state) is followed, but this function will continue to feed in data. The
idea is that you continue to feed in data from your data source, which might have a restart marker
(at which point the parsing will once again continue).
*/
void csxml_feedData(struct csxml* ctx, const char* data, size_t amt);
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,692 @@
/* libCStreamedXML/src/libCStreamedXML/parser.c
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
//#define DEBUG_STATE_MACHINE
#ifdef DEBUG_STATE_MACHINE
#include <stdio.h>
#endif
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
/* Positions of the parser state machine. The current value is kept in
 * ctx->state and csxml_feedChar() dispatches on it for every character.
 * Each comment describes the input that has been consumed to reach the state.
 */
enum csxml_State {
StateNone, // at element or stream level
StateRestartMarker, // after <![RESTART[
StateRestartMarker1, // after first ]
StateRestartMarker2, // after second ]
StateOpen, // after <
StateOpenBang, // after <!
StateOpenCdataMarker, // after <![
StateCDATA, // inside a CDATA section (whole "<![CDATA[" marker matched)
StateCDATA1, // first ]
StateCDATA2, // second ]
StateData, // like StateNone, but don't skip whitespace
StateOpenComment, // after <!-
StateComment, // inside comment text (after <!--)
StateComment2, // first - encountered inside comment
StateComment3, // second - encountered; only > may follow
StatePI, // after <? (we are currently parsing the target)
StatePIData, // after target
StatePI2, // first ? encountered (in target)
StatePI3, // first ? encountered (in data)
StateClose, // "</" encountered
StateClosing, // "</x"
StateNeedClose2, // "</xyz "
StateElemName, // <x encountered
StateElemTag, // first space after "<name" encountered
StateElemAttrName, // accumulating an attribute name
StateElemAttrEq, // name=
StateElemAttrVal, // name=' or name="
StateElemAttrDone, // name='value'
StateNeedClose, // <elem/
StateEntity, // &
StateCharEntity, // &#
StateEntityName, // &x
StateError // error reported; input is discarded (restart markers are still matched)
};
/* Literal opening markers. csxml_feedChar() compares the CDATA marker
 * character by character (indexed by ctx->xmlCount) and the restart marker
 * likewise (indexed by ctx->restartCount) on every input character. */
static const char* xmlCdataMarker = "<![CDATA[";
static const char* xmlRestartMarker = "<![RESTART[";
/* Lexical class of a single 7-bit character, as looked up in token_classes[]. */
enum TokenClass {
ClassInvalid, // restricted character; rejected by the parser
ClassWhitespace, // tab, newline, carriage return, space
ClassEntity, // '&'
ClassOther, // any other permitted character
ClassOpenTag, // '<'
ClassNameChar, // allowed inside a name but not as its first character
ClassNameStartChar // allowed as the first (and any later) character of a name
};
/* Classification table indexed by character code 0x00--0x7F (128 entries,
 * four per row). Characters with the high bit set are never looked up here;
 * csxml_feedChar() treats them as ClassInvalid before indexing. */
static const enum TokenClass token_classes[] = {
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 00--03
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 04--07
ClassInvalid, ClassWhitespace, ClassWhitespace, ClassInvalid, // control 08, tab, newline, control 0B
ClassInvalid, ClassWhitespace, ClassInvalid, ClassInvalid, // control 0C, carriage return, control 0E--0F
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 10--13
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 14--17
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 18--1B
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 1C--1F
ClassWhitespace, ClassOther, ClassOther, ClassOther, // ' ', '!', '"', '#'
ClassOther, ClassOther, ClassEntity, ClassOther, // '$', '%', '&', '\''
ClassOther, ClassOther, ClassOther, ClassOther, // '(', ')', '*', '+'
ClassOther, ClassNameChar, ClassNameChar, ClassOther, // ',', '-', '.', '/'
ClassNameChar, ClassNameChar, ClassNameChar, ClassNameChar, // '0'--'3'
ClassNameChar, ClassNameChar, ClassNameChar, ClassNameChar, // '4'--'7'
ClassNameChar, ClassNameChar, ClassNameStartChar, ClassOther, // '8', '9', ':', ';'
ClassOpenTag, ClassOther, ClassOther, ClassOther, // '<', '=', '>', '?'
ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '@', 'A'--'C'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'D'--'G'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'H'--'K'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'L'--'O'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'P'--'S'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'T'--'W'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'X'--'Z', '['
ClassOther, ClassOther, ClassOther, ClassNameStartChar, // '\\', ']', '^', '_'
ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '`', 'a'--'c'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'd'--'g'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'h'--'k'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'l'--'o'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'p'--'s'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 't'--'w'
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'x'--'z', '{'
ClassOther, ClassOther, ClassOther, ClassInvalid // '|', '}', '~', control 7F
};
int csxml_feedChar(struct csxml* ctx, char ch)
{
enum TokenClass c;
int try;
#ifdef DEBUG_STATE_MACHINE
printf("Character '%c' (%02X), state %d\n", ch, ch, ctx->state);
#endif
#define ERROR(_reason) do { \
ctx->state = StateError; \
ctx->notWellFormed(ctx, _reason); \
return -1; \
}while(0)
#define APPEND_CH(_whichbuf, _ch) do { \
ctx-> _whichbuf .data[ctx-> _whichbuf .len++] = _ch; \
if((ctx-> _whichbuf .len == ctx-> _whichbuf .size) && \
do_realloc(ctx, &ctx-> _whichbuf)) return -1; \
ctx-> _whichbuf .data[ctx-> _whichbuf .len] = 0; \
}while(0)
#define CLEAR_BUFFER(_whichbuf) do { \
ctx-> _whichbuf .data[0] = 0; \
ctx-> _whichbuf .len = 0; \
}while(0)
#define TRY(_x) do { \
try = (_x); \
if(try) { \
ctx->state = StateError; \
return try; \
} \
}while(0)
if(ch == xmlRestartMarker[ctx->restartCount]) {
if(++ctx->restartCount == 11) {
csxml_reset(ctx);
ctx->state = StateRestartMarker;
return 0;
}
} else {
ctx->restartCount = 0;
}
if(ch & ~0x7F) c = ClassInvalid;
else c = token_classes[(int)ch];
if(ch == '\r') {
ctx->skipNextNewline = 1;
ch = '\n';
++ctx->line;
ctx->col = 0;
} else if(ch == '\n') {
if(ctx->skipNextNewline) return 0;
++ctx->line;
ctx->col = 0;
} else {
ctx->skipNextNewline = 0;
}
if(c == ClassInvalid) ERROR("Restricted character encountered.");
// deal with char appropriately, according to state
switch(ctx->state) {
case StateError:
return 0;
case StateNone:
switch(c) {
case ClassWhitespace:
APPEND_CH(buffer, ch);
break;
case ClassOpenTag:
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
ctx->state = StateOpen;
CLEAR_BUFFER(buffer);
break;
case ClassEntity:
if(ctx->expandEntities) {
if(!ctx->elementDepth) ERROR("Entities cannot appear at stream level.");
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
CLEAR_BUFFER(buffer);
ctx->parsingAttr = 0;
ctx->state = StateEntity;
break;
}
// fall through
default:
if(!ctx->elementDepth) ERROR("Content cannot appear at stream level.");
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
ctx->state = StateData;
CLEAR_BUFFER(buffer);
APPEND_CH(buffer, ch);
break;
}
break;
case StateData:
switch(c) {
case ClassOpenTag:
TRY(ctx->content(ctx, ctx->buffer.data));
CLEAR_BUFFER(buffer);
ctx->state = StateOpen;
break;
case ClassEntity:
ctx->parsingAttr = 0;
ctx->state = StateEntity;
break;
default:
APPEND_CH(buffer, ch);
break;
}
break;
case StateCDATA:
if(ch == ']') ctx->state = StateCDATA1;
else APPEND_CH(buffer, ch);
break;
case StateCDATA1:
if(ch == ']') ctx->state = StateCDATA2;
else {
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ch);
ctx->state = StateCDATA;
}
break;
case StateCDATA2:
if(ch == '>') {
TRY(ctx->cdata(ctx, ctx->buffer.data));
CLEAR_BUFFER(buffer);
ctx->state = StateNone;
} else if(ch == ']') {
APPEND_CH(buffer, ']');
} else {
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ch);
ctx->state = StateCDATA;
}
break;
case StateRestartMarker:
if(ch == ']') ctx->state = StateRestartMarker1;
else APPEND_CH(buffer, ch);
break;
case StateRestartMarker1:
if(ch == ']') ctx->state = StateRestartMarker2;
else {
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ch);
ctx->state = StateRestartMarker;
}
break;
case StateRestartMarker2:
if(ch == '>') {
TRY(ctx->streamRestart(ctx, ctx->buffer.data));
csxml_reset(ctx);
} else if(ch == ']') {
APPEND_CH(buffer, ']');
} else {
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ']');
APPEND_CH(buffer, ch);
ctx->state = StateRestartMarker;
}
break;
case StateOpen:
switch(c) {
case ClassNameStartChar:
ctx->state = StateElemName;
ctx->elemAttrNames.len = 0;
ctx->elemAttrVals.len = 0;
CLEAR_BUFFER(elemName);
APPEND_CH(elemName, ch);
break;
default:
if(ch == '!') ctx->state = StateOpenBang;
else if(ch == '?') {
ctx->state = StatePI;
CLEAR_BUFFER(buffer2);
} else if(ch == '/') {
if(!ctx->elementDepth) ERROR("Encountered a close tag at stream level.");
ctx->state = StateClose;
} else ERROR("Invalid start character for element.");
break;
}
break;
case StatePI:
if(ch == '?') ctx->state = StatePI2;
else if(c == ClassWhitespace) {
ctx->state = StatePIData;
CLEAR_BUFFER(buffer);
} else APPEND_CH(buffer2, ch);
break;
case StatePI2:
if(ch != '>') ERROR("Invalid target for PI");
else {
TRY(ctx->PI(ctx, ctx->buffer2.data, 0));
ctx->state = StateNone;
}
break;
case StatePIData:
if(ch == '?') ctx->state = StatePI3;
else APPEND_CH(buffer, ch);
break;
case StatePI3:
if(ch == '>') {
TRY(ctx->PI(ctx, ctx->buffer2.data, ctx->buffer.data));
CLEAR_BUFFER(buffer);
ctx->state = StateNone;
} else if(ch == '?') {
APPEND_CH(buffer, '?');
} else {
APPEND_CH(buffer, '?');
APPEND_CH(buffer, ch);
ctx->state = StatePIData;
}
break;
case StateOpenBang:
if(ch == '[') {
// restart markers handled by lower layer
ctx->state = StateOpenCdataMarker;
ctx->xmlCount = 3;
if(!ctx->elementDepth) ERROR("CDATA sections not valid at stream level.");
} else if(ch == '-') ctx->state = StateOpenComment;
else ERROR("Invalid special tag.");
break;
case StateOpenCdataMarker:
if(ch != xmlCdataMarker[ctx->xmlCount]) ERROR("Invalid marked section.");
if(!xmlCdataMarker[++ctx->xmlCount]) ctx->state = StateCDATA;
break;
case StateOpenComment:
if(ch != '-') ERROR("Invalid special tag.");
ctx->state = StateComment;
CLEAR_BUFFER(buffer);
break;
case StateComment:
if(ch == '-') ctx->state = StateComment2;
else APPEND_CH(buffer, ch);
break;
case StateComment2:
if(ch == '-') ctx->state = StateComment3;
else {
APPEND_CH(buffer, '-');
APPEND_CH(buffer, ch);
}
break;
case StateComment3:
if(ch != '>') ERROR("`--' not valid in comments");
TRY(ctx->comment(ctx, ctx->buffer.data));
CLEAR_BUFFER(buffer);
ctx->state = StateNone;
break;
case StateElemName:
switch(c) {
case ClassWhitespace:
ctx->state = StateElemTag;
CLEAR_BUFFER(buffer);
break;
case ClassNameStartChar:
case ClassNameChar:
APPEND_CH(elemName, ch);
break;
default:
switch(ch) {
case L'>':
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
TRY(ctx->element(ctx, ctx->elemName.data, 0));
ctx->state = StateNone;
++ctx->elementDepth;
CLEAR_BUFFER(buffer);
break;
case L'/':
ctx->state = StateNeedClose;
break;
default:
ERROR("Invalid character in tag name.");
}
}
break;
case StateElemTag:
switch(c) {
case ClassWhitespace:
break;
case ClassNameStartChar:
ctx->state = StateElemAttrName;
CLEAR_BUFFER(buffer);
APPEND_CH(buffer, ch);
break;
default:
switch(ch) {
case '>':
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrNames.len));
ctx->state = StateNone;
++ctx->elementDepth;
break;
case '/':
ctx->state = StateNeedClose;
break;
default:
ERROR("Invalid character in tag.");
}
}
break;
case StateElemAttrName:
switch(c) {
case ClassNameStartChar:
case ClassNameChar:
APPEND_CH(buffer, ch);
break;
default:
if(ch != '=') ERROR("Invalid character in attribute name.");
TRY(list_push(ctx, &ctx->elemAttrNames, &ctx->buffer));
CLEAR_BUFFER(buffer);
ctx->state = StateElemAttrEq;
break;
}
break;
case StateElemAttrEq:
if(ch == '\'') ctx->singleQuote = 1;
else if(ch == '"') ctx->singleQuote = 0;
else ERROR("Invalid character in attribute.");
ctx->state = StateElemAttrVal;
break;
case StateElemAttrVal:
if((ctx->singleQuote && ch == '\'') || (!ctx->singleQuote && ch == '"')) {
TRY(list_push(ctx, &ctx->elemAttrVals, &ctx->buffer));
ctx->state = StateElemAttrDone;
} else if(ctx->expandEntities && ch == L'&') {
ctx->parsingAttr = 1;
ctx->state = StateEntity;
} else APPEND_CH(buffer, ch);
break;
case StateElemAttrDone:
switch(c) {
case ClassWhitespace:
ctx->state = StateElemTag;
break;
default:
if(ch == '/') {
ctx->state = StateNeedClose;
} else if(ch == '>') {
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
CLEAR_BUFFER(buffer);
ctx->state = StateNone;
++ctx->elementDepth;
} else ERROR("Invalid character after attribute.");
break;
}
break;
case StateNeedClose:
if(ch != '>') ERROR("Stray `/' in open tag.");
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
TRY(ctx->closeTag(ctx, ctx->elemName.data));
CLEAR_BUFFER(buffer);
ctx->state = StateNone;
break;
case StateClose:
if(c != ClassNameStartChar) ERROR("Invalid character in close tag name.");
APPEND_CH(buffer, ch);
ctx->state = StateClosing;
break;
case StateClosing:
switch(c) {
case ClassNameStartChar:
case ClassNameChar:
APPEND_CH(buffer, ch);
break;
case ClassWhitespace:
ctx->state = StateNeedClose2;
break;
default:
if(ch != '>') ERROR("Invalid character in close tag name.");
TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
TRY(ctx->closeTag(ctx, ctx->elemName.data));
ctx->state = StateNone;
--ctx->elementDepth;
CLEAR_BUFFER(buffer);
}
break;
case StateNeedClose2:
if(c == ClassWhitespace) break;
if(ch != '>') ERROR("Invalid data in close tag.");
TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
TRY(ctx->closeTag(ctx, ctx->elemName.data));
ctx->state = StateNone;
--ctx->elementDepth;
CLEAR_BUFFER(buffer);
break;
case StateEntity:
if(ch == '#') {
ctx->state = StateCharEntity;
ctx->entityChar = 0;
} else if(c == ClassNameStartChar) {
APPEND_CH(buffer, ch);
ctx->state = StateEntityName;
} else ERROR("Invalid entity name.");
break;
case StateCharEntity:
if(ch == ';') {
APPEND_CH(buffer, ctx->entityChar);
ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
break;
} else if(ch >= '0' && ch <= '9') {
ctx->entityChar *= 10;
ctx->entityChar += (ch - '0');
if(!ctx->entityChar || ctx->entityChar > 126) ERROR("Character code out of range in character entity.");
} else ERROR("Invalid character in character entity.");
break;
case StateEntityName:
if(ch == ';') {
const char* e = csxml_entityRef(ctx, ctx->buffer2.data);
TRY(!e || buffer_strcat(ctx, &ctx->buffer, e));
ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
break;
}
if(c != ClassNameChar && c != ClassNameStartChar) ERROR("Invalid entity name.");
APPEND_CH(buffer2, ch);
break;
}
++ctx->col;
return 0;
}
/* Resolves an entity name to its replacement text.
 *
 * The five predefined entities (quot, amp, apos, lt, gt) are answered from a
 * built-in table. Any other name is passed to the context's entityRef
 * callback. If that callback returns a null pointer, the unknownEntity
 * callback is invoked, the parser is put into the error state and a null
 * pointer is returned.
 */
const char* csxml_entityRef(struct csxml* ctx, const char* ent)
{
    static const struct {
        const char* name;
        const char* text;
    } builtin[] = {
        { "quot", "\"" },
        { "amp", "&" },
        { "apos", "'" },
        { "lt", "<" },
        { "gt", ">" }
    };
    const char* expansion;
    size_t i;

    for(i = 0; i < sizeof(builtin) / sizeof(builtin[0]); ++i) {
        if(strcmp(ent, builtin[i].name) == 0) return builtin[i].text;
    }

    expansion = ctx->entityRef(ctx, ent);
    if(expansion) return expansion;

    ctx->unknownEntity(ctx, ent);
    ctx->state = StateError;
    return 0;
}
/* Return a parser context to its initial parsing state.
 *
 * Clears the three working string buffers, sets the state machine back to
 * StateNone, zeroes the counters, flags and line/column position, and marks
 * the element stack and attribute lists as empty by setting their lengths
 * to zero. Callback pointers and the expandEntities option are not touched.
 *
 * ctx  Parsing context to reset.
 */
void csxml_reset(struct csxml* ctx)
{
    /* Working text buffers. */
    CLEAR_BUFFER(buffer);
    CLEAR_BUFFER(buffer2);
    CLEAR_BUFFER(elemName);

    /* Lists are emptied by length only. */
    ctx->elemAttrVals.len = ctx->elemAttrNames.len = ctx->elemStack.len = 0;

    /* Position within the stream. */
    ctx->col = ctx->line = 0;

    /* Flags and string-matching counters. */
    ctx->parsingAttr = ctx->skipNextNewline = 0;
    ctx->restartCount = ctx->xmlCount = 0;

    /* State machine: idle, at stream level. */
    ctx->elementDepth = 0;
    ctx->state = StateNone;
}
/* The helper macros below are used only by the state-machine code above;
 * undefine them so they cannot leak into the definitions that follow. */
#undef ERROR
#undef APPEND_CH
#undef CLEAR_BUFFER
/* Destroy a parser context created by csxml_newParser().
 *
 * Releases the three internal string buffers, the element stack and the two
 * attribute lists, and finally the context structure itself. The pointer
 * must not be used after this call. Passing a null pointer is a no-op.
 *
 * ctx  Parsing context to destroy (may be 0).
 */
void csxml_freeParser(struct csxml* ctx)
{
    if(!ctx) return;

    buffer_free(&ctx->buffer);
    buffer_free(&ctx->buffer2);
    buffer_free(&ctx->elemName);

    list_free(&ctx->elemStack);
    list_free(&ctx->elemAttrNames);
    list_free(&ctx->elemAttrVals);

    /* The context is heap-allocated by csxml_newParser(); releasing only
     * its members would leak the structure on every destroy. */
    free(ctx);
}
/* Allocate and initialise a new parser context.
 *
 * The context is zero-filled, entity expansion is enabled, every callback is
 * pointed at its default implementation, and the three internal string
 * buffers are initialised.
 *
 * Returns the new context, or 0 if the context or any of its buffers could
 * not be set up. Destroy a returned context with csxml_freeParser().
 */
struct csxml* csxml_newParser()
{
    /* calloc() gives the same all-zero starting point as malloc()+memset(). */
    struct csxml* ctx = calloc(1, sizeof *ctx);
    if(!ctx) return 0;

    /* Install the defaults before touching the buffers. buffer_init() is
     * handed ctx, presumably so it can report failure through the context's
     * callbacks (TODO confirm); doing this first guarantees it never sees a
     * null callback pointer. */
    ctx->expandEntities = 1;

    ctx->notWellFormed = default_notWellFormed;
    ctx->outOfMemory = default_outOfMemory;
    ctx->unknownEntity = default_unknownEntity;

    ctx->whiteSpace = default_discard;
    ctx->content = default_discard;
    ctx->cdata = default_cdata;
    ctx->streamRestart = default_discard;
    ctx->PI = default_discardPI;
    ctx->comment = default_discard;
    ctx->element = default_discardElem;
    ctx->closeTag = default_discard;
    ctx->entityRef = default_discardEnt;

    if(buffer_init(ctx, &ctx->buffer)
        || buffer_init(ctx, &ctx->buffer2)
        || buffer_init(ctx, &ctx->elemName))
    {
        /* Hand the partially built context to the normal teardown path. */
        csxml_freeParser(ctx);
        return 0;
    }

    return ctx;
}
/* Feed a block of bytes to the parser, one character at a time.
 *
 * Each of the amt bytes starting at data is passed, in order, to
 * csxml_feedChar(). The value returned by csxml_feedChar() is not examined,
 * so feeding continues to the end of the block regardless.
 *
 * ctx   Parsing context.
 * data  Pointer to the bytes to parse (not accessed if amt is 0).
 * amt   Number of bytes available at data.
 */
void csxml_feedData(struct csxml* ctx, const char* data, size_t amt)
{
    size_t i;

    for(i = 0; i < amt; ++i) {
        csxml_feedChar(ctx, data[i]);
    }
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -0,0 +1,21 @@
# libCStreamedXML/src/lib/clib/pkgconf.in
#
# Metadata file for pkg-config
# ( http://www.freedesktop.org/software/pkgconfig/ )
#
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
# Released under the GNU GPLv2. See file COPYING or
# http://www.gnu.org/copyleft/gpl.html for details.
#
# Name, description
Name: @TODO@
Description: @TODO@
Version: @VERSION@
# Requirements
Requires:
# Compilation information
Libs: -L@LIBDIR@ -lCStreamedXML
Cflags: -I@INCLUDEDIR@

View File

@ -0,0 +1,17 @@
# libCStreamedXML/src/libCStreamedXML/soversion
#
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
# Released under the GNU GPLv2. See file COPYING or
# http://www.gnu.org/copyleft/gpl.html for details.
#
# SOMAJOR and SOMINOR are included in the library's soname. They need to
# be bumped on a binary-incompatible release. They are both single
# integers.
SOMAJOR=0
SOMINOR=0
# SOMICRO is bumped every time there is a binary-compatible release.
SOMICRO=0

320
src/libCStreamedXML/types.h Normal file
View File

@ -0,0 +1,320 @@
/* libCStreamedXML/src/libCStreamedXML/types.h
*
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
* COPYING for more information / terms of license.
*/
/*! \brief String buffer.
This type implements a simple, growable string buffer. The string held in \a data should be kept
null-terminated at all times. The \a data pointer is reallocated as necessary (i.e. when \a len
becomes equal to \a size). \a len is the string length (excluding the terminating null) and \a size
is the size of the allocated buffer. It is never valid for \a data or \a size to be 0.
*/
struct csxml_buf {
/// Pointer to the string (null terminated); never 0.
char* data;
/// Length of the string (excluding the terminating null).
size_t len;
/// Size of the allocated buffer; never 0.
size_t size;
};
/*! \brief String list.
This type implements a dynamic array of strings (each element is a struct csxml_buf). The \a data
pointer is reallocated as necessary (i.e. when \a len becomes equal to \a size). \a len is the
number of valid elements in the array and \a size is the number of elements allocated. Unlike
struct csxml_buf, it is valid for \a data and \a size to be zero (an empty, unallocated list).
*/
struct csxml_list {
/// Array of buffers (all allocated elements are valid and initialised); may be 0.
struct csxml_buf* data;
/// Number of elements in use.
size_t len;
/// Number of allocated elements; may be 0.
size_t size;
};
/*! \brief Streamed XML parser context.
This structure contains all the details of a Streamed XML parsing operation. Most fields are
internal to the parser and should not be touched; those are briefly documented here. You may want to
change the \a expandEntities option, examine the \a line and \a col variables, and change the
callback functions.
Those members marked Internal: are subject to change (either in semantics, type or existence).
*/
struct csxml {
/// Option: change "&amp;lt;" to "&lt;", etc. if non-zero (the default). Change to 0 to leave
/// entities inline.
int expandEntities;
/// Current line (from 0, starts from last restart marker).
int line;
/// Current column (from 0, starts from last restart marker).
int col;
/// List of attribute names for current element.
struct csxml_list elemAttrNames;
/// List of attribute values for current element.
struct csxml_list elemAttrVals;
/*! \brief Error callback: Streamed XML is not well formed.
\param ctx Parsing context.
\param reason Human-readable description of error.
This function is called whenever badly-formed Streamed XML is encountered (e.g. if content is
encountered at stream-level).
The default implementation prints a formatted message to \a stderr.
*/
void (*notWellFormed)(const struct csxml* ctx, const char* reason);
/*! \brief Error callback: out of memory.
\param ctx Parsing context.
\param amount Number of bytes we tried to allocate.
This function is called whenever a dynamic string resizing operation (or similar) fails. It is
called with the number of bytes the library attempted to allocate.
The default implementation prints a formatted message to \a stderr.
\todo When parsing extremely long content sections, we could just call the content() callback
when memory is low rather than failing altogether.
*/
void (*outOfMemory)(const struct csxml* ctx, size_t amount);
/*! \brief Error callback: reference to unknown entity.
\param ctx Parsing context.
\param ent Name of the referenced entity.
If an entity is referenced by name (e.g. "&amp;myEntity;" is encountered) but the lookup in the
default entity list and through the entityRef() callback fails to resolve the content, this
function is called.
The default implementation prints a formatted message to \a stderr.
*/
void (*unknownEntity)(const struct csxml* ctx, const char* ent);
/*! \brief Callback: whitespace.
\param ctx Parsing context.
\param ws Pointer to string of whitespace.
\returns Zero on success, non-zero on error.
This function is called whenever a block of whitespace is encountered at stream level or
immediately after some structural element (i.e. on leading whitespace within elements). It is
generally safe to discard it, unless you need to reproduce the file verbatim. Once content is
encountered within an element, any further whitespace will be reported through the content()
callback, up to the next structural element.
The default implementation discards the data.
*/
int (*whiteSpace)(const struct csxml* ctx, const char* ws);
/*! \brief Callback: content.
\param ctx Parsing context.
\param content Pointer to string of cdata.
\returns Zero on success, non-zero on error.
This function is called whenever textual content (cdata) is encountered inside an element. It
may be called several times in succession for one run of text, in which case the data from each
call should be concatenated. If \a expandEntities is non-zero, then this function will also be
called with the expanded text from any entities encountered, rather than the entities themselves.
The default implementation discards the data.
*/
int (*content)(const struct csxml* ctx, const char* content);
/*! \brief Callback: cdata block.
\param ctx Parsing context.
\param cdata Pointer to string of cdata.
\returns Zero on success, non-zero on error.
This function is called for all data inside a CDATA marked section. It is useful to
differentiate between cdata encoded in this manner and normal cdata if you are trying to
reproduce the original file verbatim.
The default implementation calls content() with the same data.
*/
int (*cdata)(const struct csxml* ctx, const char* cdata);
/*! \brief Callback: stream restart marker.
\param ctx Parsing context.
\param marker Pointer to marker string.
\returns Zero on success, non-zero on error.
Whenever a stream restart marker is encountered, this function is called with the data found
inside the marker.
The default implementation ignores the data (but note that the parser state is always reset,
regardless of what the callback does).
*/
int (*streamRestart)(const struct csxml* ctx, const char* marker);
/*! \brief Callback: processing instruction.
\param ctx Parsing context.
\param target PI target.
\param data PI data (may be 0).
\returns Zero on success, non-zero on error.
This callback is used to report processing instructions (PIs). If the PI has no data, then the
\a data variable can be set to 0.
The default implementation ignores the data.
*/
int (*PI)(const struct csxml* ctx, const char* target, const char* data);
/*! \brief Callback: comment.
\param ctx Parsing context.
\param comment Pointer to comment string data.
\returns Zero on success, non-zero on error.
This callback is used to report comments. It is only useful if you wish to reproduce the source
file verbatim (for instance, you want to alter some data in a file but preserve user comments).
The default implementation ignores the data.
*/
int (*comment)(const struct csxml* ctx, const char* comment);
/*! \brief Callback: element.
\param ctx Parsing context.
\param elemName Pointer to element name string.
\param numAttrs Number of attributes.
\returns Zero on success, non-zero on error.
This callback is used whenever an element open tag is encountered. To parse attributes, use the
\a numAttrs variable to determine how many there are, and then access attribute number \c i with:
<pre> // get attribute name
const char* attrName = ctx->elemAttrNames.data[i].data;
// get attribute value
const char* attrValue = ctx->elemAttrVals.data[i].data;</pre>
In the case of an empty element, the closeTag() callback will be called immediately afterwards.
The default implementation ignores the data.
*/
int (*element)(const struct csxml* ctx, const char* elemName, int numAttrs);
/*! \brief Callback: close tag.
\param ctx Parsing context.
\param elemName Element name.
\returns Zero on success, non-zero on error.
This function is called whenever an element close tag is encountered. The element name will
always match the open tag element name (if not, the Streamed XML is not well formed, and an
error will be signalled before this callback is triggered).
The default implementation ignores the data.
*/
int (*closeTag)(const struct csxml* ctx, const char* elemName);
/*! \brief Callback: entity reference.
\param ctx Parsing context.
\param ent Entity name.
\retval 0 if the entity name does not match any known entity.
\returns Pointer to null-terminated string data for expanding entity.
This function is called whenever \a expandEntities is true and an entity is encountered in cdata
or an attribute value. It is only called if the entity does not match one of the five built-in
defaults. You only need to provide an implementation if you know about some additional entities.
The default implementation returns 0 ("no such entity").
*/
const char* (*entityRef)(const struct csxml* ctx, const char* ent);
/// Internal: string buffer (general accumulator; e.g. receives close tag names and expanded
/// entity text).
struct csxml_buf buffer;
/// Internal: string buffer (secondary accumulator; e.g. collects an entity name before lookup).
struct csxml_buf buffer2;
/// Internal: string buffer (e.g. holds the name popped from \a elemStack when matching a close
/// tag).
struct csxml_buf elemName;
/// Internal: stack of element names, used to match open/close tags.
struct csxml_list elemStack;
/// Internal: state machine state.
int state;
/// Internal: for matching strings.
int xmlCount;
/// Internal: depth in element tree (0 = stream level).
int elementDepth;
/// Internal: for matching against restart marker string.
int restartCount;
/// Internal: for stripping windows line endings.
int skipNextNewline;
/// Internal: flag to check if we are currently parsing an attribute while expanding an entity.
int parsingAttr;
/// Internal: flag to record if current attribute was quoted with ' or " char.
int singleQuote;
/// Internal: used to expand character entities (accumulates the decimal character code).
int entityChar;
};
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/