Add in libCStreamedXML and a test program for it.
This commit is contained in:
parent
dc6a4cf430
commit
4533df2947
|
@ -0,0 +1 @@
|
|||
c tests ctests libCStreamedXML
|
|
@ -0,0 +1,3 @@
|
|||
source src/ctests/build.tests
|
||||
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,43 @@
|
|||
# These are external variables, and shouldn't clash with anything else
#  ctests_BUILT
#
# Builds every C test program found in src/ctests against libCStreamedXML,
# writing the executables to obj/tests. A test is only recompiled when the
# library, its own source file or this script is newer than the executable.

build_target libCStreamedXML || return 1

# quoted so that the test is well-formed whether or not the variable is set
if [ -z "${ctests_BUILT}" ]
then
    LIBS="${libCStreamedXML} "
    EXTRAS=""

    echo "Building test programs..."
    do_cmd mkdir -p obj/tests || return 1

    for SRC in src/ctests/*.c
    do
        # basename strips the literal ".c" suffix (the old sed pattern ".c$"
        # treated the dot as "any character")
        TEST="obj/tests/$(basename "${SRC}" .c)"
        MODIFIED=0
        # LIBS is deliberately unquoted: it is a space-separated list
        for file in ${LIBS} "${SRC}" src/ctests/build.tests
        do
            if [ "${file}" -nt "${TEST}" ]
            then
                MODIFIED=1
                break
            fi
        done

        if [ ${MODIFIED} -ne 0 ]
        then
            do_cmd ${CC} -Iobj ${CFLAGS} -o "${TEST}" "${SRC}" ${LIBS} ${EXTRAS} || return 1
            print_success "Built ${TEST}"
        else
            print_success "${TEST} is up to date"
        fi
    done

    print_success "All tests built"

    ctests_BUILT=1
fi

# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,137 @@
|
|||
/* libStreamedXML/src/ctests/cCallback.c
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#include "StreamedXML.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
/* whiteSpace callback: print the whitespace string it was handed.
 * The parser context is not consulted. Always returns 0. */
int echo_whiteSpace(const struct csxml* ctx, const char* ws)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Whitespace: ``%s''\n", ws);
    return 0;
}
|
||||
|
||||
/* content callback: print the text content it was handed.
 * The parser context is not consulted. Always returns 0. */
int echo_content(const struct csxml* ctx, const char* data)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Content: ``%s''\n", data);
    return 0;
}
|
||||
|
||||
/* cdata callback: print the CDATA text it was handed.
 * The parser context is not consulted. Always returns 0. */
int echo_cdata(const struct csxml* ctx, const char* data)
{
    (void)ctx; /* unused */

    fprintf(stdout, "CDATA: ``%s''\n", data);
    return 0;
}
|
||||
|
||||
/* streamRestart callback: print the text carried by a restart marker.
 * The parser context is not consulted. Always returns 0. */
int echo_streamRestart(const struct csxml* ctx, const char* data)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Stream restart marker: ``%s''\n", data);
    return 0;
}
|
||||
|
||||
/* PI callback: print a processing instruction's target and, when present,
 * its data. A null `data` pointer is reported as "not specified".
 * The parser context is not consulted. Always returns 0. */
int echo_PI(const struct csxml* ctx, const char* target, const char* data)
{
    (void)ctx; /* unused */

    printf("PI: target ``%s'', data ", target);
    if(!data) {
        printf("not specified\n");
        return 0;
    }

    printf("``%s''\n", data);
    return 0;
}
|
||||
|
||||
/* comment callback: print the comment text it was handed.
 * The parser context is not consulted. Always returns 0. */
int echo_comment(const struct csxml* ctx, const char* data)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Comment: ``%s''\n", data);
    return 0;
}
|
||||
|
||||
int echo_element(const struct csxml* ctx, const char* elemName, int numAttrs)
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("Open tag: <%s> (%d attributes)\n", elemName, numAttrs);
|
||||
for(i = 0; i < numAttrs; ++i) printf(" %s='%s'\n", ctx->elemAttrNames.data[i].data, ctx->elemAttrVals.data[i].data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* closeTag callback: print the name of the element being closed.
 * The parser context is not consulted. Always returns 0. */
int echo_closeTag(const struct csxml* ctx, const char* elemName)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Close tag: </%s>\n", elemName);
    return 0;
}
|
||||
|
||||
/* entityRef callback: print the entity name it was asked about.
 * The parser context is not consulted. Always returns a null pointer, i.e.
 * it never supplies replacement text (presumably the library then treats
 * the entity as unknown -- confirm against csxml_entityRef). */
const char* echo_entityRef(const struct csxml* ctx, const char* ent)
{
    (void)ctx; /* unused */

    fprintf(stdout, "Entity reference: ``%s''\n", ent);
    return 0;
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
FILE* fp = 0;
|
||||
char buf[1024];
|
||||
size_t amt;
|
||||
struct csxml* ctx = 0;
|
||||
|
||||
if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
|
||||
printf("{C library} Tests the callback functions.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(argc != 2) {
|
||||
fprintf(stderr, "Expecting filename.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
fp = fopen(argv[1], "r");
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Error opening '%s' for input.\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
ctx = csxml_newParser();
|
||||
if(!ctx) {
|
||||
fprintf(stderr, "Couldn't allocate a new parser.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ctx->whiteSpace = echo_whiteSpace;
|
||||
ctx->content = echo_content;
|
||||
ctx->cdata = echo_cdata;
|
||||
ctx->streamRestart = echo_streamRestart;
|
||||
ctx->PI = echo_PI;
|
||||
ctx->comment = echo_comment;
|
||||
ctx->element = echo_element;
|
||||
ctx->closeTag = echo_closeTag;
|
||||
ctx->entityRef = echo_entityRef;
|
||||
|
||||
while(!feof(fp)) {
|
||||
amt = fread(buf, 1, sizeof(buf), fp);
|
||||
csxml_feedData(ctx, buf, amt);
|
||||
}
|
||||
|
||||
csxml_freeParser(ctx);
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,35 @@
|
|||
/* libStreamedXML/src/ctests/???.c
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#include "StreamedXML.h"
|
||||
|
||||
#include <stdio.h>
#include <string.h>
|
||||
|
||||
|
||||
|
||||
/* Skeleton entry point for a new test program.
 *
 * When invoked with the single argument "--print-summary" it prints a
 * one-line description and returns 0. Otherwise it falls through to the
 * (still to be written) test body and returns `ret`.
 */
int main(int argc, char* argv[])
{
    int ret = 0;

    /* strcmp() needs <string.h> to be included by this file */
    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("One line summary.\n");
        return 0;
    }

    if(argc == 1) {
        // empty argument list
    }

    // TODO

    return ret;
}
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
c lib libCStreamedXML StreamedXML.h
|
|
@ -0,0 +1,14 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/BottomHeader.h
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#endif
|
||||
/*!@}*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,32 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/TopHeader.h
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#ifndef HEADER_libCStreamedXML
|
||||
#define HEADER_libCStreamedXML
|
||||
|
||||
// standard includes, or includes needed for type declarations
|
||||
#include <sys/types.h>
|
||||
|
||||
/*! \defgroup libCStreamedXML libCStreamedXML interface.
|
||||
|
||||
libCStreamedXML is a C library for parsing Streamed XML data. It is somewhat simpler than the C++
|
||||
library (it only parses ASCII data, not Unicode data). In future, the API could be extended to
|
||||
use only fixed-size buffers, and not to call malloc() at all. This would be advantageous for small
|
||||
embedded systems (e.g. PICs).
|
||||
|
||||
To parse Streamed XML, you must first create a parser context object with csxml_newParser(). Then
|
||||
you set up the returned object by changing options and callback functions. Once set up correctly,
|
||||
pass in data using the csxml_feedChar() and csxml_feedData() functions. Finally, free the parser
|
||||
object with csxml_freeParser().
|
||||
|
||||
*/
|
||||
/*!@{*/
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,20 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/TopSource.c
|
||||
*
|
||||
* (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
* Released under the GNU GPLv2. See file COPYING or
|
||||
* http://www.gnu.org/copyleft/gpl.html for details.
|
||||
*/
|
||||
|
||||
#include "StreamedXML.h"
|
||||
|
||||
// Below are all the includes used throughout the library.
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
//#define DEBUG_STATE_MACHINE
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,100 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/buffer.c
|
||||
*
|
||||
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
static int do_realloc(struct csxml* ctx, struct csxml_buf* buf)
|
||||
{
|
||||
char* n = realloc(buf->data, buf->size << 1);
|
||||
if(!n) {
|
||||
ctx->outOfMemory(ctx, buf->size << 1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf->size <<= 1;
|
||||
buf->data = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int buffer_copy(struct csxml* ctx, struct csxml_buf* dest, const struct csxml_buf* src)
|
||||
{
|
||||
while(dest->size <= src->len) if(do_realloc(ctx, dest)) return -1;
|
||||
strcpy(dest->data, src->data);
|
||||
dest->len = src->len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int buffer_strcat(struct csxml* ctx, struct csxml_buf* dest, const char* src)
|
||||
{
|
||||
size_t req = strlen(src) + dest->len;
|
||||
while(req >= dest->size) if(do_realloc(ctx, dest)) return -1;
|
||||
strcat(dest->data, src);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int buffer_init(struct csxml* ctx, struct csxml_buf* buf)
|
||||
{
|
||||
memset(buf, 0, sizeof(buf));
|
||||
buf->size = 256;
|
||||
buf->data = malloc(buf->size);
|
||||
if(!buf->data) {
|
||||
ctx->outOfMemory(ctx, buf->size);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void buffer_free(struct csxml_buf* buf)
|
||||
{
|
||||
free(buf->data);
|
||||
memset(buf, 0, sizeof(struct csxml_buf));
|
||||
}
|
||||
|
||||
static int do_realloc2(struct csxml* ctx, struct csxml_list* list)
|
||||
{
|
||||
size_t i, newlen = list->size ? (list->size << 1) : 4;
|
||||
struct csxml_buf* n = realloc(list->data, newlen * sizeof(struct csxml_buf));
|
||||
if(!n) {
|
||||
ctx->outOfMemory(ctx, newlen * sizeof(struct csxml_buf));
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset(n + list->size * sizeof(struct csxml_buf), 0, newlen * sizeof(struct csxml_buf));
|
||||
for(i = 0; i < newlen; ++i) {
|
||||
if(buffer_init(ctx, n + list->size + i)) return -1;
|
||||
}
|
||||
|
||||
list->size = newlen;
|
||||
list->data = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int list_push(struct csxml* ctx, struct csxml_list* list, struct csxml_buf* buf)
|
||||
{
|
||||
if((list->size == list->len) && do_realloc2(ctx, list)) return -1;
|
||||
if(buffer_copy(ctx, list->data + list->len, buf)) return -1;
|
||||
++list->len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct csxml_buf* list_pop(struct csxml_list* list)
|
||||
{
|
||||
return list->data + --list->len;
|
||||
}
|
||||
|
||||
static void list_free(struct csxml_list* list)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for(i = 0; i < list->size; ++i) buffer_free(list->data + i);
|
||||
free(list->data);
|
||||
memset(list, 0, sizeof(struct csxml_list));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1 @@
|
|||
source src/libCStreamedXML/build.lib
|
|
@ -0,0 +1 @@
|
|||
source src/libCStreamedXML/build.install-lib
|
|
@ -0,0 +1,36 @@
|
|||
# Installs the libCStreamedXML shared library (plus its .so symlinks), the
# public header and the pkg-config file. Every step aborts the install on
# failure.
build_target libCStreamedXML || return 1

# make paths (this is for Gentoo in particular)
build_dir_tree "${LIBDIR}" || return 1
build_dir_tree "${PKGCONFDIR}" || return 1
build_dir_tree "${INCLUDEDIR}" || return 1

# install library
echo "Installing libraries into '${LIBDIR}'"
install_file ${libCStreamedXML} ${LIBDIR} 0755 || return 1
BASE="${libCStreamedXML_BASE}.so"
MAJOR="${BASE}.${SOMAJOR}"
MINOR="${MAJOR}.${SOMINOR}"
MICRO="${MINOR}.${SOMICRO}"
# symlink chain: .so -> .so.MAJOR -> .so.MAJOR.MINOR -> .so.MAJOR.MINOR.MICRO
install_symlink "${MINOR}" "${MICRO}" "${LIBDIR}" || return 1
install_symlink "${MAJOR}" "${MINOR}" "${LIBDIR}" || return 1
install_symlink "${BASE}" "${MAJOR}" "${LIBDIR}" || return 1

# install header
echo "Installing header file '${libCStreamedXML_HEADER}' into ${INCLUDEDIR}"
install_header ${libCStreamedXML_HEADER} ${INCLUDEDIR} 0644 || return 1

# install pkgconfig file
echo "Installing package config file into ${PKGCONFDIR}"
PKGCONFFILE=${PKGCONFDIR}/libCStreamedXML.pc
do_cmd rm -f ${PKGCONFFILE} || return 1
do_cmd_redir ${PKGCONFFILE} sed \
    -e "s,@VERSION@,${VERSION}," \
    -e "s,@LIBDIR@,${FINALLIBDIR}," \
    -e "s,@INCLUDEDIR@,${FINALINCLUDEDIR}," \
    src/libCStreamedXML/pkgconf.in || return 1
do_cmd chmod 0644 ${PKGCONFFILE} || return 1
print_success "Done"

# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,51 @@
|
|||
# These are external variables, and shouldn't clash with anything else
#  libCStreamedXML
#  libCStreamedXML_BUILT
#  libCStreamedXML_HEADER
#  libCStreamedXML_BASE
#
# Builds the shared library from the monolithic source file. The library is
# only recompiled when the build scripts, the header or the source are newer
# than the existing library file.

# quoted so that the test is well-formed whether or not the variable is set
if [ -z "${libCStreamedXML_BUILT}" ]
then
    libCStreamedXML_BASE=libCStreamedXML
    source src/libCStreamedXML/soversion

    libCStreamedXML="obj/${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}.${SOMICRO}"
    SO_EXTRA="-lc" # @TODO@ libs, cflags

    echo "Building library ${libCStreamedXML}..."

    do_cmd source src/libCStreamedXML/build.monolithic || return 1

    MODIFIED=0
    for test in ${MONOLITHIC_TESTS} ${HDR} ${SRC}
    do
        if [ "${test}" -nt "${libCStreamedXML}" ]
        then
            MODIFIED=1
            break
        fi
    done

    if [ ${MODIFIED} -ne 0 ]
    then
        echo " Compiling"

        SONAME="${libCStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}"
        do_cmd ${CC} ${CFLAGS} -shared -fpic -o "${libCStreamedXML}" \
            -Wl,-soname,${SONAME} \
            ${SRC} ${SO_EXTRA} || return 1

        # make tests work
        do_cmd ln -sf $(basename ${libCStreamedXML}) obj/${SONAME} || return 1

        print_success "Library built"
    else
        print_success "Library up to date"
    fi

    libCStreamedXML_BUILT=1
    libCStreamedXML_HEADER=${HDR}

fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,21 @@
|
|||
# These are external variables, and shouldn't clash with anything else
#  libCStreamedXML_MONOLITHIC
#
# Assembles the single header (obj/StreamedXML.h) and the single source
# file (obj/libCStreamedXML.c) from their parts, once per build run.

SRC="obj/libCStreamedXML.c"
HDR="obj/StreamedXML.h"

MONOLITHIC_TESTS="src/libCStreamedXML/build.lib src/libCStreamedXML/build.monolithic"

if [ -z "${libCStreamedXML_MONOLITHIC}" ]
then
    # header parts, in the order they are concatenated
    MONOLITHIC_SOURCE="src/libCStreamedXML/TopHeader.h"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/types.h"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/functions.h"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/BottomHeader.h"
    make_monolithic ${HDR} C || return 1

    # source parts, in the order they are concatenated
    MONOLITHIC_SOURCE="src/libCStreamedXML/TopSource.c"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/buffer.c"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/defaults.c"
    MONOLITHIC_SOURCE="${MONOLITHIC_SOURCE} src/libCStreamedXML/parser.c"
    make_monolithic ${SRC} C || return 1

    libCStreamedXML_MONOLITHIC=1
    MONOLITHIC_DOC="${MONOLITHIC_DOC} ${HDR}"
fi
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;
# vim: expandtab:ts=4:sw=4
|
|
@ -0,0 +1,79 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/defaults.c
|
||||
*
|
||||
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
static void default_notWellFormed(const struct csxml* ctx, const char* reason)
|
||||
{
|
||||
fprintf(stderr, "Streamed XML is not well formed.\n Line : %d\n Col : %d\n Reason: %s\n",
|
||||
ctx->line + 1, ctx->col + 1, reason);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Default outOfMemory handler: report the failed allocation on stderr.
 *
 * ctx     parser context (unused).
 * amount  number of bytes that could not be allocated.
 */
static void default_outOfMemory(const struct csxml* ctx, size_t amount)
{
    (void)ctx;
    /* %lu expects unsigned long; size_t may be a different type, so
     * convert explicitly instead of passing a mismatched argument */
    fprintf(stderr, "Streamed XML parser: out of memory allocating %lu bytes\n",
        (unsigned long)amount);
}
|
||||
|
||||
|
||||
|
||||
static void default_unknownEntity(const struct csxml* ctx, const char* ent)
|
||||
{
|
||||
fprintf(stderr, "Unknown entity referenced in Streamed XML.\n Line: %d\n Col : %d\n Name: %s\n",
|
||||
ctx->line + 1, ctx->col + 1, ent);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Default handler for single-string callbacks: ignore both arguments and
 * report success (0). */
static int default_discard(const struct csxml* ctx, const char* x)
{
    /* nothing is read from either argument */
    (void)x;
    (void)ctx;

    return 0;
}
|
||||
|
||||
|
||||
|
||||
static int default_cdata(const struct csxml* ctx, const char* data)
|
||||
{
|
||||
return ctx->content(ctx, data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Default PI handler: ignore the processing instruction and report
 * success (0). */
static int default_discardPI(const struct csxml* ctx, const char* target, const char* data)
{
    /* nothing is read from any argument */
    (void)data;
    (void)target;
    (void)ctx;

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/* Default element handler: ignore the element and report success (0). */
static int default_discardElem(const struct csxml* ctx, const char* elemName, int numAttrs)
{
    /* nothing is read from any argument */
    (void)numAttrs;
    (void)elemName;
    (void)ctx;

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/* Default entityRef handler: ignore the entity name and return a null
 * pointer, i.e. supply no replacement text. */
static const char* default_discardEnt(const struct csxml* ctx, const char* ent)
{
    /* nothing is read from either argument */
    (void)ent;
    (void)ctx;

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,102 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/functions.h
|
||||
*
|
||||
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*! \brief Allocate and set up a new parser object.

\retval 0 on error (see \a errno).
\returns Pointer to newly-allocated parser object.

Call this function to obtain a parser context object. The object is initially populated with
sensible defaults (\a expandEntities is 1, the error callback functions print messages to stderr,
the data callback functions discard the data, etc.).

The function takes no arguments; it is declared with \c (void) so that, in C, this is a real
prototype and calls that pass arguments are rejected at compile time.

*/
struct csxml* csxml_newParser(void);
|
||||
|
||||
|
||||
|
||||
/*! \brief Free an existing parser object.
|
||||
|
||||
\param ctx The object to free (may be 0).
|
||||
|
||||
Frees a parser object and all of its associated data structures (strings, lists, etc.).
|
||||
|
||||
*/
|
||||
void csxml_freeParser(struct csxml* ctx);
|
||||
|
||||
|
||||
|
||||
/*! \brief Parse a character.
|
||||
|
||||
\param ctx Parser context.
|
||||
\param ch Character to parse.
|
||||
\retval 0 on success.
|
||||
\retval -1 on XML error.
|
||||
\retval (non-0) if any callback function signals an error.
|
||||
|
||||
This function parses a single character. It will generate callbacks as appropriate. If one of the
|
||||
callback functions returns a non-zero value, that value is returned from this function and the
|
||||
object is put into an error state. If the streamed XML is not well formed, or some other parsing
|
||||
error occurs, -1 will be returned and the object will be put into an error state.
|
||||
|
||||
Once in an error state, the function will simply discard data passed to it and return 0. However,
|
||||
a lower-level parser is still running and is capable of matching stream restart markers, allowing
|
||||
the parser to be reset.
|
||||
|
||||
*/
|
||||
int csxml_feedChar(struct csxml* ctx, char ch);
|
||||
|
||||
|
||||
|
||||
/*! \brief Look up an entity.
|
||||
|
||||
\param ctx Parser context.
|
||||
\param ent Entity name.
|
||||
\retval 0 on error.
|
||||
\returns Pointer to null-terminated string of expanded entity text.
|
||||
|
||||
This function is called to dereference entities. It handles the standard entities itself, and chains
|
||||
on to the entityRef callback of the parser if needed. If the entity is not found, it calls the
|
||||
unknownEntity callback and returns 0.
|
||||
|
||||
*/
|
||||
const char* csxml_entityRef(struct csxml* ctx, const char* ent);
|
||||
|
||||
|
||||
|
||||
/*! \brief Reset parser.
|
||||
|
||||
\param ctx Parser context.
|
||||
|
||||
This function will reset the parser into its initial state, as though no data had yet been
|
||||
encountered. This is called after stream restart markers for example.
|
||||
|
||||
*/
|
||||
void csxml_reset(struct csxml* ctx);
|
||||
|
||||
|
||||
|
||||
|
||||
/*! \brief Parse a block of data.
|
||||
|
||||
\param ctx Parser context.
|
||||
\param data Pointer to block of data.
|
||||
\param amt Number of bytes to parse.
|
||||
|
||||
This function will parse each character in the block of data. If an error is encountered, the usual
|
||||
process (callback, error state) is followed, but this function will continue to feed in data. The
|
||||
idea is that you continue to feed in data from your data source, which might have a restart marker
|
||||
(at which point the parsing will once again continue).
|
||||
|
||||
*/
|
||||
void csxml_feedData(struct csxml* ctx, const char* data, size_t amt);
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,692 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/parser.c
|
||||
*
|
||||
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
//#define DEBUG_STATE_MACHINE
|
||||
|
||||
#ifdef DEBUG_STATE_MACHINE
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
/* States of the character-driven parser. The enumerators keep their
 * implicit values (StateNone == 0 ... StateError == 32). */
enum csxml_State {
    /* stream / element level */
    StateNone,              /* at element or stream level */

    /* restart markers */
    StateRestartMarker,     /* after <![RESTART[ */
    StateRestartMarker1,    /* after first ] */
    StateRestartMarker2,    /* after second ] */

    /* tag opening */
    StateOpen,              /* after < */
    StateOpenBang,          /* after <! */
    StateOpenCdataMarker,   /* after <![ */

    /* CDATA sections */
    StateCDATA,             /* inside the section, collecting text */
    StateCDATA1,            /* first ] */
    StateCDATA2,            /* second ] */

    /* character data */
    StateData,              /* like StateNone, but don't skip whitespace */

    /* comments */
    StateOpenComment,       /* after <!- */
    StateComment,           /* inside the comment, collecting text */
    StateComment2,          /* first - seen inside the comment */
    StateComment3,          /* second - seen; only > may follow */

    /* processing instructions */
    StatePI,                /* after <? (we are currently parsing the target) */
    StatePIData,            /* after target */
    StatePI2,               /* first ? encountered (in target) */
    StatePI3,               /* first ? encountered (in data) */

    /* close tags */
    StateClose,             /* "</" encountered */
    StateClosing,           /* "</x" */
    StateNeedClose2,        /* "</xyz " */

    /* open tags and attributes */
    StateElemName,          /* <x encountered */
    StateElemTag,           /* first space after "<name" encountered */
    StateElemAttrName,
    StateElemAttrEq,        /* name= */
    StateElemAttrVal,       /* name=' or name=" */
    StateElemAttrDone,      /* name='value' */
    StateNeedClose,         /* <elem/ */

    /* entity references */
    StateEntity,            /* & */
    StateCharEntity,        /* &# */
    StateEntityName,        /* &x */

    StateError
};
|
||||
|
||||
/* Literal text that opens a CDATA section. */
static const char* xmlCdataMarker =
    "<![CDATA[";

/* Literal text that opens a stream restart marker (11 characters long). */
static const char* xmlRestartMarker =
    "<![RESTART[";
|
||||
|
||||
|
||||
|
||||
/* Character classes used to drive the state machine. The enumerators keep
 * their implicit values (ClassInvalid == 0 ... ClassNameStartChar == 6). */
enum TokenClass {
    ClassInvalid,       /* control characters other than tab, LF and CR; DEL */
    ClassWhitespace,    /* tab, newline, carriage return, space */
    ClassEntity,        /* '&' */
    ClassOther,         /* any character with no more specific class */
    ClassOpenTag,       /* '<' */
    ClassNameChar,      /* '-', '.' and the digits */
    ClassNameStartChar  /* letters, ':' and '_' */
};
|
||||
|
||||
static const enum TokenClass token_classes[] = {
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 00--03
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 04--07
|
||||
ClassInvalid, ClassWhitespace, ClassWhitespace, ClassInvalid, // control 08, tab, newline, control 0B
|
||||
ClassInvalid, ClassWhitespace, ClassInvalid, ClassInvalid, // control 0C, carriage return, control 0E--0F
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 10--13
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 14--17
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 18--1B
|
||||
ClassInvalid, ClassInvalid, ClassInvalid, ClassInvalid, // control chars 1C--1F
|
||||
ClassWhitespace, ClassOther, ClassOther, ClassOther, // ' ', '!', '"', '#'
|
||||
ClassOther, ClassOther, ClassEntity, ClassOther, // '$', '%', '&', '\''
|
||||
ClassOther, ClassOther, ClassOther, ClassOther, // '(', ')', '*', '+'
|
||||
ClassOther, ClassNameChar, ClassNameChar, ClassOther, // ',', '-', '.', '/'
|
||||
ClassNameChar, ClassNameChar, ClassNameChar, ClassNameChar, // '0'--'3'
|
||||
ClassNameChar, ClassNameChar, ClassNameChar, ClassNameChar, // '4'--'7'
|
||||
ClassNameChar, ClassNameChar, ClassNameStartChar, ClassOther, // '8', '9', ':', ';'
|
||||
ClassOpenTag, ClassOther, ClassOther, ClassOther, // '<', '=', '>', '?'
|
||||
ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '@', 'A'--'C'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'D'--'G'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'H'--'K'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'L'--'O'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'P'--'S'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'T'--'W'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'X'--'Z', '['
|
||||
ClassOther, ClassOther, ClassOther, ClassNameStartChar, // '\\', ']', '^', '_'
|
||||
ClassOther, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // '`', 'a'--'c'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'd'--'g'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'h'--'k'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'l'--'o'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 'p'--'s'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, // 't'--'w'
|
||||
ClassNameStartChar, ClassNameStartChar, ClassNameStartChar, ClassOther, // 'x'--'z', '{'
|
||||
ClassOther, ClassOther, ClassOther, ClassInvalid // '|', '}', '~', control 7F
|
||||
};
|
||||
|
||||
|
||||
|
||||
int csxml_feedChar(struct csxml* ctx, char ch)
|
||||
{
|
||||
enum TokenClass c;
|
||||
int try;
|
||||
|
||||
#ifdef DEBUG_STATE_MACHINE
|
||||
printf("Character '%c' (%02X), state %d\n", ch, ch, ctx->state);
|
||||
#endif
|
||||
|
||||
#define ERROR(_reason) do { \
|
||||
ctx->state = StateError; \
|
||||
ctx->notWellFormed(ctx, _reason); \
|
||||
return -1; \
|
||||
}while(0)
|
||||
|
||||
#define APPEND_CH(_whichbuf, _ch) do { \
|
||||
ctx-> _whichbuf .data[ctx-> _whichbuf .len++] = _ch; \
|
||||
if((ctx-> _whichbuf .len == ctx-> _whichbuf .size) && \
|
||||
do_realloc(ctx, &ctx-> _whichbuf)) return -1; \
|
||||
ctx-> _whichbuf .data[ctx-> _whichbuf .len] = 0; \
|
||||
}while(0)
|
||||
|
||||
#define CLEAR_BUFFER(_whichbuf) do { \
|
||||
ctx-> _whichbuf .data[0] = 0; \
|
||||
ctx-> _whichbuf .len = 0; \
|
||||
}while(0)
|
||||
|
||||
#define TRY(_x) do { \
|
||||
try = (_x); \
|
||||
if(try) { \
|
||||
ctx->state = StateError; \
|
||||
return try; \
|
||||
} \
|
||||
}while(0)
|
||||
|
||||
if(ch == xmlRestartMarker[ctx->restartCount]) {
|
||||
if(++ctx->restartCount == 11) {
|
||||
csxml_reset(ctx);
|
||||
ctx->state = StateRestartMarker;
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
ctx->restartCount = 0;
|
||||
}
|
||||
|
||||
if(ch & ~0x7F) c = ClassInvalid;
|
||||
else c = token_classes[(int)ch];
|
||||
|
||||
if(ch == '\r') {
|
||||
ctx->skipNextNewline = 1;
|
||||
ch = '\n';
|
||||
++ctx->line;
|
||||
ctx->col = 0;
|
||||
} else if(ch == '\n') {
|
||||
if(ctx->skipNextNewline) return 0;
|
||||
++ctx->line;
|
||||
ctx->col = 0;
|
||||
} else {
|
||||
ctx->skipNextNewline = 0;
|
||||
}
|
||||
|
||||
if(c == ClassInvalid) ERROR("Restricted character encountered.");
|
||||
|
||||
// deal with char appropriately, according to state
|
||||
switch(ctx->state) {
|
||||
case StateError:
|
||||
return 0;
|
||||
|
||||
case StateNone:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case ClassOpenTag:
|
||||
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
|
||||
ctx->state = StateOpen;
|
||||
CLEAR_BUFFER(buffer);
|
||||
break;
|
||||
|
||||
case ClassEntity:
|
||||
if(ctx->expandEntities) {
|
||||
if(!ctx->elementDepth) ERROR("Entities cannot appear at stream level.");
|
||||
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->parsingAttr = 0;
|
||||
ctx->state = StateEntity;
|
||||
break;
|
||||
}
|
||||
|
||||
// fall through
|
||||
default:
|
||||
if(!ctx->elementDepth) ERROR("Content cannot appear at stream level.");
|
||||
if(ctx->buffer.len) TRY(ctx->whiteSpace(ctx, ctx->buffer.data));
|
||||
ctx->state = StateData;
|
||||
CLEAR_BUFFER(buffer);
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateData:
|
||||
switch(c) {
|
||||
case ClassOpenTag:
|
||||
TRY(ctx->content(ctx, ctx->buffer.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateOpen;
|
||||
break;
|
||||
|
||||
case ClassEntity:
|
||||
ctx->parsingAttr = 0;
|
||||
ctx->state = StateEntity;
|
||||
break;
|
||||
|
||||
default:
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateCDATA:
|
||||
if(ch == ']') ctx->state = StateCDATA1;
|
||||
else APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case StateCDATA1:
|
||||
if(ch == ']') ctx->state = StateCDATA2;
|
||||
else {
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateCDATA;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateCDATA2:
|
||||
if(ch == '>') {
|
||||
TRY(ctx->cdata(ctx, ctx->buffer.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateNone;
|
||||
} else if(ch == ']') {
|
||||
APPEND_CH(buffer, ']');
|
||||
} else {
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateCDATA;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateRestartMarker:
|
||||
if(ch == ']') ctx->state = StateRestartMarker1;
|
||||
else APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case StateRestartMarker1:
|
||||
if(ch == ']') ctx->state = StateRestartMarker2;
|
||||
else {
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateRestartMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateRestartMarker2:
|
||||
if(ch == '>') {
|
||||
TRY(ctx->streamRestart(ctx, ctx->buffer.data));
|
||||
csxml_reset(ctx);
|
||||
|
||||
} else if(ch == ']') {
|
||||
APPEND_CH(buffer, ']');
|
||||
|
||||
} else {
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ']');
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateRestartMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateOpen:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
ctx->state = StateElemName;
|
||||
ctx->elemAttrNames.len = 0;
|
||||
ctx->elemAttrVals.len = 0;
|
||||
CLEAR_BUFFER(elemName);
|
||||
APPEND_CH(elemName, ch);
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch == '!') ctx->state = StateOpenBang;
|
||||
else if(ch == '?') {
|
||||
ctx->state = StatePI;
|
||||
CLEAR_BUFFER(buffer2);
|
||||
} else if(ch == '/') {
|
||||
if(!ctx->elementDepth) ERROR("Encountered a close tag at stream level.");
|
||||
ctx->state = StateClose;
|
||||
} else ERROR("Invalid start character for element.");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StatePI:
|
||||
if(ch == '?') ctx->state = StatePI2;
|
||||
else if(c == ClassWhitespace) {
|
||||
ctx->state = StatePIData;
|
||||
CLEAR_BUFFER(buffer);
|
||||
} else APPEND_CH(buffer2, ch);
|
||||
break;
|
||||
|
||||
case StatePI2:
|
||||
if(ch != '>') ERROR("Invalid target for PI");
|
||||
else {
|
||||
TRY(ctx->PI(ctx, ctx->buffer2.data, 0));
|
||||
ctx->state = StateNone;
|
||||
}
|
||||
break;
|
||||
|
||||
case StatePIData:
|
||||
if(ch == '?') ctx->state = StatePI3;
|
||||
else APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case StatePI3:
|
||||
if(ch == '>') {
|
||||
TRY(ctx->PI(ctx, ctx->buffer2.data, ctx->buffer.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateNone;
|
||||
} else if(ch == '?') {
|
||||
APPEND_CH(buffer, '?');
|
||||
} else {
|
||||
APPEND_CH(buffer, '?');
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StatePIData;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateOpenBang:
|
||||
if(ch == '[') {
|
||||
// restart markers handled by lower layer
|
||||
ctx->state = StateOpenCdataMarker;
|
||||
ctx->xmlCount = 3;
|
||||
if(!ctx->elementDepth) ERROR("CDATA sections not valid at stream level.");
|
||||
} else if(ch == '-') ctx->state = StateOpenComment;
|
||||
else ERROR("Invalid special tag.");
|
||||
break;
|
||||
|
||||
case StateOpenCdataMarker:
|
||||
if(ch != xmlCdataMarker[ctx->xmlCount]) ERROR("Invalid marked section.");
|
||||
if(!xmlCdataMarker[++ctx->xmlCount]) ctx->state = StateCDATA;
|
||||
break;
|
||||
|
||||
case StateOpenComment:
|
||||
if(ch != '-') ERROR("Invalid special tag.");
|
||||
ctx->state = StateComment;
|
||||
CLEAR_BUFFER(buffer);
|
||||
break;
|
||||
|
||||
case StateComment:
|
||||
if(ch == '-') ctx->state = StateComment2;
|
||||
else APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case StateComment2:
|
||||
if(ch == '-') ctx->state = StateComment3;
|
||||
else {
|
||||
APPEND_CH(buffer, '-');
|
||||
APPEND_CH(buffer, ch);
|
||||
}
|
||||
break;
|
||||
|
||||
case StateComment3:
|
||||
if(ch != '>') ERROR("`--' not valid in comments");
|
||||
TRY(ctx->comment(ctx, ctx->buffer.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateNone;
|
||||
break;
|
||||
|
||||
case StateElemName:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
ctx->state = StateElemTag;
|
||||
CLEAR_BUFFER(buffer);
|
||||
break;
|
||||
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
APPEND_CH(elemName, ch);
|
||||
break;
|
||||
|
||||
default:
|
||||
switch(ch) {
|
||||
case L'>':
|
||||
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
|
||||
TRY(ctx->element(ctx, ctx->elemName.data, 0));
|
||||
ctx->state = StateNone;
|
||||
++ctx->elementDepth;
|
||||
CLEAR_BUFFER(buffer);
|
||||
break;
|
||||
|
||||
case L'/':
|
||||
ctx->state = StateNeedClose;
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR("Invalid character in tag name.");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemTag:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
break;
|
||||
|
||||
case ClassNameStartChar:
|
||||
ctx->state = StateElemAttrName;
|
||||
CLEAR_BUFFER(buffer);
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
default:
|
||||
switch(ch) {
|
||||
case '>':
|
||||
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
|
||||
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrNames.len));
|
||||
ctx->state = StateNone;
|
||||
++ctx->elementDepth;
|
||||
break;
|
||||
|
||||
case '/':
|
||||
ctx->state = StateNeedClose;
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR("Invalid character in tag.");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemAttrName:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch != '=') ERROR("Invalid character in attribute name.");
|
||||
TRY(list_push(ctx, &ctx->elemAttrNames, &ctx->buffer));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateElemAttrEq;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateElemAttrEq:
|
||||
if(ch == '\'') ctx->singleQuote = 1;
|
||||
else if(ch == '"') ctx->singleQuote = 0;
|
||||
else ERROR("Invalid character in attribute.");
|
||||
ctx->state = StateElemAttrVal;
|
||||
break;
|
||||
|
||||
case StateElemAttrVal:
|
||||
if((ctx->singleQuote && ch == '\'') || (!ctx->singleQuote && ch == '"')) {
|
||||
TRY(list_push(ctx, &ctx->elemAttrVals, &ctx->buffer));
|
||||
ctx->state = StateElemAttrDone;
|
||||
} else if(ctx->expandEntities && ch == L'&') {
|
||||
ctx->parsingAttr = 1;
|
||||
ctx->state = StateEntity;
|
||||
} else APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case StateElemAttrDone:
|
||||
switch(c) {
|
||||
case ClassWhitespace:
|
||||
ctx->state = StateElemTag;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch == '/') {
|
||||
ctx->state = StateNeedClose;
|
||||
} else if(ch == '>') {
|
||||
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
|
||||
TRY(list_push(ctx, &ctx->elemStack, &ctx->elemName));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateNone;
|
||||
++ctx->elementDepth;
|
||||
} else ERROR("Invalid character after attribute.");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case StateNeedClose:
|
||||
if(ch != '>') ERROR("Stray `/' in open tag.");
|
||||
TRY(ctx->element(ctx, ctx->elemName.data, ctx->elemAttrVals.len));
|
||||
TRY(ctx->closeTag(ctx, ctx->elemName.data));
|
||||
CLEAR_BUFFER(buffer);
|
||||
ctx->state = StateNone;
|
||||
break;
|
||||
|
||||
case StateClose:
|
||||
if(c != ClassNameStartChar) ERROR("Invalid character in close tag name.");
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateClosing;
|
||||
break;
|
||||
|
||||
case StateClosing:
|
||||
switch(c) {
|
||||
case ClassNameStartChar:
|
||||
case ClassNameChar:
|
||||
APPEND_CH(buffer, ch);
|
||||
break;
|
||||
|
||||
case ClassWhitespace:
|
||||
ctx->state = StateNeedClose2;
|
||||
break;
|
||||
|
||||
default:
|
||||
if(ch != '>') ERROR("Invalid character in close tag name.");
|
||||
TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
|
||||
if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
|
||||
TRY(ctx->closeTag(ctx, ctx->elemName.data));
|
||||
ctx->state = StateNone;
|
||||
--ctx->elementDepth;
|
||||
CLEAR_BUFFER(buffer);
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
case StateNeedClose2:
|
||||
if(c == ClassWhitespace) break;
|
||||
if(ch != '>') ERROR("Invalid data in close tag.");
|
||||
TRY(buffer_copy(ctx, &ctx->elemName, list_pop(&ctx->elemStack)));
|
||||
if(strcmp(ctx->elemName.data, ctx->buffer.data)) ERROR("Mismatched close tag.");
|
||||
TRY(ctx->closeTag(ctx, ctx->elemName.data));
|
||||
ctx->state = StateNone;
|
||||
--ctx->elementDepth;
|
||||
CLEAR_BUFFER(buffer);
|
||||
break;
|
||||
|
||||
case StateEntity:
|
||||
if(ch == '#') {
|
||||
ctx->state = StateCharEntity;
|
||||
ctx->entityChar = 0;
|
||||
} else if(c == ClassNameStartChar) {
|
||||
APPEND_CH(buffer, ch);
|
||||
ctx->state = StateEntityName;
|
||||
} else ERROR("Invalid entity name.");
|
||||
break;
|
||||
|
||||
case StateCharEntity:
|
||||
if(ch == ';') {
|
||||
APPEND_CH(buffer, ctx->entityChar);
|
||||
ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
|
||||
break;
|
||||
|
||||
} else if(ch >= '0' && ch <= '9') {
|
||||
ctx->entityChar *= 10;
|
||||
ctx->entityChar += (ch - '0');
|
||||
if(!ctx->entityChar || ctx->entityChar > 126) ERROR("Character code out of range in character entity.");
|
||||
} else ERROR("Invalid character in character entity.");
|
||||
break;
|
||||
|
||||
case StateEntityName:
|
||||
if(ch == ';') {
|
||||
const char* e = csxml_entityRef(ctx, ctx->buffer2.data);
|
||||
TRY(!e || buffer_strcat(ctx, &ctx->buffer, e));
|
||||
ctx->state = ctx->parsingAttr ? StateElemAttrVal : StateData;
|
||||
break;
|
||||
}
|
||||
if(c != ClassNameChar && c != ClassNameStartChar) ERROR("Invalid entity name.");
|
||||
APPEND_CH(buffer2, ch);
|
||||
break;
|
||||
}
|
||||
++ctx->col;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char* csxml_entityRef(struct csxml* ctx, const char* ent)
|
||||
{
|
||||
const char* q = 0;
|
||||
|
||||
if(!strcmp(ent, "quot")) return "\"";
|
||||
if(!strcmp(ent, "amp")) return "&";
|
||||
if(!strcmp(ent, "apos")) return "'";
|
||||
if(!strcmp(ent, "lt")) return "<";
|
||||
if(!strcmp(ent, "gt")) return ">";
|
||||
|
||||
q = ctx->entityRef(ctx, ent);
|
||||
if(!q) {
|
||||
ctx->unknownEntity(ctx, ent);
|
||||
ctx->state = StateError;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void csxml_reset(struct csxml* ctx)
|
||||
{
|
||||
CLEAR_BUFFER(buffer);
|
||||
CLEAR_BUFFER(buffer2);
|
||||
CLEAR_BUFFER(elemName);
|
||||
ctx->state = StateNone;
|
||||
ctx->xmlCount = 0;
|
||||
ctx->elementDepth = 0;
|
||||
ctx->restartCount = 0;
|
||||
ctx->skipNextNewline = 0;
|
||||
ctx->parsingAttr = 0;
|
||||
ctx->line = 0;
|
||||
ctx->col = 0;
|
||||
ctx->elemStack.len = 0;
|
||||
ctx->elemAttrNames.len = 0;
|
||||
ctx->elemAttrVals.len = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#undef ERROR
|
||||
#undef APPEND_CH
|
||||
#undef CLEAR_BUFFER
|
||||
|
||||
|
||||
|
||||
void csxml_freeParser(struct csxml* ctx)
|
||||
{
|
||||
if(!ctx) return;
|
||||
buffer_free(&ctx->buffer);
|
||||
buffer_free(&ctx->buffer2);
|
||||
buffer_free(&ctx->elemName);
|
||||
list_free(&ctx->elemStack);
|
||||
list_free(&ctx->elemAttrNames);
|
||||
list_free(&ctx->elemAttrVals);
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct csxml* csxml_newParser()
|
||||
{
|
||||
struct csxml* ctx = 0;
|
||||
|
||||
ctx = malloc(sizeof(struct csxml));
|
||||
if(!ctx) return 0;
|
||||
memset(ctx, 0, sizeof(struct csxml));
|
||||
|
||||
if(buffer_init(ctx, &ctx->buffer) || buffer_init(ctx, &ctx->buffer2) || buffer_init(ctx, &ctx->elemName)) {
|
||||
csxml_freeParser(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ctx->expandEntities = 1;
|
||||
|
||||
ctx->notWellFormed = default_notWellFormed;
|
||||
ctx->outOfMemory = default_outOfMemory;
|
||||
ctx->unknownEntity = default_unknownEntity;
|
||||
ctx->whiteSpace = default_discard;
|
||||
ctx->content = default_discard;
|
||||
ctx->cdata = default_cdata;
|
||||
ctx->streamRestart = default_discard;
|
||||
ctx->PI = default_discardPI;
|
||||
ctx->comment = default_discard;
|
||||
ctx->element = default_discardElem;
|
||||
ctx->closeTag = default_discard;
|
||||
ctx->entityRef = default_discardEnt;
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Feed a block of bytes to the parser, one character at a time.
 *
 * ctx   Parsing context.
 * data  Pointer to the first byte to parse.
 * amt   Number of bytes available at data.
 *
 * Every byte is passed to csxml_feedChar() in order. The value returned by
 * csxml_feedChar() is discarded, so all amt bytes are always submitted.
 */
void csxml_feedData(struct csxml* ctx, const char* data, size_t amt)
{
    size_t i;

    for(i = 0; i < amt; ++i) {
        csxml_feedChar(ctx, data[i]);
    }
}
|
||||
|
||||
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
|
@ -0,0 +1,21 @@
|
|||
# libCStreamedXML/src/lib/clib/pkgconf.in
|
||||
#
|
||||
# Metadata file for pkg-config
|
||||
# ( http://www.freedesktop.org/software/pkgconfig/ )
|
||||
#
|
||||
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
# Released under the GNU GPLv2. See file COPYING or
|
||||
# http://www.gnu.org/copyleft/gpl.html for details.
|
||||
#
|
||||
|
||||
# Name, description
|
||||
Name: @TODO@
|
||||
Description: @TODO@
|
||||
Version: @VERSION@
|
||||
|
||||
# Requirements
|
||||
Requires:
|
||||
|
||||
# Compilation information
|
||||
Libs: -L@LIBDIR@ -lCStreamedXML
|
||||
Cflags: -I@INCLUDEDIR@
|
|
@ -0,0 +1,17 @@
|
|||
# libCStreamedXML/src/libCStreamedXML/soversion
|
||||
#
|
||||
# (c)2006, Laurence Withers, <l@lwithers.me.uk>.
|
||||
# Released under the GNU GPLv2. See file COPYING or
|
||||
# http://www.gnu.org/copyleft/gpl.html for details.
|
||||
#
|
||||
|
||||
|
||||
|
||||
# SOMAJOR and SOMINOR are included in the library's soname. They need to
|
||||
# be bumped on a binary-incompatible release. They are both single
|
||||
# integers.
|
||||
SOMAJOR=0
|
||||
SOMINOR=0
|
||||
|
||||
# SOMICRO is bumped every time there is a binary-compatible release.
|
||||
SOMICRO=0
|
|
@ -0,0 +1,320 @@
|
|||
/* libCStreamedXML/src/libCStreamedXML/types.h
|
||||
*
|
||||
* (c)2006, Laurence Withers. Released under the GNU GPL. See file
|
||||
* COPYING for more information / terms of license.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*! \brief String buffer.

A simple growable string. The contents are kept null-terminated at all times.
\a len is the length of the string, not counting the terminating null, and
\a size is the number of bytes allocated at \a data. The \a data pointer is
reallocated as necessary, i.e. when \a len becomes equal to \a size. Neither
\a data nor \a size may ever be 0.

*/
struct csxml_buf {
    char* data;     /*!< Pointer to the string (null terminated). */
    size_t len;     /*!< Length of the string (excluding the null). */
    size_t size;    /*!< Size of the allocated buffer. */
};
|
||||
|
||||
|
||||
|
||||
/*! \brief String list.

A dynamic array of strings, each held in a struct csxml_buf. \a len is the
number of elements currently in use and \a size is the number of elements
allocated at \a data. The \a data pointer is reallocated as necessary, i.e.
when \a len becomes equal to \a size. Unlike struct csxml_buf, an empty list
may have both \a data and \a size equal to zero.

*/
struct csxml_list {
    struct csxml_buf* data; /*!< Array of buffers (every allocated element is valid and initialised). */
    size_t len;             /*!< Number of elements in use. */
    size_t size;            /*!< Number of allocated elements. */
};
|
||||
|
||||
|
||||
|
||||
/*! \brief Streamed XML parser context.
|
||||
|
||||
This structure contains all the details of a Streamed XML parsing operation. Most fields are
|
||||
internal to the parser and should not be touched; those are briefly documented here. You may want to
|
||||
change the \a expandEntities option, examine the \a line and \a col variables, and change the
|
||||
callback functions.
|
||||
|
||||
Those members marked Internal: are subject to change (either in semantics, type or existence).
|
||||
|
||||
*/
|
||||
struct csxml {
|
||||
/// Option: change "&lt;" to "<", etc. if non-zero (the default). Change to 0 to leave
|
||||
/// entities inline.
|
||||
int expandEntities;
|
||||
|
||||
/// Current line (from 0, starts from last restart marker).
|
||||
int line;
|
||||
/// Current column (from 0, starts from last restart marker).
|
||||
int col;
|
||||
|
||||
/// List of attribute names for current element.
|
||||
struct csxml_list elemAttrNames;
|
||||
/// List of attribute values for current element.
|
||||
struct csxml_list elemAttrVals;
|
||||
|
||||
|
||||
|
||||
/*! \brief Error callback: Streamed XML is not well formed.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param reason Human-readable description of error.
|
||||
|
||||
This function is called whenever badly-formed Streamed XML is encountered (e.g. if content is
|
||||
encountered at stream-level).
|
||||
|
||||
The default implementation prints a formatted message to \a stderr.
|
||||
|
||||
*/
|
||||
void (*notWellFormed)(const struct csxml* ctx, const char* reason);
|
||||
|
||||
|
||||
|
||||
/*! \brief Error callback: out of memory.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param amount Number of bytes we tried to allocate.
|
||||
|
||||
This function is called whenever a dynamic string resizing operation (or similar) fails. It is
|
||||
called with the number of bytes the library attempted to allocate.
|
||||
|
||||
The default implementation prints a formatted message to \a stderr.
|
||||
|
||||
\todo When parsing extremely long content sections, we could just call the content() callback
|
||||
when memory is low rather than failing altogether.
|
||||
|
||||
*/
|
||||
void (*outOfMemory)(const struct csxml* ctx, size_t amount);
|
||||
|
||||
|
||||
|
||||
/*! \brief Error callback: reference to unknown entity.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param ent Name of the referenced entity.
|
||||
|
||||
If an entity is referenced by name (e.g. "&myEntity;" is encountered) but the lookup in the
|
||||
default entity list and through the entityRef() callback fails to resolve the content, this
|
||||
function is called.
|
||||
|
||||
The default implementation prints a formatted message to \a stderr.
|
||||
|
||||
*/
|
||||
void (*unknownEntity)(const struct csxml* ctx, const char* ent);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: whitespace.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param ws Pointer to string of whitespace.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This function is called whenever a block of whitespace is encountered at stream level or
|
||||
immediately after some structural element (i.e. on leading whitespace within elements). It is
|
||||
generally safe to discard it, unless you need to reproduce the file verbatim. Once content is
|
||||
encountered within an element, any further whitespace will be reported through the content()
|
||||
callback, up to the next structural element.
|
||||
|
||||
The default implementation discards the data.
|
||||
|
||||
*/
|
||||
int (*whiteSpace)(const struct csxml* ctx, const char* ws);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: content.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param content Pointer to string of cdata.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This function is called whenever textual content (cdata) is encountered inside an element. It
|
||||
may be called multiple times simultaneously, in which case the data from each call should be
|
||||
concatenated. If \a expandEntities is non-zero, then this function will also be called with the
|
||||
expanded text from any entities encountered, rather than the entities themselves.
|
||||
|
||||
The default implementation discards the data.
|
||||
|
||||
*/
|
||||
int (*content)(const struct csxml* ctx, const char* content);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: cdata block.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param cdata Pointer to string of cdata.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This function is called for all data inside a CDATA marked section. It is useful to
|
||||
differentiate between cdata encoded in this manner and normal cdata if you are trying to
|
||||
reproduce the orginal file verbatim.
|
||||
|
||||
The default implementation calls content() with the same data.
|
||||
|
||||
*/
|
||||
int (*cdata)(const struct csxml* ctx, const char* cdata);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: stream restart marker.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param marker Pointer to marker string.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
Whenever a stream restart marker is encountered, this function is called with the data found
|
||||
inside the marker.
|
||||
|
||||
The default implementation ignores the data (but note that the parser state is always reset,
|
||||
regardless of what the callback does).
|
||||
|
||||
*/
|
||||
int (*streamRestart)(const struct csxml* ctx, const char* marker);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: processing instruction.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param target PI target.
|
||||
\param data PI data (may be 0).
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This callback is used to report processing instructions (PIs). If the PI has no data, then the
|
||||
\a data variable can be set to 0.
|
||||
|
||||
The default implementation ignores the data.
|
||||
|
||||
*/
|
||||
int (*PI)(const struct csxml* ctx, const char* target, const char* data);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: comment.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param comment Pointer to comment string data.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This callback is used to report comments. It is only useful if you wish to reproduce the source
|
||||
file verbatim (for instance, you want to alter some data in a file but preserve user comments).
|
||||
|
||||
The default implementation ignores the data.
|
||||
|
||||
*/
|
||||
int (*comment)(const struct csxml* ctx, const char* comment);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: element.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param elemName Pointer to element name string.
|
||||
\param numAttrs Number of attributes.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This callback is used whenever an element open tag is encountered. To parse attributes, use the
|
||||
numAttrs variable to determine how many there are, and then access them with:
|
||||
|
||||
<pre> // get attribute name
|
||||
const char* elemName = ctx->elemAttrNames.data[i].data;
|
||||
|
||||
// get attribute value
|
||||
const char* elemValue = ctx->elemAttrValues.data[i].data;</pre>
|
||||
|
||||
In the case of an empty element, the closeTag() callback will be called immediately afterwards.
|
||||
|
||||
The default implemenation ignores the data.
|
||||
|
||||
*/
|
||||
int (*element)(const struct csxml* ctx, const char* elemName, int numAttrs);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: close tag.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param elemName Element name.
|
||||
\returns Zero on success, non-zero on error.
|
||||
|
||||
This function is called whenever an element close tag is encountered. The element name will
|
||||
always match the open tag element name (if not, the Streamed XML is not well formed, and an
|
||||
error will be signalled before this callback is triggered).
|
||||
|
||||
The default implementation ignores the data.
|
||||
|
||||
*/
|
||||
int (*closeTag)(const struct csxml* ctx, const char* elemName);
|
||||
|
||||
|
||||
|
||||
/*! \brief Callback: entity reference.
|
||||
|
||||
\param ctx Parsing context.
|
||||
\param ent Entity name.
|
||||
\retval 0 if the entity name does not match any known entity.
|
||||
\returns Pointer to null-terminated string data for expanding entity.
|
||||
|
||||
This function is called whenever \a expandEntities is true and an entity is encountered in cdata
|
||||
or an attribute value. It is only called if the entity does not match one of the five built-in
|
||||
defaults. You only need to provide an implementation if you know about some additional entities.
|
||||
|
||||
The default implementation returns 0 ("no such entity").
|
||||
|
||||
*/
|
||||
const char* (*entityRef)(const struct csxml* ctx, const char* ent);
|
||||
|
||||
|
||||
|
||||
/// Internal: string buffer.
|
||||
struct csxml_buf buffer;
|
||||
/// Internal: string buffer.
|
||||
struct csxml_buf buffer2;
|
||||
/// Internal: string buffer.
|
||||
struct csxml_buf elemName;
|
||||
/// Internal: stack of element names, used to match open/close tags.
|
||||
struct csxml_list elemStack;
|
||||
/// Internal: state machine state.
|
||||
int state;
|
||||
/// Internal: for matching strings.
|
||||
int xmlCount;
|
||||
/// Internal: depth in element tree (0 = stream level).
|
||||
int elementDepth;
|
||||
/// Internal: for matching against restart marker string.
|
||||
int restartCount;
|
||||
/// Internal: for stripping windows line endings.
|
||||
int skipNextNewline;
|
||||
/// Internal: flag to check if we are currently parsing an attribute while expanding an entity.
|
||||
int parsingAttr;
|
||||
/// Internal: flag to record if current attribute was quoted with ' or " char.
|
||||
int singleQuote;
|
||||
/// Internal: used to expand character entities.
|
||||
int entityChar;
|
||||
};
|
||||
|
||||
/* options for text editors
|
||||
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
|
||||
vim: expandtab:ts=4:sw=4
|
||||
*/
|
Loading…
Reference in New Issue