Compare commits

..

19 Commits

Author SHA1 Message Date
Laurence Withers 0a86b2d57f Release 1.2.8 (split out libCStreamedXML) 2006-11-23 00:17:46 +00:00
Laurence Withers f7fe4bb787 Another bugfix for entity lookups 2006-10-26 11:45:11 +01:00
Laurence Withers 2f2e07b2e2 Bump version 2006-10-26 11:32:34 +01:00
Laurence Withers 593bb8d80c Fix error where first character of entity name in entity reference was
sent to the wrong buffer, and ended up in content rather than entity
name.
2006-10-26 11:32:08 +01:00
Laurence Withers bf2ca8d925 Bump version 2006-10-04 10:48:15 +01:00
Laurence Withers d46aaf2b2e Doc fix. 2006-10-04 10:47:52 +01:00
Laurence Withers 9a33766c62 Further to the previous fix, don't clear the content buffer after an open/close tag
callback -- clear it before. This stops included content from being lost.
2006-10-04 10:47:19 +01:00
Laurence Withers a24d2caad2 Bump version. 2006-09-29 10:56:50 +01:00
Laurence Withers 25af0958a6 Merge branch 'master' of tourmaline:git/libStreamedXML 2006-09-29 10:51:41 +01:00
Laurence Withers 71fda14981 Update internal state before callback; that way, if during the callback we inject some more data
into the parser (e.g. it's an <include> tag or something), the parser is in the correct state.
2006-09-29 10:51:10 +01:00
Laurence Withers f1fa8b5625 Fix a bug where two separate '-' characters in a comment were interpreted as a single '--' sequence. 2006-09-29 10:41:01 +01:00
Laurence Withers 8e3798dbbc Add cast for printf() 2006-09-08 14:04:46 +01:00
Laurence Withers c2d02165d7 Edit pkgconf file, bump version. 2006-08-25 10:43:56 +01:00
Laurence Withers 0cc4d09d65 Upgrade to new build system. 2006-08-25 10:42:32 +01:00
Laurence Withers a2ca4f8122 Add user pointer, bump version. 2006-08-08 14:51:50 +01:00
Laurence Withers 4533df2947 Add in libCStreamedXML and a test program for it. 2006-08-08 12:19:08 +01:00
Laurence Withers dc6a4cf430 Fix bug: in a PI such as <?test ?pi?>, we got into an incorrect state
on the second ?. This is now properly handled.
2006-08-07 12:51:11 +01:00
Laurence Withers 4eec490403 Bump versions. 2006-08-07 11:07:35 +01:00
Laurence Withers 44147acacf Add line/column reporting to exceptions, update it to work with
std::exception as a base, slightly change semantics of entityRef()
function so that it can report line/col.
2006-08-07 11:06:31 +01:00
18 changed files with 149 additions and 59 deletions

8
README
View File

@ -1,4 +1,4 @@
libStreamedXML
libStreamedXML http://www.lwithers.me.uk/projects/libStreamedXML/
========================================================================
(c)2006, Laurence Withers, <l@lwithers.me.uk>.
Released under the GNU GPLv2. See file COPYING or
@ -11,4 +11,8 @@ To build: ./make.sh
To install: ./make.sh install
(you might want to set PREFIX, by default it's /usr/local)
@TODO@
Dependencies
------------
libutf8++
http://www.lwithers.me.uk/projects/libutf8++/

View File

@ -64,6 +64,7 @@ OUTPUT_DIRS="obj html"
# none no special processing happens before each file
# C #line directives are inserted before each file
# and VERSION, VERMAJOR etc. are #defined
# Ch Like C, but for header files (no VERSION #defines)
#
make_monolithic() {
if [ $# -ne 2 ]
@ -82,6 +83,9 @@ make_monolithic() {
then
HASHLINE=1
VERDEFINE=1
elif [ "$2" == "Ch" ]
then
HASHLINE=1
elif [ "$2" == "none" ]
then
HASHLINE=0 # dummy command
@ -261,7 +265,7 @@ else
targets="$@"
fi
for func in "${targets}"
for func in ${targets}
do
case ${func} in
clean)

1
scripts/.gitignore vendored
View File

@ -1,5 +1,6 @@
build.c++.app
build.c++.lib
build.c++.qtapp
build.c++.tests
build.c.app
build.c.lib

View File

@ -5,3 +5,7 @@
*/
#endif
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -177,8 +177,9 @@ public:
/*! \brief Entity reference.
\param name The name of the entity being referred to.
\returns The expanded text.
\throws UnknownEntity if the entity is unknown.
\param[out] value The expanded text.
\retval true if the entity is valid.
\retval false if the entity is not valid.
This function is called whenever an entity reference is encountered,
and \a expandEntities is \a true in your parser. The five standard
@ -186,12 +187,18 @@ public:
but anything else will be passed to this function.
*/
virtual std::wstring entityRef(const std::wstring& name)
virtual bool entityRef(const std::wstring& name, std::wstring& value)
{
throw UnknownEntity(name);
(void)name;
(void)value;
return false;
}
};
};
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -62,3 +62,7 @@ void Decoder::restart()
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -70,3 +70,7 @@ private:
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -8,14 +8,14 @@ namespace lsx {
Exception::Exception(const std::wstring& reason)
: reason(reason)
Exception::Exception(const std::wstring& reason, int line, int col)
: reason(reason), line(line), col(col)
{
}
const char* Exception::what()
const char* Exception::what() const throw()
{
if(utf8Reason.empty()) utf8Reason = utf8::encode(reason, true);
return utf8Reason.c_str();
@ -23,39 +23,46 @@ const char* Exception::what()
NotWellFormed::NotWellFormed(const std::wstring& error)
: Exception(format(error)), error(error)
NotWellFormed::NotWellFormed(const std::wstring& error, int line, int col)
: Exception(format(error, line, col), line, col), error(error)
{
}
std::wstring NotWellFormed::format(const std::wstring& error)
std::wstring NotWellFormed::format(const std::wstring& error, int line, int col)
{
std::wostringstream ost;
ost << L"Streamed XML is not well formed:\n"
<< error;
ost << L"Streamed XML is not well formed:"
<< L"\n line : " << line + 1
<< L"\n column: " << col + 1
<< L"\n reason: " << error;
return ost.str();
}
UnknownEntity::UnknownEntity(const std::wstring& name)
: Exception(format(name)), name(name)
UnknownEntity::UnknownEntity(const std::wstring& name, int line, int col)
: Exception(format(name, line, col), line, col), name(name)
{
}
std::wstring UnknownEntity::format(const std::wstring& name)
std::wstring UnknownEntity::format(const std::wstring& name, int line, int col)
{
std::wostringstream ost;
ost << L"Encountered an unknown entity named '"
<< name
<< L"' in the Streamed XML.";
ost << L"Encountered an unknown entity in the Streamed XML:"
<< L"\n line : " << line + 1
<< L"\n column: " << col + 1
<< L"\n name : " << name;
return ost.str();
}
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -14,30 +14,38 @@ This class is the base class of all exceptions. It simply provides a mechanism f
message; more specific details must be stored in derived classes.
*/
class Exception {
class Exception : public std::exception {
public:
/*! \brief Constructor.
\param reason Reason for the exception.
\param line Line number of input (starting from 0).
\param col Column of input (starting from 0).
The constructor simply stores the reason for the exception, which may be later accessed for
reporting to the user.
*/
Exception(const std::wstring& reason);
Exception(const std::wstring& reason, int line, int col);
/// Destructor.
virtual ~Exception() throw()
{ }
/// Find what caused the error.
virtual const char* what();
virtual const char* what() const throw();
/// Reason for the exception.
const std::wstring reason;
/// Line on which the exception occurred (starting from 0).
const int line;
/// Column on which the exception occurred (starting from 0).
const int col;
private:
std::string utf8Reason;
mutable std::string utf8Reason;
};
@ -45,18 +53,20 @@ private:
/// Thrown when XML is not well formed.
class NotWellFormed : public Exception {
private:
static std::wstring format(const std::wstring& error);
static std::wstring format(const std::wstring& error, int line, int col);
public:
/*! \brief Constructor.
\param error Reason for the error.
\param line Line on which the exception occurred.
\param col Column on which the exception occurred.
The constructor will store the reason for the error. It will also prepare a suitable message
for the Exception base class.
*/
NotWellFormed(const std::wstring& error);
NotWellFormed(const std::wstring& error, int line, int col);
/// Destructor.
virtual ~NotWellFormed() throw()
@ -71,18 +81,20 @@ public:
/// Thrown when an unknown entity is referred to.
class UnknownEntity : public Exception {
private:
static std::wstring format(const std::wstring& name);
static std::wstring format(const std::wstring& name, int line, int col);
public:
/*! \brief Constructor.
\param name Name of the unknown entity.
\param line Line on which the exception occurred.
\param col Column on which the exception occurred.
The constructor will store the name of the unknown entity. It will also prepare a suitable
message for the Exception base class.
*/
UnknownEntity(const std::wstring& name);
UnknownEntity(const std::wstring& name, int line, int col);
/// Destructor.
virtual ~UnknownEntity() throw()
@ -95,3 +107,7 @@ public:
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -22,3 +22,7 @@ class UnknownEntity;
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -37,6 +37,8 @@ void Parser::reset()
skipNextNewline = false;
state = StateNone;
restartCount = 0;
line = 0;
col = 0;
}
@ -59,7 +61,7 @@ void Parser::feedChar(wchar_t ch)
#define ERROR(_reason) do { \
state = StateError; \
throw NotWellFormed(_reason); \
throw NotWellFormed(_reason, line, col); \
}while(0)
if(ch == xmlRestartMarker[restartCount]) {
if(++restartCount == 11) {
@ -84,11 +86,15 @@ void Parser::feedChar(wchar_t ch)
skipNextNewline = true;
ch = L'\n';
c = ClassWhitespace;
++line;
col = 0;
goto doBuffer;
case 0x2028:
ch = L'\n';
c = ClassWhitespace;
++line;
col = 0;
break;
case L' ':
@ -101,6 +107,8 @@ void Parser::feedChar(wchar_t ch)
if(skipNextNewline) return;
ch = L'\n';
c = ClassWhitespace;
++line;
col = 0;
break;
case L':':
@ -155,26 +163,26 @@ doBuffer:
break;
case ClassOpenTag:
if(!buffer.empty()) callback->whiteSpace(buffer);
state = StateOpen;
if(!buffer.empty()) callback->whiteSpace(buffer);
buffer.clear();
break;
case ClassEntity:
if(expandEntities) {
if(!elementDepth) ERROR(L"Entities cannot appear at stream level.");
state = StateEntity;
if(!buffer.empty()) callback->whiteSpace(buffer);
buffer.clear();
parsingAttr = false;
state = StateEntity;
break;
}
// fall through
default:
if(!elementDepth) ERROR(L"Content cannot appear at stream level.");
if(!buffer.empty()) callback->whiteSpace(buffer);
state = StateData;
if(!buffer.empty()) callback->whiteSpace(buffer);
buffer = ch;
break;
}
@ -183,16 +191,16 @@ doBuffer:
case StateData:
switch(c) {
case ClassOpenTag:
state = StateOpen;
callback->content(buffer);
buffer.clear();
state = StateOpen;
break;
case ClassEntity:
state = StateEntity;
callback->content(buffer);
buffer.clear();
parsingAttr = false;
state = StateEntity;
break;
default:
@ -217,9 +225,9 @@ doBuffer:
case StateCDATA2:
if(ch == L'>') {
state = StateNone;
callback->cdata(buffer);
buffer.clear();
state = StateNone;
} else if(ch == L']') {
buffer += ch;
} else {
@ -290,9 +298,9 @@ doBuffer:
case StatePI2:
if(ch != L'>') ERROR(L"Invalid target for PI");
else {
state = StateNone;
callback->PI(buffer2, L"");
buffer.clear();
state = StateNone;
}
break;
@ -303,12 +311,15 @@ doBuffer:
case StatePI3:
if(ch == L'>') {
state = StateNone;
callback->PI(buffer2, buffer);
buffer.clear();
state = StateNone;
} else if(ch == '?') {
buffer += L'?';
} else {
buffer += L'?';
buffer += ch;
state = StatePIData;
}
break;
@ -343,14 +354,15 @@ doBuffer:
else {
buffer += L'-';
buffer += ch;
state = StateComment;
}
break;
case StateComment3:
if(ch != L'>') ERROR(L"`--' not valid in comments");
state = StateNone;
callback->comment(buffer);
buffer.clear();
state = StateNone;
break;
case StateElemName:
@ -370,10 +382,10 @@ doBuffer:
switch(ch) {
case L'>':
elemStack.push_back(buffer);
callback->element(buffer, elemAttrs);
state = StateNone;
++elementDepth;
buffer.clear();
callback->element(buffer, elemAttrs);
++elementDepth;
break;
case L'/':
@ -400,9 +412,9 @@ doBuffer:
switch(ch) {
case L'>':
elemStack.push_back(elemName);
callback->element(elemName, elemAttrs);
buffer.clear();
state = StateNone;
buffer.clear();
callback->element(elemName, elemAttrs);
++elementDepth;
break;
@ -462,10 +474,10 @@ doBuffer:
if(ch == L'/') {
state = StateNeedClose;
} else if(ch == L'>') {
state = StateNone;
buffer.clear();
callback->element(elemName, elemAttrs);
elemStack.push_back(elemName);
buffer.clear();
state = StateNone;
++elementDepth;
} else ERROR(L"Invalid character after attribute.");
break;
@ -474,10 +486,10 @@ doBuffer:
case StateNeedClose:
if(ch != L'>') ERROR(L"Stray `/' in open tag.");
state = StateNone;
buffer.clear();
callback->element(elemName, elemAttrs);
callback->closeTag(elemName);
buffer.clear();
state = StateNone;
break;
case StateClose:
@ -501,9 +513,9 @@ doBuffer:
if(ch != L'>') ERROR(L"Invalid character in close tag name.");
if(elemStack.back() != buffer) ERROR(L"Mismatched close tag.");
elemStack.pop_back();
callback->closeTag(buffer);
buffer.clear();
state = StateNone;
buffer.clear();
callback->closeTag(buffer);
--elementDepth;
}
break;
@ -513,9 +525,9 @@ doBuffer:
if(ch != L'>') ERROR(L"Invalid data in close tag.");
if(elemStack.back() != elemName) ERROR(L"Mismatched close tag.");
elemStack.pop_back();
callback->closeTag(elemName);
buffer.clear();
state = StateNone;
buffer.clear();
callback->closeTag(elemName);
--elementDepth;
break;
@ -566,6 +578,7 @@ doBuffer:
break;
}
#undef ERROR
++col;
}
@ -577,9 +590,16 @@ std::wstring Parser::entityRef(const std::wstring& ent)
if(ent == L"apos") return L"'";
if(ent == L"lt") return L"<";
if(ent == L"gt") return L">";
return callback->entityRef(ent);
std::wstring result;
if(callback->entityRef(ent, result)) return result;
throw UnknownEntity(ent, line, col);
}
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -120,6 +120,9 @@ private:
bool skipNextNewline;
int elementDepth, xmlCount, restartCount;
// for reporting errors
int line, col;
public:
/*! \brief Constructor.
@ -221,3 +224,7 @@ public:
}
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -14,3 +14,7 @@
#include <sstream>
#include <utf8>
#include <stdint.h>
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -8,3 +8,7 @@
// Below are all the includes used throughout the library.
/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/

View File

@ -31,7 +31,7 @@ then
echo " Compiling"
SONAME="${libStreamedXML_BASE}.so.${SOMAJOR}.${SOMINOR}"
do_cmd ${CXX} ${CFLAGS} -shared -fpic -o "${libStreamedXML}" \
do_cmd ${CXX} ${CFLAGS} -Iobj -shared -fpic -o "${libStreamedXML}" \
-Wl,-soname,${SONAME} \
${SRC} ${SO_EXTRA} || return 1

View File

@ -9,7 +9,7 @@ MONOLITHIC_TESTS="src/libStreamedXML/build.lib src/libStreamedXML/build.monolith
if [ -z "${libStreamedXML_MONOLITHIC}" ]
then
MONOLITHIC_SOURCE="$(echo src/libStreamedXML/{TopHeader,ForwardDeclare,Exceptions,Callback,Parser,Decoder,BottomHeader}.h)"
make_monolithic ${HDR} C || return 1
make_monolithic ${HDR} Ch || return 1
MONOLITHIC_SOURCE="$(echo src/libStreamedXML/{TopSource,Exceptions,Parser,Decoder}.cpp)"
make_monolithic ${SRC} C || return 1

View File

@ -11,7 +11,7 @@
# be bumped on a binary-incompatible release. They are both single
# integers.
SOMAJOR=0
SOMINOR=0
SOMINOR=1
# SOMICRO is bumped every time there is a binary-compatible release.
SOMICRO=0
SOMICRO=3

View File

@ -11,8 +11,8 @@
# expected to be in 'major.minor.micro' format. It can optionally be
# suffixed with a string.
VERMAJOR=1
VERMINOR=1
VERMICRO=0
VERMINOR=2
VERMICRO=8
VEREXTRA=""
# kate: replace-trailing-space-save true; space-indent true; tab-width 4;