From 964b44c35315f0d1a6fd6ec8d0803e31bdae04aa Mon Sep 17 00:00:00 2001 From: hno <> Date: Fri, 10 Dec 2004 07:49:53 +0000 Subject: [PATCH] Feature #1005: libxml2 as ESI parser by jojo@fistofbenztown.de (Joachim Bauch) Todo: With this it becomes an apparent need to modularize the ESI parsers slightly to allow the user to select which ESI parsers to build. --- CONTRIBUTORS | 1 + configure.in | 6 +- src/ESILibxml2Parser.cc | 154 ++++++++++++++++++++++++++++++++++++++++ src/ESILibxml2Parser.h | 77 ++++++++++++++++++++ src/ESIParser.cc | 6 +- src/Makefile.am | 6 +- src/cf.data.pre | 6 +- 7 files changed, 247 insertions(+), 9 deletions(-) create mode 100644 src/ESILibxml2Parser.cc create mode 100644 src/ESILibxml2Parser.h diff --git a/CONTRIBUTORS b/CONTRIBUTORS index c367455f02..d16511eed7 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -91,5 +91,6 @@ and ideas to make this software available. Leeann Bent Bruce Murphy Francis Daly + Joachim Bauch Duane Wessels diff --git a/configure.in b/configure.in index 3986ebaa94..84c0849411 100644 --- a/configure.in +++ b/configure.in @@ -3,7 +3,7 @@ dnl Configuration input file for Squid dnl dnl Duane Wessels, wessels@nlanr.net, February 1996 (autoconf v2.9) dnl -dnl $Id: configure.in,v 1.362 2004/10/20 22:41:03 hno Exp $ +dnl $Id: configure.in,v 1.363 2004/12/10 00:49:53 hno Exp $ dnl dnl dnl @@ -13,7 +13,7 @@ AC_CONFIG_SRCDIR([src/main.cc]) AC_CONFIG_AUX_DIR(cfgaux) AM_INIT_AUTOMAKE(squid, 3.0-PRE3-CVS) AM_CONFIG_HEADER(include/autoconf.h) -AC_REVISION($Revision: 1.362 $)dnl +AC_REVISION($Revision: 1.363 $)dnl AC_PREFIX_DEFAULT(/usr/local/squid) AM_MAINTAINER_MODE @@ -562,7 +562,7 @@ AC_CACHE_CHECK(whether to enable ESI,ac_cv_use_esi, ac_cv_use_esi=no) if test "$ac_cv_use_esi" = "yes" ; then AC_DEFINE(ESI,1,[Compile the ESI processor and Surrogate header support]) AM_CONDITIONAL(USE_ESI, true) - XTRA_LIBS="$XTRA_LIBS -lexpat" + XTRA_LIBS="$XTRA_LIBS -lexpat -lxml2" else AC_DEFINE(ESI,0,[Compile the ESI processor and Surrogate 
header support]) fi diff --git a/src/ESILibxml2Parser.cc b/src/ESILibxml2Parser.cc new file mode 100644 index 0000000000..293d41426d --- /dev/null +++ b/src/ESILibxml2Parser.cc @@ -0,0 +1,154 @@ +/* + * $Id: ESILibxml2Parser.cc,v 1.1 2004/12/10 00:49:53 hno Exp $ + * + * AUTHOR: Joachim Bauch (mail@joachim-bauch.de) + * + * SQUID Web Proxy Cache http://www.squid-cache.org/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from + * the Internet community; see the CONTRIBUTORS file for full + * details. Many organizations have provided support for Squid's + * development; see the SPONSORS file for full details. Squid is + * Copyrighted (C) 2001 by the Regents of the University of + * California; see the COPYRIGHT file for full details. Squid + * incorporates software developed and/or copyrighted by other + * sources; see the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
+ * + */ + +/* + * The ESI Libxml2 parser is Copyright (c) 2004 by Joachim Bauch + * http://www.joachim-bauch.de + * mail@joachim-bauch.de + */ + +#include "squid.h" +#include "ESILibxml2Parser.h" + +#ifdef sprintf +// ugly, but needed to use correct sprintf function below +#undef sprintf +#endif + +#include <stdio.h> + +// the global document that will store the resolved entity +// definitions +static htmlDocPtr entity_doc = NULL; + +// the SAX callback functions +void esi_startElementSAXFunc(void * ctx, const xmlChar * name, const xmlChar ** atts) +{ + int count=0; + xmlChar **tmp = (xmlChar **)atts; + + while (tmp && *tmp != NULL) { + count++; + tmp++; + } + + // we increased on every key and value + count /= 2; + + ESILibxml2Parser *p = (ESILibxml2Parser *)ctx; + + p->getClient()->start((const char *)name, (const char **)atts, count); +} + +void esi_endElementSAXFunc(void * ctx, const xmlChar * name) +{ + ESILibxml2Parser *p = (ESILibxml2Parser *)ctx; + p->getClient()->end((const char *)name); +} + +void esi_commentSAXFunc(void * ctx, const xmlChar * value) +{ + ESILibxml2Parser *p = (ESILibxml2Parser *)ctx; + p->getClient()->parserComment((const char *)value); +} + +void esi_charactersSAXFunc(void *ctx, const xmlChar *ch, int len) +{ + ESILibxml2Parser *p = (ESILibxml2Parser *)ctx; + p->getClient()->parserDefault((const char *)ch, len); +} + +xmlEntityPtr esi_getEntitySAXFunc(void * ctx, const xmlChar * name) +{ + xmlEntityPtr res = xmlGetDocEntity(entity_doc, name); + + if (res == NULL) { + const htmlEntityDesc *ent = htmlEntityLookup(name); + + if (ent != NULL) { + char tmp[32]; + sprintf(tmp, "&#%d;", ent->value); + res = xmlAddDocEntity(entity_doc, (const xmlChar *)name, XML_INTERNAL_GENERAL_ENTITY, NULL, NULL, (const xmlChar *)tmp); + } + } + + return res; +} + +ESILibxml2Parser::ESILibxml2Parser(ESIParserClient *aClient) : theClient (aClient) +{ + xmlSAXHandler sax; + htmlDefaultSAXHandlerInit(); + memset(&sax, 0, sizeof(sax)); + sax.startElement = 
esi_startElementSAXFunc; + sax.endElement = esi_endElementSAXFunc; + sax.comment = esi_commentSAXFunc; + sax.characters = esi_charactersSAXFunc; + sax.getEntity = esi_getEntitySAXFunc; + + /* TODO: grab the document encoding from the headers */ + parser = xmlCreatePushParserCtxt(&sax, static_cast<void *>(this), NULL, 0, NULL); + xmlSetFeature(parser, "substitute entities", 0); + + if (entity_doc == NULL) + entity_doc = htmlNewDoc(NULL, NULL); +} + +ESILibxml2Parser::~ESILibxml2Parser() +{ + xmlFreeParserCtxt(parser); + parser = NULL; +} + +bool +ESILibxml2Parser::parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream) +{ + return (xmlParseChunk(parser, dataToParse, lengthOfData, endOfStream) == 0); +} + +size_t +ESILibxml2Parser::lineNumber() const +{ + return xmlSAX2GetLineNumber(parser); +} + +char const * +ESILibxml2Parser::errorString() const +{ + xmlErrorPtr error = xmlGetLastError(); + + if (error == NULL) + return NULL; + + return error->message; +} diff --git a/src/ESILibxml2Parser.h b/src/ESILibxml2Parser.h new file mode 100644 index 0000000000..b1b7094953 --- /dev/null +++ b/src/ESILibxml2Parser.h @@ -0,0 +1,77 @@ +/* + * $Id: ESILibxml2Parser.h,v 1.1 2004/12/10 00:49:53 hno Exp $ + * + * AUTHOR: Joachim Bauch (mail@joachim-bauch.de) + * + * SQUID Web Proxy Cache http://www.squid-cache.org/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from + * the Internet community; see the CONTRIBUTORS file for full + * details. Many organizations have provided support for Squid's + * development; see the SPONSORS file for full details. Squid is + * Copyrighted (C) 2001 by the Regents of the University of + * California; see the COPYRIGHT file for full details. Squid + * incorporates software developed and/or copyrighted by other + * sources; see the CREDITS file for full details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +/* + * The ESI Libxml2 parser is Copyright (c) 2004 by Joachim Bauch + * http://www.joachim-bauch.de + * mail@joachim-bauch.de + */ + +#ifndef SQUID_ESILIBXML2PARSER_H +#define SQUID_ESILIBXML2PARSER_H + +#include "ESIParser.h" +// workaround for definition of "free" that prevents include of +// parser.h from libxml2 without errors +#ifdef free +#define OLD_FREE free +#undef free +#endif +#include <libxml/parser.h> +#include <libxml/HTMLparser.h> +#include <libxml/HTMLtree.h> + +#ifdef OLD_FREE +#define free OLD_FREE +#endif + +class ESILibxml2Parser : public ESIParser +{ + +public: + ESILibxml2Parser(ESIParserClient *); + ~ESILibxml2Parser(); + /* true on success */ + bool parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream); + size_t lineNumber() const; + char const * errorString() const; + + ESIParserClient *getClient() { return theClient; } + +private: + mutable xmlParserCtxtPtr parser; /* our parser */ + + ESIParserClient *theClient; +}; + +#endif /* SQUID_ESILIBXML2PARSER_H */ diff --git a/src/ESIParser.cc b/src/ESIParser.cc index aa433cad45..d31e5d7b64 100644 --- a/src/ESIParser.cc +++ b/src/ESIParser.cc @@ -1,6 +1,6 @@ /* - * $Id: ESIParser.cc,v 1.1 2003/03/10 04:56:35 robertc Exp $ + * $Id: ESIParser.cc,v 1.2 2004/12/10 00:49:53 hno Exp $ * * DEBUG: section 86 ESI 
processing * AUTHOR: Robert Collins @@ -37,12 +37,16 @@ #include "ESIParser.h" #include "ESIExpatParser.h" #include "ESICustomParser.h" +#include "ESILibxml2Parser.h" char *ESIParser::Type = NULL; ESIParser::Pointer ESIParser::NewParser(ESIParserClient *aClient) { + if (!strcasecmp("libxml2", Type)) + return new ESILibxml2Parser (aClient); + if (!strcasecmp("expat", Type)) return new ESIExpatParser (aClient); diff --git a/src/Makefile.am b/src/Makefile.am index 449c156725..38bf75acd3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,7 +1,7 @@ # # Makefile for the Squid Object Cache server # -# $Id: Makefile.am,v 1.95 2004/10/25 12:40:20 hno Exp $ +# $Id: Makefile.am,v 1.96 2004/12/10 00:49:53 hno Exp $ # # Uncomment and customize the following to suit your needs: # @@ -75,6 +75,8 @@ ESI_ALL_SOURCE = \ ESIInclude.cc \ ESIInclude.h \ ESILiteral.h \ + ESILibxml2Parser.cc \ + ESILibxml2Parser.h \ ESIParser.cc \ ESIParser.h \ ESISegment.cc \ @@ -165,7 +167,7 @@ AM_CXXFLAGS = @SQUID_CXXFLAGS@ SUBDIRS = fs repl auth -INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/lib/libTrie/include +INCLUDES = -I/usr/include/libxml2 -I. -I$(srcdir) -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/lib/libTrie/include EXTRA_PROGRAMS = \ unlinkd \ diff --git a/src/cf.data.pre b/src/cf.data.pre index 467f12cd85..ceb7cf8dd5 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -1,6 +1,6 @@ # -# $Id: cf.data.pre,v 1.363 2004/12/08 00:24:42 hno Exp $ +# $Id: cf.data.pre,v 1.364 2004/12/10 00:49:54 hno Exp $ # # # SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -3006,7 +3006,7 @@ DOC_END NAME: esi_parser IFDEF: ESI -COMMENT: expat|custom +COMMENT: libxml2|expat|custom TYPE: string LOC: ESIParser::Type DEFAULT: custom @@ -4147,7 +4147,7 @@ DOC_START DOC_END NAME: mcast_miss_ttl -IFDEF: MULTICAST_MISS_TTL +IFDEF: MULTICAST_MISS_STREAM TYPE: ushort LOC: Config.mcast_miss.ttl DEFAULT: 16 -- 2.39.5