From: Francesco Chemolli
Date: Thu, 19 Dec 2013 16:00:30 +0000 (+0100)
Subject: Absorb parser work from parser-ng
X-Git-Tag: SQUID_3_5_0_1~217^2~24
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c9a4e310be4faff0c7457175ec84cbbf86f967cb;p=thirdparty%2Fsquid.git

Absorb parser work from parser-ng
---

diff --git a/configure.ac b/configure.ac
index 4fd8209975..5fc55190a7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3429,6 +3429,7 @@ AC_CONFIG_FILES([
  src/ipc/Makefile
  src/ssl/Makefile
  src/mgr/Makefile
+ src/parser/Makefile
  src/snmp/Makefile
  contrib/Makefile
  icons/Makefile
diff --git a/src/Makefile.am b/src/Makefile.am
index f39833ce91..1385879c7d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,8 +46,8 @@ LOADABLE_MODULES_SOURCES = \
  LoadableModules.h \
  LoadableModules.cc

-SUBDIRS = base anyp comm eui acl format fs repl
-DIST_SUBDIRS = base anyp comm eui acl format fs repl
+SUBDIRS = base anyp parser comm eui acl format fs repl
+DIST_SUBDIRS = base anyp parser comm eui acl format fs repl

 if ENABLE_AUTH
 SUBDIRS += auth
@@ -643,6 +643,7 @@ squid_LDADD = \
  $(ESI_LIBS) \
  $(SSL_LIBS) \
  $(SNMP_LIBS) \
+ parser/libsquid-parser.la \
  $(top_builddir)/lib/libmisccontainers.la \
  $(top_builddir)/lib/libmiscencoding.la \
  $(top_builddir)/lib/libmiscutil.la \
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
new file mode 100644
index 0000000000..8a05b46a78
--- /dev/null
+++ b/src/parser/Makefile.am
@@ -0,0 +1,50 @@
+include $(top_srcdir)/src/Common.am
+include $(top_srcdir)/src/TestHeaders.am
+
+EXTRA_PROGRAMS = \
+ testTokenizer
+
+check_PROGRAMS += testTokenizer
+TESTS += testTokenizer
+
+noinst_LTLIBRARIES = libsquid-parser.la
+
+libsquid_parser_la_SOURCES = \
+ Tokenizer.h \
+ Tokenizer.cc
+
+SBUF_SOURCE= \
+ $(top_srcdir)/src/base/CharacterSet.h \
+ $(top_srcdir)/src/SBuf.h \
+ $(top_srcdir)/src/SBuf.cc \
+ $(top_srcdir)/src/MemBlob.h \
+ $(top_srcdir)/src/MemBlob.cc \
+ $(top_srcdir)/src/OutOfBoundsException.h \
+ $(top_srcdir)/src/SBufExceptions.h \
+ $(top_srcdir)/src/SBufExceptions.cc \
+ $(top_srcdir)/src/String.cc \
+ $(top_srcdir)/src/SquidString.h \
+ $(top_srcdir)/src/base/TextException.h \
+ $(top_srcdir)/src/base/TextException.cc
+
+testTokenizer_SOURCES = \
+ CharacterSet.h \
+ $(SBUF_SOURCE) \
+ testTokenizer.h \
+ testTokenizer.cc \
+ Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+ $(top_srcdir)/src/tests/testMain.cc \
+ $(top_srcdir)/src/tests/stub_mem.cc \
+ $(top_srcdir)/src/tests/stub_debug.cc \
+ $(top_srcdir)/src/tests/stub_time.cc \
+ $(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
+testTokenizer_LDFLAGS = $(LIBADD_DL)
+testTokenizer_LDADD = \
+ libsquid-parser.la \
+ $(top_builddir)/lib/libmiscutil.la \
+ $(top_builddir)/src/base/libbase.la \
+ $(SQUID_CPPUNIT_LIBS) \
+ $(SQUID_CPPUNIT_LA) \
+ $(COMPAT_LIB)
+testTokenizer_DEPENDENCIES = $(SQUID_CPPUNIT_LA)
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
new file mode 100644
index 0000000000..b76aa1ec67
--- /dev/null
+++ b/src/parser/Tokenizer.cc
@@ -0,0 +1,58 @@
+#include "squid.h"
+#include "Tokenizer.h"
+
+namespace Parser {
+
+bool
+Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
+{
+    const SBuf::size_type endOfPreWhiteSpace = buf_.findFirstNotOf(whitespace);
+    const SBuf::size_type endOfToken = buf_.findFirstOf(whitespace, endOfPreWhiteSpace);
+    if (endOfToken == SBuf::npos)
+        return false;
+    buf_.consume(endOfPreWhiteSpace);
+    returnedToken = buf_.consume(endOfToken - endOfPreWhiteSpace);
+    skip(whitespace);
+    return true;
+}
+
+bool
+Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
+{
+    SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
+    if (prefixLen == 0)
+        return false;
+    returnedToken = buf_.consume(prefixLen);
+    return true;
+}
+
+bool
+Tokenizer::skip(const CharacterSet &tokenChars)
+{
+    SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
+    if (prefixLen == 0)
+        return false;
+    buf_.consume(prefixLen);
+    return true;
+}
+
+bool
+Tokenizer::skip(const SBuf &tokenToSkip)
+{
+    if (buf_.startsWith(tokenToSkip)) {
+        buf_.consume(tokenToSkip.length());
+        return true;
+    }
+    return false;
+}
+
+bool
+Tokenizer::skip(const char tokenChar)
+{
+    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
+        buf_.consume(1);
+        return true;
+    }
+    return false;
+}
+} /* namespace Parser */
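A minimal sketch of the strtok(3)-style loop this class enables (illustrative only, not part of the diff; the CharacterSet constructor and include paths follow the usage in testTokenizer.cc below):

    #include "base/CharacterSet.h"
    #include "parser/Tokenizer.h"
    #include "SBuf.h"

    // token() skips leading delimiters, extracts the word, and then
    // consumes the delimiters that follow it, so this loop yields
    // "GET", "/path" and "HTTP/1.1" in turn.
    static void
    exampleTokenLoop()
    {
        const CharacterSet whitespace("whitespace", " \r\n");
        Parser::Tokenizer tok(SBuf("GET /path HTTP/1.1\r\n"));
        SBuf word;
        while (tok.token(word, whitespace)) {
            // use word here
        }
        // token() returns false once no trailing delimiter is found;
        // any unparsed tail remains available via tok.remaining().
    }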
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
new file mode 100644
index 0000000000..d40869875d
--- /dev/null
+++ b/src/parser/Tokenizer.h
@@ -0,0 +1,47 @@
+#ifndef SQUID_PARSER_TOKENIZER_H_
+#define SQUID_PARSER_TOKENIZER_H_
+
+#include "base/CharacterSet.h"
+#include "SBuf.h"
+
+namespace Parser {
+
+class Tokenizer {
+public:
+    explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {}
+
+    bool atEnd() const { return !buf_.length(); }
+    const SBuf& remaining() const { return buf_; }
+    void reset(const SBuf &newBuf) { buf_ = newBuf; }
+
+    /* The following methods start from the beginning of the input buffer.
+     * They return true and consume parsed chars if a non-empty token is found.
+     * Otherwise, they return false without any side-effects. */
+
+    /** Basic strtok(3):
+     *  Skips all leading delimiters (if any),
+     *  accumulates all characters up to the first delimiter (a token), and
+     *  skips all trailing delimiters (if any).
+     *  Want to extract delimiters? Use three prefix() calls instead.
+     */
+    bool token(SBuf &returnedToken, const CharacterSet &whitespace);
+
+    /// Accumulates all sequential permitted characters (a token).
+    bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars);
+
+    /// Skips all sequential permitted characters (a token).
+    bool skip(const CharacterSet &tokenChars);
+
+    /// Skips a given token.
+    bool skip(const SBuf &tokenToSkip);
+
+    /// Skips a given character (a token).
+    bool skip(const char tokenChar);
+
+private:
+    SBuf buf_; ///< yet unparsed input
+};
+
+
+} /* namespace Parser */
+#endif /* SQUID_PARSER_TOKENIZER_H_ */
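The token() comment above suggests three prefix() calls when the delimiters themselves must be preserved; a sketch of that pattern (illustrative only, with a deliberately small lowercase-only character set):

    #include "base/CharacterSet.h"
    #include "parser/Tokenizer.h"
    #include "SBuf.h"

    // Capture leading delimiters, the word, and trailing delimiters
    // as three separate tokens instead of discarding them.
    static void
    examplePrefixCalls()
    {
        const CharacterSet letters("letters", "abcdefghijklmnopqrstuvwxyz");
        const CharacterSet space("space", " ");
        Parser::Tokenizer tok(SBuf("  get /path"));
        SBuf pre, word, post;
        tok.prefix(pre, space);    // pre  == "  "
        tok.prefix(word, letters); // word == "get"
        tok.prefix(post, space);   // post == " "
        // tok.remaining() == "/path"
    }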
diff --git a/src/parser/testTokenizer.cc b/src/parser/testTokenizer.cc
new file mode 100644
index 0000000000..7334d743e5
--- /dev/null
+++ b/src/parser/testTokenizer.cc
@@ -0,0 +1,107 @@
+#include "squid.h"
+
+#include "testTokenizer.h"
+#include "base/CharacterSet.h"
+#include "Tokenizer.h"
+
+CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
+
+SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
+          "Host: resource.com\r\n"
+          "Cookie: laijkpk3422r j1noin \r\n"
+          "\r\n");
+const CharacterSet alpha("alpha", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const CharacterSet whitespace("whitespace", " \r\n");
+const CharacterSet crlf("crlf", "\r\n");
+const CharacterSet tab("tab", "\t");
+const CharacterSet numbers("numbers", "0123456789");
+
+void
+testTokenizer::testTokenizerPrefix()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // successful prefix tokenization
+    CPPUNIT_ASSERT(t.prefix(s, alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"), s);
+    CPPUNIT_ASSERT(t.prefix(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
+
+    // no match: the first character is not in the prefix set
+    CPPUNIT_ASSERT(!t.prefix(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
+
+    // one more match, to set s to something meaningful
+    CPPUNIT_ASSERT(t.prefix(s, alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"), s);
+
+    // no match: no characters from the set at the start of the buffer
+    CPPUNIT_ASSERT(!t.prefix(s, tab));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"), s); // output SBuf left untouched
+
+    // match until the end of the sample
+    CharacterSet all(whitespace);
+    all += alpha;
+    all += crlf;
+    all += numbers;
+    all.add(':').add('.').add('/');
+    CPPUNIT_ASSERT(t.prefix(s, all));
+    CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
+}
+
+void
+testTokenizer::testTokenizerSkip()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // first scenario: patterns match
+    // prep for the test
+    CPPUNIT_ASSERT(t.prefix(s, alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"), s);
+
+    // skip a character set...
+    CPPUNIT_ASSERT(t.skip(whitespace));
+    // ...and check that the skip consumed the right bytes
+    CPPUNIT_ASSERT(t.prefix(s, alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"), s);
+
+    // skip a literal SBuf prefix...
+    CPPUNIT_ASSERT(t.skip(SBuf("://")));
+    // ...and verify
+    CPPUNIT_ASSERT(t.prefix(s, alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("resource"), s);
+
+    // no skip: the buffer now starts with '.', which none of these match
+    CPPUNIT_ASSERT(!t.skip(alpha));
+    CPPUNIT_ASSERT(!t.skip(SBuf("://")));
+    CPPUNIT_ASSERT(!t.skip('a'));
+
+}
+
+void
+testTokenizer::testTokenizerToken()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // first scenario: patterns match
+    CPPUNIT_ASSERT(t.token(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"), s);
+    CPPUNIT_ASSERT(t.token(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"), s);
+    CPPUNIT_ASSERT(t.token(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"), s);
+    CPPUNIT_ASSERT(t.token(s, whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("Host:"), s);
+
+    // no delimiter found: token() fails without side effects
+    CPPUNIT_ASSERT(!t.token(s, tab));
+}
+
+void
+testTokenizer::testCharacterSet()
+{
+
+}
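testCharacterSet() is registered in the suite but left empty by this commit. A possible body, built only from the API exercised above (hypothetical sketch, not part of the diff):

    // Verify an incrementally composed CharacterSet through
    // Tokenizer::prefix(), mirroring the add()/operator+= usage
    // in testTokenizerPrefix().
    void
    testTokenizer::testCharacterSet()
    {
        CharacterSet digits("digits", "0123456789");
        digits.add('.'); // extend the set one character at a time
        Parser::Tokenizer t(SBuf("3.14x"));
        SBuf num;
        CPPUNIT_ASSERT(t.prefix(num, digits));
        CPPUNIT_ASSERT_EQUAL(SBuf("3.14"), num);
        CPPUNIT_ASSERT_EQUAL(SBuf("x"), t.remaining());
    }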
diff --git a/src/parser/testTokenizer.h b/src/parser/testTokenizer.h
new file mode 100644
index 0000000000..22ff87d9da
--- /dev/null
+++ b/src/parser/testTokenizer.h
@@ -0,0 +1,22 @@
+#ifndef SQUID_TESTTOKENIZER_H_
+#define SQUID_TESTTOKENIZER_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class testTokenizer : public CPPUNIT_NS::TestFixture
+{
+    CPPUNIT_TEST_SUITE( testTokenizer );
+    CPPUNIT_TEST ( testCharacterSet );
+    CPPUNIT_TEST ( testTokenizerPrefix );
+    CPPUNIT_TEST ( testTokenizerSkip );
+    CPPUNIT_TEST ( testTokenizerToken );
+    CPPUNIT_TEST_SUITE_END();
+
+protected:
+    void testTokenizerPrefix();
+    void testTokenizerSkip();
+    void testTokenizerToken();
+    void testCharacterSet();
+};
+
+#endif /* SQUID_TESTTOKENIZER_H_ */
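None of the registered cases touch reset() or atEnd(). A further case following the same fixture pattern could cover them (hypothetical testTokenizerReset; it would also need a CPPUNIT_TEST entry and a declaration in the class above):

    void
    testTokenizer::testTokenizerReset()
    {
        Parser::Tokenizer t(SBuf("abc."));
        SBuf s;
        CPPUNIT_ASSERT(t.prefix(s, alpha)); // the file-scope set from testTokenizer.cc
        CPPUNIT_ASSERT_EQUAL(SBuf("abc"), s);
        CPPUNIT_ASSERT(t.skip('.'));
        CPPUNIT_ASSERT(t.atEnd()); // everything consumed
        t.reset(SBuf("def")); // start over on a fresh buffer
        CPPUNIT_ASSERT(!t.atEnd());
        CPPUNIT_ASSERT_EQUAL(SBuf("def"), t.remaining());
    }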