Merged from upstream
author    Francesco Chemolli <kinkie@squid-cache.org>
          Sun, 5 Jan 2014 16:57:44 +0000 (17:57 +0100)
committer Francesco Chemolli <kinkie@squid-cache.org>
          Sun, 5 Jan 2014 16:57:44 +0000 (17:57 +0100)
configure.ac
src/Makefile.am
src/parser/Makefile.am [new file with mode: 0644]
src/parser/Tokenizer.cc [new file with mode: 0644]
src/parser/Tokenizer.h [new file with mode: 0644]
src/parser/testTokenizer.cc [new file with mode: 0644]
src/parser/testTokenizer.h [new file with mode: 0644]

diff --git a/configure.ac b/configure.ac
index 4fd8209975b61475b40a3ee0c16b18376d204012..5fc55190a73a16c2c92e25b28d35d056bf4318ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3429,6 +3429,7 @@ AC_CONFIG_FILES([
        src/ipc/Makefile
        src/ssl/Makefile
        src/mgr/Makefile
+       src/parser/Makefile
        src/snmp/Makefile
        contrib/Makefile
        icons/Makefile
diff --git a/src/Makefile.am b/src/Makefile.am
index bb6b1aa030a59f4d0c8118e60e2bb5826bcc55a0..7ded999a863ca50adfed64d5291ed5d797abb3d5 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,8 +46,8 @@ LOADABLE_MODULES_SOURCES = \
        LoadableModules.h \
        LoadableModules.cc
 
-SUBDIRS        = base anyp comm eui acl format fs repl
-DIST_SUBDIRS = base anyp comm eui acl format fs repl
+SUBDIRS        = base anyp parser comm eui acl format fs repl
+DIST_SUBDIRS = base anyp parser comm eui acl format fs repl
 
 if ENABLE_AUTH
 SUBDIRS += auth
@@ -646,6 +646,7 @@ squid_LDADD = \
        $(ESI_LIBS) \
        $(SSL_LIBS) \
        $(SNMP_LIBS) \
+       parser/libsquid-parser.la \
        $(top_builddir)/lib/libmisccontainers.la \
        $(top_builddir)/lib/libmiscencoding.la \
        $(top_builddir)/lib/libmiscutil.la \
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
new file mode 100644
index 0000000..97cab1f
--- /dev/null
+++ b/src/parser/Makefile.am
@@ -0,0 +1,49 @@
+include $(top_srcdir)/src/Common.am
+include $(top_srcdir)/src/TestHeaders.am
+
+EXTRA_PROGRAMS = \
+       testTokenizer
+       
+check_PROGRAMS += testTokenizer
+TESTS += testTokenizer
+
+noinst_LTLIBRARIES = libsquid-parser.la
+
+libsquid_parser_la_SOURCES = \
+       Tokenizer.h \
+       Tokenizer.cc
+
+SBUF_SOURCE= \
+       $(top_srcdir)/src/base/CharacterSet.h \
+       $(top_srcdir)/src/SBuf.h \
+       $(top_srcdir)/src/SBuf.cc \
+       $(top_srcdir)/src/MemBlob.h \
+       $(top_srcdir)/src/MemBlob.cc \
+       $(top_srcdir)/src/OutOfBoundsException.h \
+       $(top_srcdir)/src/SBufExceptions.h \
+       $(top_srcdir)/src/SBufExceptions.cc \
+       $(top_srcdir)/src/String.cc \
+       $(top_srcdir)/src/SquidString.h \
+       $(top_srcdir)/src/base/TextException.h \
+       $(top_srcdir)/src/base/TextException.cc
+
+testTokenizer_SOURCES = \
+       $(SBUF_SOURCE) \
+       testTokenizer.h \
+       testTokenizer.cc \
+       Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+       $(top_srcdir)/src/tests/testMain.cc \
+       $(top_srcdir)/src/tests/stub_mem.cc \
+       $(top_srcdir)/src/tests/stub_debug.cc \
+       $(top_srcdir)/src/tests/stub_time.cc \
+       $(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
+testTokenizer_LDFLAGS = $(LIBADD_DL)
+testTokenizer_LDADD = \
+       libsquid-parser.la \
+       $(top_builddir)/lib/libmiscutil.la \
+       $(top_builddir)/src/base/libbase.la \
+       $(SQUID_CPPUNIT_LIBS) \
+       $(SQUID_CPPUNIT_LA) \
+       $(COMPAT_LIB)
+testTokenizer_DEPENDENCIES = $(SQUID_CPPUNIT_LA)
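
Note: with testTokenizer listed in both check_PROGRAMS and TESTS, automake builds and runs it as part of "make check" under src/parser, while EXTRA_PROGRAMS keeps the test binary out of the default build.
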
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
new file mode 100644
index 0000000..b76aa1e
--- /dev/null
+++ b/src/parser/Tokenizer.cc
@@ -0,0 +1,58 @@
+#include "squid.h"
+#include "Tokenizer.h"
+
+namespace Parser {
+
+bool
+Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
+{
+    const SBuf::size_type endOfPreWhiteSpace = buf_.findFirstNotOf(whitespace);
+    const SBuf::size_type endOfToken = buf_.findFirstOf(whitespace, endOfPreWhiteSpace);
+    if (endOfToken == SBuf::npos)
+        return false;
+    buf_.consume(endOfPreWhiteSpace);
+    returnedToken = buf_.consume(endOfToken - endOfPreWhiteSpace);
+    skip(whitespace);
+    return true;
+}
+
+bool
+Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
+{
+    SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
+    if (prefixLen == 0)
+        return false;
+    returnedToken = buf_.consume(prefixLen);
+    return true;
+}
+
+bool
+Tokenizer::skip(const CharacterSet &tokenChars)
+{
+    SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
+    if (prefixLen == 0)
+        return false;
+    buf_.consume(prefixLen);
+    return true;
+}
+
+bool
+Tokenizer::skip(const SBuf &tokenToSkip)
+{
+    if (buf_.startsWith(tokenToSkip)) {
+        buf_.consume(tokenToSkip.length());
+        return true;
+    }
+    return false;
+}
+
+bool
+Tokenizer::skip(const char tokenChar)
+{
+    if (buf_[0] == tokenChar) {
+        buf_.consume(1);
+        return true;
+    }
+    return false;
+}
+} /* namespace Parser */
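
A note on the token() semantics above: a match requires a delimiter after the token, because findFirstOf() returning npos makes token() fail without consuming anything. A minimal sketch (illustrative names, assuming only the SBuf and CharacterSet APIs this commit builds on):

    #include "squid.h"
    #include "base/CharacterSet.h"
    #include "parser/Tokenizer.h"

    static void
    tokenDemo()
    {
        const CharacterSet ws("ws", " \r\n"); // delimiter set
        Parser::Tokenizer t(SBuf("  GET /path"));

        SBuf word;
        // leading delimiters are skipped, "GET" is extracted, and the
        // trailing delimiter run is consumed as well
        bool ok = t.token(word, ws); // ok == true, word == "GET"

        // "/path" has no delimiter after it, so token() fails and
        // leaves both the buffer and word untouched
        ok = t.token(word, ws);      // ok == false, word still "GET"
    }
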
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
new file mode 100644
index 0000000..d408698
--- /dev/null
+++ b/src/parser/Tokenizer.h
@@ -0,0 +1,47 @@
+#ifndef SQUID_PARSER_TOKENIZER_H_
+#define SQUID_PARSER_TOKENIZER_H_
+
+#include "base/CharacterSet.h"
+#include "SBuf.h"
+
+namespace Parser {
+
+class Tokenizer {
+public:
+   explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {}
+
+   bool atEnd() const { return !buf_.length(); }
+   const SBuf& remaining() const { return buf_; }
+   void reset(const SBuf &newBuf) { buf_ = newBuf; }
+
+   /* The following methods start from the beginning of the input buffer.
+    * They return true and consume parsed chars if a non-empty token is found.
+    * Otherwise, they return false without any side-effects. */
+
+   /** Basic strtok(3):
+    *  Skips all leading delimiters (if any),
+    *  accumulates all characters up to the first delimiter (a token), and
+    *  skips all trailing delimiters (if any).
+    *  Want to extract delimiters? Use three prefix() calls instead.
+    */
+   bool token(SBuf &returnedToken, const CharacterSet &whitespace);
+
+   /// Accumulates all sequential permitted characters (a token).
+   bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars);
+
+   /// Skips all sequential permitted characters (a token).
+   bool skip(const CharacterSet &tokenChars);
+
+   /// Skips a given token.
+   bool skip(const SBuf &tokenToSkip);
+
+   /// Skips a given character (a token).
+   bool skip(const char tokenChar);
+
+private:
+   SBuf buf_; ///< yet unparsed input
+};
+
+
+} /* namespace Parser */
+#endif /* SQUID_PARSER_TOKENIZER_H_ */
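
As the token() comment suggests, a delimiter run can be captured instead of discarded by chaining three prefix() calls. A sketch of that pattern (hypothetical helper, assuming the API declared above):

    #include "squid.h"
    #include "base/CharacterSet.h"
    #include "parser/Tokenizer.h"

    // split e.g. "Host: localhost" into "Host", ": " and "localhost",
    // keeping the separator run instead of discarding it
    static bool
    splitKeepingDelimiter(const SBuf &input, SBuf &key, SBuf &sep, SBuf &value)
    {
        static const CharacterSet alpha("alpha",
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
        static const CharacterSet sepChars("sep", ": ");

        Parser::Tokenizer tok(input);
        return tok.prefix(key, alpha)    // the leading token
            && tok.prefix(sep, sepChars) // the delimiter run itself
            && tok.prefix(value, alpha); // the trailing token
    }
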
diff --git a/src/parser/testTokenizer.cc b/src/parser/testTokenizer.cc
new file mode 100644
index 0000000..ad45614
--- /dev/null
+++ b/src/parser/testTokenizer.cc
@@ -0,0 +1,109 @@
+#include "squid.h"
+
+#include "testTokenizer.h"
+#include "base/CharacterSet.h"
+#include "Tokenizer.h"
+
+CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
+
+SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
+    "Host: resource.com\r\n"
+    "Cookie: laijkpk3422r j1noin \r\n"
+    "\r\n");
+const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const CharacterSet whitespace("whitespace"," \r\n");
+const CharacterSet crlf("crlf","\r\n");
+const CharacterSet tab("tab","\t");
+const CharacterSet numbers("numbers","0123456789");
+
+void
+testTokenizer::testTokenizerPrefix()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // successful prefix tokenization
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+    CPPUNIT_ASSERT(t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    //no match (first char is not in the prefix set)
+    CPPUNIT_ASSERT(!t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    // one more match to set S to something meaningful
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
+
+    //no match (no characters from the character set in the prefix)
+    CPPUNIT_ASSERT(!t.prefix(s,tab));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
+
+    // match until the end of the sample
+    CharacterSet all(whitespace);
+    all += alpha;
+    all += crlf;
+    all += numbers;
+    all.add(':').add('.').add('/');
+    CPPUNIT_ASSERT(t.prefix(s,all));
+    CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
+}
+
+void
+testTokenizer::testTokenizerSkip()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // first scenario: patterns match
+    // prep for test
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+
+    // test skip testing character set
+    CPPUNIT_ASSERT(t.skip(whitespace));
+    // check that skip was right
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
+
+    //check skip prefix
+    CPPUNIT_ASSERT(t.skip(SBuf("://")));
+    // verify
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
+
+    // no skip
+    CPPUNIT_ASSERT(!t.skip(alpha));
+    CPPUNIT_ASSERT(!t.skip(SBuf("://")));
+    CPPUNIT_ASSERT(!t.skip('a'));
+
+}
+
+void
+testTokenizer::testTokenizerToken()
+{
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // first scenario: patterns match
+    CPPUNIT_ASSERT(t.token(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+    CPPUNIT_ASSERT(t.token(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
+    CPPUNIT_ASSERT(t.token(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
+    CPPUNIT_ASSERT(t.token(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
+
+    SBuf s2(s);
+    //no separator found
+    CPPUNIT_ASSERT(!t.token(s,tab));
+    CPPUNIT_ASSERT_EQUAL(s2,s); // check that the output parameter was untouched
+}
+
+void
+testTokenizer::testCharacterSet()
+{
+
+}
diff --git a/src/parser/testTokenizer.h b/src/parser/testTokenizer.h
new file mode 100644
index 0000000..22ff87d
--- /dev/null
+++ b/src/parser/testTokenizer.h
@@ -0,0 +1,22 @@
+#ifndef SQUID_TESTTOKENIZER_H_
+#define SQUID_TESTTOKENIZER_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class testTokenizer : public CPPUNIT_NS::TestFixture
+{
+    CPPUNIT_TEST_SUITE( testTokenizer );
+    CPPUNIT_TEST ( testCharacterSet );
+    CPPUNIT_TEST ( testTokenizerPrefix );
+    CPPUNIT_TEST ( testTokenizerSkip );
+    CPPUNIT_TEST ( testTokenizerToken );
+    CPPUNIT_TEST_SUITE_END();
+
+protected:
+    void testTokenizerPrefix();
+    void testTokenizerSkip();
+    void testTokenizerToken();
+    void testCharacterSet();
+};
+
+#endif /* SQUID_TESTTOKENIZER_H_ */