From: Francesco Chemolli
Date: Thu, 12 Dec 2013 23:12:29 +0000 (+0100)
Subject: Reimplemented prefix, implemented prefix matching unit test
X-Git-Tag: merge-candidate-3-v1~506^2~105
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f51e6fc202c4afce094a6f33f3a1b59f5e935689;p=thirdparty%2Fsquid.git

Reimplemented prefix, implemented prefix matching unit test
---

diff --git a/src/parser/CharacterSet.h b/src/parser/CharacterSet.h
index cc8b1df4fc..f1c6714980 100644
--- a/src/parser/CharacterSet.h
+++ b/src/parser/CharacterSet.h
@@ -3,6 +3,7 @@
 
 #include <vector>
+//#include <iostream>
 
 namespace Parser {
 
 class CharacterSet
@@ -10,10 +11,10 @@ class CharacterSet
 public:
     //XXX: use unsigned chars?
     CharacterSet(const char *label, const char * const c) : name(label) {
+        chars_.reserve(256);
         size_t clen = strlen(c);
-        for (size_t i = 0; i < clen; ++i) {
+        for (size_t i = 0; i < clen; ++i)
             chars_[static_cast<uint8_t>(c[i])] = true;
-        }
     }
 
     /// whether a given character exists in the set
@@ -25,9 +26,12 @@ public:
     /// add all characters from the given CharacterSet to this one
     const CharacterSet &operator +=(const CharacterSet &src) {
         // TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
-        for (uint8_t i = 0; i < 256; ++i)
-            if (src.chars_[i])
+        for (uint8_t i = 0; i < 256; ++i) {
+            if (src.chars_[i]) {
+//                std::cout << static_cast<char>(i) << ',';
                 chars_[i] = true;
+            }
+        }
         return *this;
     }
 
@@ -36,7 +40,7 @@ public:
 
 private:
     /// characters defined in this set
-    std::vector<bool> chars_;
+    std::vector<bool> chars_; //std::vector<bool> is optimized
 };
 
 } // namespace Parser
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
index 9679cb0993..82cc83f534 100644
--- a/src/parser/Makefile.am
+++ b/src/parser/Makefile.am
@@ -32,13 +32,13 @@ testTokenizer_SOURCES = \
 	$(SBUF_SOURCE) \
 	testTokenizer.h \
 	testTokenizer.cc \
-	Tokenizer.h \
+	Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+	$(top_srcdir)/src/tests/testMain.cc \
 	$(top_srcdir)/src/tests/stub_mem.cc \
 	$(top_srcdir)/src/tests/stub_debug.cc \
 	$(top_srcdir)/src/tests/stub_time.cc \
 	$(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
-nodist_testTokenizer_SOURCES = \
-	$(top_srcdir)/src/tests/testMain.cc
 testTokenizer_LDFLAGS = $(LIBADD_DL)
 testTokenizer_LDADD = \
 	libsquid-parser.la \
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 13c5f5059f..a0ade50b2e 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -13,10 +13,16 @@ Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
 
 bool
 Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
 {
-    const SBuf::size_type pos=find_first_not_in(tokenChars);
-    if (pos == SBuf::npos)
+    SBuf::size_type prefixLen = 0;
+    const SBuf::size_type len=buf_.length();
+    while (prefixLen < len) {
+        if (!tokenChars[buf_[prefixLen]])
+            break;
+        ++prefixLen;
+    }
+    if (prefixLen == 0)
         return false;
-    //finish
+    returnedToken = buf_.consume(prefixLen);
     return true;
 }
 
@@ -55,12 +61,7 @@ Tokenizer::find_first_in (const CharacterSet &set)
 SBuf::size_type
 Tokenizer::find_first_not_in (const CharacterSet &set)
 {
-    SBuf::size_type rv;
-    const SBuf::size_type len=buf_.length();
-    for (rv = 0; rv < len; ++rv)
-        if (!set[buf_[rv]])
-            return rv;
-    return SBuf::npos;
+    return 0;
 }
 
 } /* namespace Parser */
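
[Editor's note, not part of the commit] The new Tokenizer::prefix() above scans
buf_ by hand, counts how many leading characters belong to the given
CharacterSet, and consumes that run, while find_first_not_in() is reduced to a
stub. Below is a minimal stand-alone sketch of the same prefix-matching idea,
buildable outside the Squid tree; MiniCharSet and prefixLength are hypothetical
names used only for this illustration and merely approximate CharacterSet and
Tokenizer::prefix(), they are not Squid code.

    // Stand-alone approximation of CharacterSet + Tokenizer::prefix().
    // MiniCharSet and prefixLength are illustrative names, not Squid code.
    #include <bitset>
    #include <cstring>
    #include <iostream>
    #include <string>

    class MiniCharSet {
    public:
        explicit MiniCharSet(const char *chars) {
            const size_t clen = strlen(chars);
            for (size_t i = 0; i < clen; ++i)
                bits_.set(static_cast<unsigned char>(chars[i]));
        }
        bool operator[](unsigned char c) const { return bits_.test(c); }
    private:
        std::bitset<256> bits_;   // one membership flag per possible octet
    };

    // Count how many leading characters of buf are members of set.
    static std::string::size_type
    prefixLength(const std::string &buf, const MiniCharSet &set)
    {
        std::string::size_type n = 0;
        while (n < buf.size() && set[static_cast<unsigned char>(buf[n])])
            ++n;
        return n;
    }

    int main() {
        const MiniCharSet alpha("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
        std::string buf = "GET http://resource.com/path HTTP/1.1\r\n";

        const std::string::size_type len = prefixLength(buf, alpha);
        if (len == 0)
            return 1;                              // no prefix matched
        const std::string token = buf.substr(0, len);
        buf.erase(0, len);                         // "consume" the matched prefix
        std::cout << "token: " << token << "\n";   // prints: token: GET
        return 0;
    }

Fed the same request line as the unit test below, this sketch yields the token
"GET" and leaves " http://resource.com/path HTTP/1.1\r\n" in the buffer, which
matches what the prefix assertions in the test expect.
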
"Tokenizer.h" CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); +SBuf text("GET http://resource.com/path HTTP/1.1\r\n" + "Host: resource.com\r\n" + "Cookie: laijkpk3422r j1noin \r\n" + "\r\n"); +const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxzABCDEFGHIJKLMNOPQRSTUVWXYZ"); +const Parser::CharacterSet whitespace("whitespace"," "); +const Parser::CharacterSet crlf("crlf","\r\n"); +const Parser::CharacterSet tab("tab","\t"); void testTokenizer::testTokenizerPrefix() { + Parser::Tokenizer t(text); + SBuf s; + + // successful prefix tokenization + CPPUNIT_ASSERT(t.prefix(s,alpha)); + CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); + CPPUNIT_ASSERT(t.prefix(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); + + //no match (first char is not in the prefix set) + CPPUNIT_ASSERT(!t.prefix(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); + + // one more match to set S to something meaningful + CPPUNIT_ASSERT(t.prefix(s,alpha)); + CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); + + //no match (no characters from the character set in the prefix) + CPPUNIT_ASSERT(!t.prefix(s,tab)); + CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched + + // match until the end of the sample + Parser::CharacterSet all(alpha); + // TODO: finish from here. But += is buggy +// all += whitespace; +// all += crlf; + } void diff --git a/src/tests/stub_SBufDetailedStats.cc b/src/tests/stub_SBufDetailedStats.cc index 8233ac2c38..c4520b7eb0 100644 --- a/src/tests/stub_SBufDetailedStats.cc +++ b/src/tests/stub_SBufDetailedStats.cc @@ -6,7 +6,7 @@ class StatHist; -void recordSBufSizeAtDestruct(SBuf::size_type) STUB_NOP +void recordSBufSizeAtDestruct(SBuf::size_type) {} const StatHist * collectSBufDestructTimeStats() STUB_RETVAL(NULL) -void recordMemBlobSizeAtDestruct(SBuf::size_type) STUB_NOP +void recordMemBlobSizeAtDestruct(SBuf::size_type) {} const StatHist * collectMemBlobDestructTimeStats() STUB_RETVAL(NULL)