git.ipfire.org Git - thirdparty/squid.git/commitdiff
Reimplemented prefix, implemented prefix matching unit test
author: Francesco Chemolli <kinkie@squid-cache.org>
Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
committer: Francesco Chemolli <kinkie@squid-cache.org>
Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
src/parser/CharacterSet.h
src/parser/Makefile.am
src/parser/Tokenizer.cc
src/parser/testTokenizer.cc
src/tests/stub_SBufDetailedStats.cc

index cc8b1df4fc12705e0f0dab3a68b0fff763a29192..f1c6714980dc00119933f7f5fc2b33e89fdedb5a 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <vector>
 
+//#include <iostream>
 namespace Parser {
 
 class CharacterSet
@@ -10,10 +11,10 @@ class CharacterSet
 public:
     //XXX: use unsigned chars?
     CharacterSet(const char *label, const char * const c) : name(label) {
+        chars_.reserve(256);
         size_t clen = strlen(c);
-        for (size_t i = 0; i < clen; ++i) {
+        for (size_t i = 0; i < clen; ++i)
             chars_[static_cast<uint8_t>(c[i])] = true;
-        }
     }
 
     /// whether a given character exists in the set
@@ -25,9 +26,12 @@ public:
     /// add all characters from the given CharacterSet to this one
     const CharacterSet &operator +=(const CharacterSet &src) {
         // TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
-        for (uint8_t i = 0; i < 256; ++i)
-            if (src.chars_[i])
+        for (uint8_t i = 0; i < 256; ++i) {
+            if (src.chars_[i]) {
+//                std::cout << static_cast<int>(i) << ',';
                 chars_[i] = true;
+            }
+        }
         return *this;
     }
 
@@ -36,7 +40,7 @@ public:
 
 private:
     /// characters defined in this set
-    std::vector<bool> chars_;
+    std::vector<bool> chars_; //std::vector<bool> is optimized
 };
 
 } // namespace Parser
index 9679cb099302112ef8eb877f2abcc6881b6fb680..82cc83f53406711d9cceb88bb1287f938e0b86a7 100644 (file)
@@ -32,13 +32,13 @@ testTokenizer_SOURCES = \
        $(SBUF_SOURCE) \
        testTokenizer.h \
        testTokenizer.cc \
-       Tokenizer.h \
+       Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+       $(top_srcdir)/src/tests/testMain.cc \
        $(top_srcdir)/src/tests/stub_mem.cc \
        $(top_srcdir)/src/tests/stub_debug.cc \
        $(top_srcdir)/src/tests/stub_time.cc \
        $(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
-nodist_testTokenizer_SOURCES = \
-       $(top_srcdir)/src/tests/testMain.cc
 testTokenizer_LDFLAGS = $(LIBADD_DL)
 testTokenizer_LDADD = \
        libsquid-parser.la \
index 13c5f5059f312467692140475ac076348d623612..a0ade50b2e0f4fa1be54a020991f4c16b0377730 100644 (file)
@@ -13,10 +13,16 @@ Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
 bool
 Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
 {
-    const SBuf::size_type pos=find_first_not_in(tokenChars);
-    if (pos == SBuf::npos)
+    SBuf::size_type prefixLen = 0;
+    const SBuf::size_type len=buf_.length();
+    while (prefixLen < len) {
+        if (!tokenChars[buf_[prefixLen]])
+            break;
+        ++prefixLen;
+    }
+    if (prefixLen == 0)
         return false;
-    //finish
+    returnedToken = buf_.consume(prefixLen);
     return true;
 }
 
@@ -55,12 +61,7 @@ Tokenizer::find_first_in (const CharacterSet &set)
 SBuf::size_type
 Tokenizer::find_first_not_in (const CharacterSet &set)
 {
-    SBuf::size_type rv;
-    const SBuf::size_type len=buf_.length();
-    for (rv = 0; rv < len; ++rv)
-        if (!set[buf_[rv]])
-            return rv;
-    return SBuf::npos;
+    return 0;
 }
 
 } /* namespace Parser */
index 949779666fac76d9f12239e79fe5bc5b6ed1a91c..011dcec620cdc6a931d73014fcc8e619e70fd068 100644 (file)
@@ -1,14 +1,50 @@
 #include "squid.h"
 
 #include "testTokenizer.h"
+#include "CharacterSet.h"
 #include "Tokenizer.h"
 
 CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
 
+SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
+    "Host: resource.com\r\n"
+    "Cookie: laijkpk3422r j1noin \r\n"
+    "\r\n");
+const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const Parser::CharacterSet whitespace("whitespace"," ");
+const Parser::CharacterSet crlf("crlf","\r\n");
+const Parser::CharacterSet tab("tab","\t");
 
 void
 testTokenizer::testTokenizerPrefix()
 {
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // successful prefix tokenization
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+    CPPUNIT_ASSERT(t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    //no match (first char is not in the prefix set)
+    CPPUNIT_ASSERT(!t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    // one more match to set S to something meaningful
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
+
+    //no match (no characters from the character set in the prefix)
+    CPPUNIT_ASSERT(!t.prefix(s,tab));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
+
+    // match until the end of the sample
+    Parser::CharacterSet all(alpha);
+    // TODO: finish from here. But += is buggy
+//    all += whitespace;
+//    all += crlf;
+
 }
 
 void
index 8233ac2c3875a3f31e5ee45628978ebd34a59fa5..c4520b7eb04f295706c10f99d48700b16a1ca01f 100644 (file)
@@ -6,7 +6,7 @@
 
 class StatHist;
 
-void recordSBufSizeAtDestruct(SBuf::size_type) STUB_NOP
+void recordSBufSizeAtDestruct(SBuf::size_type) {}
 const StatHist * collectSBufDestructTimeStats() STUB_RETVAL(NULL)
-void recordMemBlobSizeAtDestruct(SBuf::size_type) STUB_NOP
+void recordMemBlobSizeAtDestruct(SBuf::size_type)  {}
 const StatHist * collectMemBlobDestructTimeStats() STUB_RETVAL(NULL)