Reimplemented prefix, implemented prefix matching unit test

author Francesco Chemolli <kinkie@squid-cache.org>

Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)

committer Francesco Chemolli <kinkie@squid-cache.org>

Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
author Francesco Chemolli <kinkie@squid-cache.org>
Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
committer Francesco Chemolli <kinkie@squid-cache.org>
Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
diff --git a/src/parser/CharacterSet.h b/src/parser/CharacterSet.h

index cc8b1df4fc12705e0f0dab3a68b0fff763a29192..f1c6714980dc00119933f7f5fc2b33e89fdedb5a 100644 (file)
--- a/src/parser/CharacterSet.h
+++ b/src/parser/CharacterSet.h
@@ -3,6 +3,7 @@
  
  #include <vector>
  
+//#include <iostream>
  namespace Parser {
  
  class CharacterSet
@@ -10,10 +11,10 @@ class CharacterSet
  public:
      //XXX: use unsigned chars?
      CharacterSet(const char *label, const char * const c) : name(label) {
+        chars_.reserve(256);
          size_t clen = strlen(c);
-        for (size_t i = 0; i < clen; ++i) {
+        for (size_t i = 0; i < clen; ++i)
              chars_[static_cast<uint8_t>(c[i])] = true;
-        }
      }
  
      /// whether a given character exists in the set
@@ -25,9 +26,12 @@ public:
      /// add all characters from the given CharacterSet to this one
      const CharacterSet &operator +=(const CharacterSet &src) {
          // TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
-        for (uint8_t i = 0; i < 256; ++i)
-            if (src.chars_[i])
+        for (uint8_t i = 0; i < 256; ++i) {
+            if (src.chars_[i]) {
+//                std::cout << static_cast<int>(i) << ',';
                  chars_[i] = true;
+            }
+        }
          return *this;
      }
  
@@ -36,7 +40,7 @@ public:
  
  private:
      /// characters defined in this set
-    std::vector<bool> chars_;
+    std::vector<bool> chars_; //std::vector<bool> is optimized
  };
  
  } // namespace Parser
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am

index 9679cb099302112ef8eb877f2abcc6881b6fb680..82cc83f53406711d9cceb88bb1287f938e0b86a7 100644 (file)
--- a/src/parser/Makefile.am
+++ b/src/parser/Makefile.am
@@ -32,13 +32,13 @@ testTokenizer_SOURCES = \
         $(SBUF_SOURCE) \
         testTokenizer.h \
         testTokenizer.cc \
-       Tokenizer.h \
+       Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+       $(top_srcdir)/src/tests/testMain.cc \
         $(top_srcdir)/src/tests/stub_mem.cc \
         $(top_srcdir)/src/tests/stub_debug.cc \
         $(top_srcdir)/src/tests/stub_time.cc \
         $(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
-nodist_testTokenizer_SOURCES = \
-       $(top_srcdir)/src/tests/testMain.cc
  testTokenizer_LDFLAGS = $(LIBADD_DL)
  testTokenizer_LDADD = \
         libsquid-parser.la \
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc

index 13c5f5059f312467692140475ac076348d623612..a0ade50b2e0f4fa1be54a020991f4c16b0377730 100644 (file)
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -13,10 +13,16 @@ Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
  bool
  Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
  {
-    const SBuf::size_type pos=find_first_not_in(tokenChars);
-    if (pos == SBuf::npos)
+    SBuf::size_type prefixLen = 0;
+    const SBuf::size_type len=buf_.length();
+    while (prefixLen < len) {
+        if (!tokenChars[buf_[prefixLen]])
+            break;
+        ++prefixLen;
+    }
+    if (prefixLen == 0)
          return false;
-    //finish
+    returnedToken = buf_.consume(prefixLen);
      return true;
  }
  
@@ -55,12 +61,7 @@ Tokenizer::find_first_in (const CharacterSet &set)
  SBuf::size_type
  Tokenizer::find_first_not_in (const CharacterSet &set)
  {
-    SBuf::size_type rv;
-    const SBuf::size_type len=buf_.length();
-    for (rv = 0; rv < len; ++rv)
-        if (!set[buf_[rv]])
-            return rv;
-    return SBuf::npos;
+    return 0;
  }
  
  } /* namespace Parser */
diff --git a/src/parser/testTokenizer.cc b/src/parser/testTokenizer.cc

index 949779666fac76d9f12239e79fe5bc5b6ed1a91c..011dcec620cdc6a931d73014fcc8e619e70fd068 100644 (file)
--- a/src/parser/testTokenizer.cc
+++ b/src/parser/testTokenizer.cc
@@ -1,14 +1,50 @@
  #include "squid.h"
  
  #include "testTokenizer.h"
+#include "CharacterSet.h"
  #include "Tokenizer.h"
  
  CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
  
+SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
+    "Host: resource.com\r\n"
+    "Cookie: laijkpk3422r j1noin \r\n"
+    "\r\n");
+const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const Parser::CharacterSet whitespace("whitespace"," ");
+const Parser::CharacterSet crlf("crlf","\r\n");
+const Parser::CharacterSet tab("tab","\t");
  
  void
  testTokenizer::testTokenizerPrefix()
  {
+    Parser::Tokenizer t(text);
+    SBuf s;
+
+    // successful prefix tokenization
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+    CPPUNIT_ASSERT(t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    //no match (first char is not in the prefix set)
+    CPPUNIT_ASSERT(!t.prefix(s,whitespace));
+    CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+    // one more match to set S to something meaningful
+    CPPUNIT_ASSERT(t.prefix(s,alpha));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
+
+    //no match (no characters from the character set in the prefix)
+    CPPUNIT_ASSERT(!t.prefix(s,tab));
+    CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
+
+    // match until the end of the sample
+    Parser::CharacterSet all(alpha);
+    // TODO: finish from here. But += is buggy
+//    all += whitespace;
+//    all += crlf;
+
  }
  
  void
diff --git a/src/tests/stub_SBufDetailedStats.cc b/src/tests/stub_SBufDetailedStats.cc

index 8233ac2c3875a3f31e5ee45628978ebd34a59fa5..c4520b7eb04f295706c10f99d48700b16a1ca01f 100644 (file)
--- a/src/tests/stub_SBufDetailedStats.cc
+++ b/src/tests/stub_SBufDetailedStats.cc
@@ -6,7 +6,7 @@
  
  class StatHist;
  
-void recordSBufSizeAtDestruct(SBuf::size_type) STUB_NOP
+void recordSBufSizeAtDestruct(SBuf::size_type) {}
  const StatHist * collectSBufDestructTimeStats() STUB_RETVAL(NULL)
-void recordMemBlobSizeAtDestruct(SBuf::size_type) STUB_NOP
+void recordMemBlobSizeAtDestruct(SBuf::size_type)  {}
  const StatHist * collectMemBlobDestructTimeStats() STUB_RETVAL(NULL)
author	Francesco Chemolli <kinkie@squid-cache.org>
	Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
committer	Francesco Chemolli <kinkie@squid-cache.org>
	Thu, 12 Dec 2013 23:12:29 +0000 (00:12 +0100)
src/parser/CharacterSet.h		patch \| blob \| blame \| history
src/parser/Makefile.am		patch \| blob \| blame \| history
src/parser/Tokenizer.cc		patch \| blob \| blame \| history
src/parser/testTokenizer.cc		patch \| blob \| blame \| history
src/tests/stub_SBufDetailedStats.cc		patch \| blob \| blame \| history