From: Francesco Chemolli
Date: Thu, 12 Dec 2013 23:12:29 +0000 (+0100)
Subject: Reimplemented prefix, implemented prefix matching unit test
X-Git-Tag: merge-candidate-3-v1~506^2~105
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f51e6fc202c4afce094a6f33f3a1b59f5e935689;p=thirdparty%2Fsquid.git

Reimplemented prefix, implemented prefix matching unit test
---

diff --git a/src/parser/CharacterSet.h b/src/parser/CharacterSet.h
index cc8b1df4fc..f1c6714980 100644
--- a/src/parser/CharacterSet.h
+++ b/src/parser/CharacterSet.h
@@ -3,6 +3,7 @@
 
 #include <vector>
+//#include <iostream>
 
 namespace Parser {
 
 class CharacterSet
@@ -10,10 +11,10 @@ class CharacterSet
 public:
     //XXX: use unsigned chars?
     CharacterSet(const char *label, const char * const c) : name(label) {
+        chars_.reserve(256);
         size_t clen = strlen(c);
-        for (size_t i = 0; i < clen; ++i) {
+        for (size_t i = 0; i < clen; ++i)
             chars_[static_cast<uint8_t>(c[i])] = true;
-        }
     }
 
     /// whether a given character exists in the set
@@ -25,9 +26,12 @@ public:
     /// add all characters from the given CharacterSet to this one
     const CharacterSet &operator +=(const CharacterSet &src) {
         // TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
-        for (uint8_t i = 0; i < 256; ++i)
-            if (src.chars_[i])
+        for (uint8_t i = 0; i < 256; ++i) {
+            if (src.chars_[i]) {
+//                std::cout << static_cast<char>(i) << ',';
                 chars_[i] = true;
+            }
+        }
         return *this;
     }
 
@@ -36,7 +40,7 @@ public:
 
 private:
     /// characters defined in this set
-    std::vector<bool> chars_;
+    std::vector<bool> chars_; //std::vector<bool> is optimized
 };
 
 } // namespace Parser
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
index 9679cb0993..82cc83f534 100644
--- a/src/parser/Makefile.am
+++ b/src/parser/Makefile.am
@@ -32,13 +32,13 @@ testTokenizer_SOURCES = \
 	$(SBUF_SOURCE) \
 	testTokenizer.h \
 	testTokenizer.cc \
-	Tokenizer.h \
+	Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+	$(top_srcdir)/src/tests/testMain.cc \
 	$(top_srcdir)/src/tests/stub_mem.cc \
 	$(top_srcdir)/src/tests/stub_debug.cc \
 	$(top_srcdir)/src/tests/stub_time.cc \
 	$(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
-nodist_testTokenizer_SOURCES = \
-	$(top_srcdir)/src/tests/testMain.cc
 testTokenizer_LDFLAGS = $(LIBADD_DL)
 testTokenizer_LDADD = \
 	libsquid-parser.la \
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 13c5f5059f..a0ade50b2e 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -13,10 +13,16 @@ Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
 
 bool
 Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
 {
-    const SBuf::size_type pos=find_first_not_in(tokenChars);
-    if (pos == SBuf::npos)
+    SBuf::size_type prefixLen = 0;
+    const SBuf::size_type len=buf_.length();
+    while (prefixLen < len) {
+        if (!tokenChars[buf_[prefixLen]])
+            break;
+        ++prefixLen;
+    }
+    if (prefixLen == 0)
         return false;
-    //finish
+    returnedToken = buf_.consume(prefixLen);
     return true;
 }
 
@@ -55,12 +61,7 @@ Tokenizer::find_first_in (const CharacterSet &set)
 SBuf::size_type
 Tokenizer::find_first_not_in (const CharacterSet &set)
 {
-    SBuf::size_type rv;
-    const SBuf::size_type len=buf_.length();
-    for (rv = 0; rv < len; ++rv)
-        if (!set[buf_[rv]])
-            return rv;
-    return SBuf::npos;
+    return 0;
 }
 
 } /* namespace Parser */
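
[Editor's note, not part of the commit] The new Tokenizer::prefix() above scans
buf_ by hand, counts how many leading characters belong to the given
CharacterSet, and consumes that run, while find_first_not_in() is reduced to a
stub. Below is a minimal stand-alone sketch of the same prefix-matching idea,
buildable outside the Squid tree; MiniCharSet and prefixLength are hypothetical
names used only for this illustration and merely approximate CharacterSet and
Tokenizer::prefix(), they are not Squid code.

    // Stand-alone approximation of CharacterSet + Tokenizer::prefix().
    // MiniCharSet and prefixLength are illustrative names, not Squid code.
    #include <bitset>
    #include <cstring>
    #include <iostream>
    #include <string>

    class MiniCharSet {
    public:
        explicit MiniCharSet(const char *chars) {
            const size_t clen = strlen(chars);
            for (size_t i = 0; i < clen; ++i)
                bits_.set(static_cast<unsigned char>(chars[i]));
        }
        bool operator[](unsigned char c) const { return bits_.test(c); }
    private:
        std::bitset<256> bits_;   // one membership flag per possible octet
    };

    // Count how many leading characters of buf are members of set.
    static std::string::size_type
    prefixLength(const std::string &buf, const MiniCharSet &set)
    {
        std::string::size_type n = 0;
        while (n < buf.size() && set[static_cast<unsigned char>(buf[n])])
            ++n;
        return n;
    }

    int main() {
        const MiniCharSet alpha("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
        std::string buf = "GET http://resource.com/path HTTP/1.1\r\n";

        const std::string::size_type len = prefixLength(buf, alpha);
        if (len == 0)
            return 1;                              // no prefix matched
        const std::string token = buf.substr(0, len);
        buf.erase(0, len);                         // "consume" the matched prefix
        std::cout << "token: " << token << "\n";   // prints: token: GET
        return 0;
    }

Fed the same request line as the unit test below, this sketch yields the token
"GET" and leaves " http://resource.com/path HTTP/1.1\r\n" in the buffer, which
matches what the prefix assertions in the test expect.
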
"Tokenizer.h" CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); +SBuf text("GET http://resource.com/path HTTP/1.1\r\n" + "Host: resource.com\r\n" + "Cookie: laijkpk3422r j1noin \r\n" + "\r\n"); +const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxzABCDEFGHIJKLMNOPQRSTUVWXYZ"); +const Parser::CharacterSet whitespace("whitespace"," "); +const Parser::CharacterSet crlf("crlf","\r\n"); +const Parser::CharacterSet tab("tab","\t"); void testTokenizer::testTokenizerPrefix() { + Parser::Tokenizer t(text); + SBuf s; + + // successful prefix tokenization + CPPUNIT_ASSERT(t.prefix(s,alpha)); + CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); + CPPUNIT_ASSERT(t.prefix(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); + + //no match (first char is not in the prefix set) + CPPUNIT_ASSERT(!t.prefix(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); + + // one more match to set S to something meaningful + CPPUNIT_ASSERT(t.prefix(s,alpha)); + CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); + + //no match (no characters from the character set in the prefix) + CPPUNIT_ASSERT(!t.prefix(s,tab)); + CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched + + // match until the end of the sample + Parser::CharacterSet all(alpha); + // TODO: finish from here. But += is buggy +// all += whitespace; +// all += crlf; + } void diff --git a/src/tests/stub_SBufDetailedStats.cc b/src/tests/stub_SBufDetailedStats.cc index 8233ac2c38..c4520b7eb0 100644 --- a/src/tests/stub_SBufDetailedStats.cc +++ b/src/tests/stub_SBufDetailedStats.cc @@ -6,7 +6,7 @@ class StatHist; -void recordSBufSizeAtDestruct(SBuf::size_type) STUB_NOP +void recordSBufSizeAtDestruct(SBuf::size_type) {} const StatHist * collectSBufDestructTimeStats() STUB_RETVAL(NULL) -void recordMemBlobSizeAtDestruct(SBuf::size_type) STUB_NOP +void recordMemBlobSizeAtDestruct(SBuf::size_type) {} const StatHist * collectMemBlobDestructTimeStats() STUB_RETVAL(NULL)