From: Francesco Chemolli Date: Fri, 13 Dec 2013 18:22:08 +0000 (+0100) Subject: Implemented Tokenizer::token and its unit test X-Git-Tag: merge-candidate-3-v1~506^2~99 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a6fa38f5fa69d86313f7f1e658f0f55eb49c4ca5;p=thirdparty%2Fsquid.git Implemented Tokenizer::token and its unit test --- diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index 716b204a5b..709ad31af7 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -4,9 +4,9 @@ namespace Parser { SBuf::size_type -Tokenizer::findPrefixLen(const CharacterSet& tokenChars) +Tokenizer::findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos) { - SBuf::size_type prefixLen = 0; + SBuf::size_type prefixLen = startAtPos; const SBuf::size_type len = buf_.length(); while (prefixLen < len) { if (!tokenChars[buf_[prefixLen]]) @@ -17,31 +17,38 @@ Tokenizer::findPrefixLen(const CharacterSet& tokenChars) } SBuf::size_type -Tokenizer::findFirstOf(const CharacterSet& tokenChars) +Tokenizer::findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos) { - SBuf::size_type s = 0; + SBuf::size_type i = startAtPos; const SBuf::size_type len = buf_.length(); bool found = false; - while (s < len) { - if (tokenChars[buf_[prefixLen]]) { + while (i < len) { + if (tokenChars[buf_[i]]) { found = true; break; } - ++s; + ++i; } + return found ? i : SBuf::npos ; } bool Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace) { - //TODO - return false; + const SBuf::size_type endOfPreWhiteSpace = findFirstNotIn(whitespace); + const SBuf::size_type endOfToken = findFirstIn(whitespace, endOfPreWhiteSpace); + if (endOfToken == SBuf::npos) + return false; + buf_.consume(endOfPreWhiteSpace); + returnedToken = buf_.consume(endOfToken - endOfPreWhiteSpace); + skip(whitespace); + return true; } bool Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars) { - SBuf::size_type prefixLen = findPrefixLen(tokenChars); + SBuf::size_type prefixLen = findFirstNotIn(tokenChars); if (prefixLen == 0) return false; returnedToken = buf_.consume(prefixLen); @@ -51,7 +58,7 @@ Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars) bool Tokenizer::skip(const CharacterSet &tokenChars) { - SBuf::size_type prefixLen = findPrefixLen(tokenChars); + SBuf::size_type prefixLen = findFirstNotIn(tokenChars); if (prefixLen == 0) return false; buf_.consume(prefixLen); diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h index 66a78460b7..9e73521d4e 100644 --- a/src/parser/Tokenizer.h +++ b/src/parser/Tokenizer.h @@ -40,8 +40,8 @@ public: protected: //obtain the length of the longest prefix in buf_ only made of chars in tokenChars - SBuf::size_type findPrefixLen(const CharacterSet& tokenChars); - SBuf::size_type findFirstOf(const CharacterSet& tokenChars); + SBuf::size_type findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0); + SBuf::size_type findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0); private: SBuf buf_; ///< yet unparsed input diff --git a/src/parser/testTokenizer.cc b/src/parser/testTokenizer.cc index afb6d754d1..eac7415021 100644 --- a/src/parser/testTokenizer.cc +++ b/src/parser/testTokenizer.cc @@ -11,7 +11,7 @@ SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n"); const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); -const Parser::CharacterSet whitespace("whitespace"," "); +const Parser::CharacterSet whitespace("whitespace"," \r\n"); const Parser::CharacterSet crlf("crlf","\r\n"); const Parser::CharacterSet tab("tab","\t"); const Parser::CharacterSet numbers("numbers","0123456789"); @@ -83,6 +83,21 @@ testTokenizer::testTokenizerSkip() void testTokenizer::testTokenizerToken() { + Parser::Tokenizer t(text); + SBuf s; + + // first scenario: patterns match + CPPUNIT_ASSERT(t.token(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); + CPPUNIT_ASSERT(t.token(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s); + CPPUNIT_ASSERT(t.token(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s); + CPPUNIT_ASSERT(t.token(s,whitespace)); + CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s); + + //no separator found + CPPUNIT_ASSERT(!t.token(s,tab)); }