namespace Parser {
/// Returns the position of the first character in buf_, at or after
/// startAtPos, that is NOT a member of tokenChars.
/// Returns buf_.length() when every remaining character is in the set.
SBuf::size_type
Tokenizer::findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos)
{
    SBuf::size_type prefixLen = startAtPos;
    const SBuf::size_type len = buf_.length();
    while (prefixLen < len) {
        if (!tokenChars[buf_[prefixLen]])
            break; // first non-member found
        ++prefixLen;
    }
    return prefixLen; // == len when the whole remainder is in tokenChars
}
/// Returns the position of the first character in buf_, at or after
/// startAtPos, that IS a member of tokenChars.
/// Returns SBuf::npos when no such character exists.
SBuf::size_type
Tokenizer::findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos)
{
    const SBuf::size_type len = buf_.length();
    for (SBuf::size_type i = startAtPos; i < len; ++i) {
        if (tokenChars[buf_[i]])
            return i; // first member found
    }
    return SBuf::npos; // no member of tokenChars in the remaining input
}
/// Extracts the next whitespace-delimited token from buf_.
/// Skips any leading whitespace, copies the token into returnedToken,
/// and also consumes the trailing whitespace delimiter(s).
/// Returns false (leaving buf_ untouched) when no delimiter follows the
/// token — the token may still be arriving, so nothing is consumed yet.
bool
Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
{
    const SBuf::size_type endOfPreWhiteSpace = findFirstNotIn(whitespace);
    const SBuf::size_type endOfToken = findFirstIn(whitespace, endOfPreWhiteSpace);
    if (endOfToken == SBuf::npos)
        return false; // no terminating delimiter found; do not consume anything
    buf_.consume(endOfPreWhiteSpace); // discard leading whitespace
    returnedToken = buf_.consume(endOfToken - endOfPreWhiteSpace);
    skip(whitespace); // discard the delimiter(s) after the token
    return true;
}
/// Consumes the longest buf_ prefix made only of tokenChars members and
/// copies it into returnedToken.
/// Returns false (consuming nothing) when buf_ does not start with a
/// member of tokenChars.
bool
Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = findFirstNotIn(tokenChars);
    if (prefixLen == 0)
        return false; // first character is not in tokenChars
    returnedToken = buf_.consume(prefixLen);
    return true;
}
bool
Tokenizer::skip(const CharacterSet &tokenChars)
{
- SBuf::size_type prefixLen = findPrefixLen(tokenChars);
+ SBuf::size_type prefixLen = findFirstNotIn(tokenChars);
if (prefixLen == 0)
return false;
buf_.consume(prefixLen);
protected:
//obtain the length of the longest prefix in buf_ only made of chars in tokenChars
- SBuf::size_type findPrefixLen(const CharacterSet& tokenChars);
- SBuf::size_type findFirstOf(const CharacterSet& tokenChars);
+ SBuf::size_type findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0);
+ SBuf::size_type findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0);
private:
SBuf buf_; ///< yet unparsed input
"Cookie: laijkpk3422r j1noin \r\n"
"\r\n");
const Parser::CharacterSet alpha("alpha", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
// whitespace includes CR and LF so that line terminators also delimit tokens
const Parser::CharacterSet whitespace("whitespace", " \r\n");
const Parser::CharacterSet crlf("crlf", "\r\n");
const Parser::CharacterSet tab("tab", "\t");
const Parser::CharacterSet numbers("numbers", "0123456789");
/// Exercises Tokenizer::token(): successive whitespace-delimited tokens
/// must be returned in input order, and extraction must fail when the
/// requested delimiter set does not occur in the remaining input.
void
testTokenizer::testTokenizerToken()
{
    Parser::Tokenizer t(text);
    SBuf s;

    // first scenario: patterns match — request-line and header tokens
    CPPUNIT_ASSERT(t.token(s, whitespace));
    CPPUNIT_ASSERT_EQUAL(SBuf("GET"), s);
    CPPUNIT_ASSERT(t.token(s, whitespace));
    CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"), s);
    CPPUNIT_ASSERT(t.token(s, whitespace));
    CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"), s);
    CPPUNIT_ASSERT(t.token(s, whitespace));
    CPPUNIT_ASSERT_EQUAL(SBuf("Host:"), s);

    // no tab separator found in the remaining input: token() must fail
    CPPUNIT_ASSERT(!t.token(s, tab));
}