From 1d53a60aae678005c7f133352ef42f85e0f5e785 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Mon, 11 Aug 2014 11:14:15 -0600 Subject: [PATCH] Fixed Parser::Tokenizer::prefix() to return false on all empty prefixes. The code was returning true and sometimes even setting the returnedToken to a non-empty string when no permitted tokenChars were found. Added unit test cases to catch similar bugs in the future. --- src/parser/Tokenizer.cc | 4 +++- src/parser/testTokenizer.cc | 28 +++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index a7dbdba6b6..c9c8959dbe 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -66,7 +66,9 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c const SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars); if (prefixLen == 0) return false; - returnedToken = consume(prefixLen); + if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) + return false; + returnedToken = consume(prefixLen); // cannot be empty after the npos check return true; } diff --git a/src/parser/testTokenizer.cc b/src/parser/testTokenizer.cc index 7443e07eeb..b730a9429c 100644 --- a/src/parser/testTokenizer.cc +++ b/src/parser/testTokenizer.cc @@ -18,9 +18,26 @@ const CharacterSet numbers("numbers","0123456789"); void testTokenizer::testTokenizerPrefix() { + const SBuf canary("This text should not be changed."); + Parser::Tokenizer t(text); SBuf s; + CharacterSet all(whitespace); + all += alpha; + all += crlf; + all += numbers; + all.add(':').add('.').add('/'); + + // an empty prefix should return false (the full output buffer case) + s = canary; + const SBuf before = t.remaining(); + CPPUNIT_ASSERT(!t.prefix(s, all, 0)); + // ... and a false return value means no parameter changes + CPPUNIT_ASSERT_EQUAL(canary, s); + // ... and a false return value means no input buffer changes + CPPUNIT_ASSERT_EQUAL(before, t.remaining()); + // successful prefix tokenization CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); @@ -40,13 +57,14 @@ testTokenizer::testTokenizerPrefix() CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched // match until the end of the sample - CharacterSet all(whitespace); - all += alpha; - all += crlf; - all += numbers; - all.add(':').add('.').add('/'); CPPUNIT_ASSERT(t.prefix(s,all)); CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining()); + + // empty prefix should return false (the empty input buffer case) + s = canary; + CPPUNIT_ASSERT(!t.prefix(s, all)); + // ... and a false return value means no parameter changes + CPPUNIT_ASSERT_EQUAL(canary, s); } void -- 2.47.3