#include <vector>
+//#include <iostream>
namespace Parser {
class CharacterSet
public:
//XXX: use unsigned chars?
+ /// Builds a set labelled `label` that contains every character of the
+ /// NUL-terminated string `c`.
CharacterSet(const char *label, const char * const c) : name(label) {
+ // resize(), not reserve(): reserve() only allocates capacity and leaves
+ // size() at 0, so the indexed writes below would fall outside the vector
+ // (undefined behaviour) and copies of this set would come out empty.
+ chars_.resize(256, false);
size_t clen = strlen(c);
- for (size_t i = 0; i < clen; ++i) {
+ for (size_t i = 0; i < clen; ++i)
chars_[static_cast<uint8_t>(c[i])] = true;
- }
}
/// whether a given character exists in the set
/// add all characters from the given CharacterSet to this one
const CharacterSet &operator +=(const CharacterSet &src) {
// TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
- for (uint8_t i = 0; i < 256; ++i)
- if (src.chars_[i])
+ // The index must be wider than 8 bits: a uint8_t wraps from 255 back to 0,
+ // so "i < 256" was always true and the loop never terminated.
+ for (size_t i = 0; i < 256; ++i) {
+ if (src.chars_[i])
chars_[i] = true;
+ }
return *this;
}
private:
/// characters defined in this set
- std::vector<bool> chars_;
+ std::vector<bool> chars_; // std::vector<bool> is the space-optimized (bit-packed) specialization
};
} // namespace Parser
$(SBUF_SOURCE) \
testTokenizer.h \
testTokenizer.cc \
- Tokenizer.h \
+ Tokenizer.h
+nodist_testTokenizer_SOURCES = \
+ $(top_srcdir)/src/tests/testMain.cc \
$(top_srcdir)/src/tests/stub_mem.cc \
$(top_srcdir)/src/tests/stub_debug.cc \
$(top_srcdir)/src/tests/stub_time.cc \
$(top_srcdir)/src/tests/stub_SBufDetailedStats.cc
-nodist_testTokenizer_SOURCES = \
- $(top_srcdir)/src/tests/testMain.cc
testTokenizer_LDFLAGS = $(LIBADD_DL)
testTokenizer_LDADD = \
libsquid-parser.la \
+/// Measures the leading run of buf_ characters that belong to tokenChars.
+/// When the run is non-empty, assigns buf_.consume(run length) to
+/// returnedToken and returns true; otherwise returns false and leaves
+/// returnedToken unassigned.
bool
Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
{
- const SBuf::size_type pos=find_first_not_in(tokenChars);
- if (pos == SBuf::npos)
+ const SBuf::size_type available = buf_.length();
+ SBuf::size_type matched = 0;
+ // advance past every leading character that is a member of tokenChars
+ for (; matched < available && tokenChars[buf_[matched]]; ++matched) {
+ }
+ if (!matched)
return false;
- //finish
+ returnedToken = buf_.consume(matched);
return true;
}
+/// Returns the index of the first character of buf_ that is not in `set`,
+/// or SBuf::npos when every character of buf_ belongs to it.
+/// The scan is kept rather than stubbed to "return 0": a constant 0 would
+/// tell callers the very first character is outside the set for any input.
SBuf::size_type
Tokenizer::find_first_not_in (const CharacterSet &set)
{
SBuf::size_type rv;
const SBuf::size_type len=buf_.length();
for (rv = 0; rv < len; ++rv)
if (!set[buf_[rv]])
return rv;
return SBuf::npos;
}
} /* namespace Parser */
#include "squid.h"
#include "testTokenizer.h"
+#include "CharacterSet.h"
#include "Tokenizer.h"
CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
+// Sample request text that the tests below tokenize.
+SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
+ "Host: resource.com\r\n"
+ "Cookie: laijkpk3422r j1noin \r\n"
+ "\r\n");
+// All 26 ASCII letters in both cases (the lowercase run previously skipped 'y').
+const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const Parser::CharacterSet whitespace("whitespace"," ");
+const Parser::CharacterSet crlf("crlf","\r\n");
+const Parser::CharacterSet tab("tab","\t");
+/// Exercises Tokenizer::prefix() on the sample request: matching calls must
+/// return the consumed prefix, failing calls must leave the output untouched.
void
testTokenizer::testTokenizerPrefix()
{
+ Parser::Tokenizer t(text);
+ SBuf s;
+
+ // successful prefix tokenization: the method name, then the space after it
+ CPPUNIT_ASSERT(t.prefix(s,alpha));
+ CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
+ CPPUNIT_ASSERT(t.prefix(s,whitespace));
+ CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+ // no match (the first remaining character is not in the prefix set);
+ // s must still hold the previous token
+ CPPUNIT_ASSERT(!t.prefix(s,whitespace));
+ CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
+
+ // one more match to set s to something meaningful
+ CPPUNIT_ASSERT(t.prefix(s,alpha));
+ CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
+
+ // no match (no characters from the character set in the prefix)
+ CPPUNIT_ASSERT(!t.prefix(s,tab));
+ CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); // output SBuf left untouched
+
+ // match until the end of the sample
+ Parser::CharacterSet all(alpha);
+ // TODO: finish from here; blocked because CharacterSet::operator+= is buggy
+// all += whitespace;
+// all += crlf;
+
}
void
class StatHist;
-void recordSBufSizeAtDestruct(SBuf::size_type) STUB_NOP
+// no-op stub: the empty body ignores the reported size
+void recordSBufSizeAtDestruct(SBuf::size_type) {}
const StatHist * collectSBufDestructTimeStats() STUB_RETVAL(NULL)
-void recordMemBlobSizeAtDestruct(SBuf::size_type) STUB_NOP
+// no-op stub: the empty body ignores the reported size
+void recordMemBlobSizeAtDestruct(SBuf::size_type) {}
const StatHist * collectMemBlobDestructTimeStats() STUB_RETVAL(NULL)