+++ /dev/null
-#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
-#define _SQUID_SRC_PARSER_CHARACTERSET_H
-
-#include <vector>
-
-namespace Parser {
-
-class CharacterSet
-{
-public:
- //XXX: use unsigned chars?
- CharacterSet(const char *label, const char * const c) : name(label), chars_(std::vector<bool>(256,false)) {
- size_t clen = strlen(c);
- for (size_t i = 0; i < clen; ++i)
- chars_[static_cast<uint8_t>(c[i])] = true;
- }
-
- /// whether a given character exists in the set
- bool operator[](char c) const {return chars_[static_cast<uint8_t>(c)];}
-
- /// add a given char to the character set
- CharacterSet & add(const char c) {chars_[static_cast<uint8_t>(c)] = true; return *this; }
-
- /// add all characters from the given CharacterSet to this one
- const CharacterSet &operator +=(const CharacterSet &src) {
- //precondition: src.chars_.size() == chars_.size()
- std::vector<bool>::const_iterator s = src.chars_.begin();
- const std::vector<bool>::const_iterator e = src.chars_.end();
- std::vector<bool>::iterator d = chars_.begin();
- while (s != e) {
- if (*s)
- *d = true;
- ++s;
- ++d;
- }
- return *this;
- }
-
- /// name of this character set
- const char * name;
-
-private:
- /// characters defined in this set
- std::vector<bool> chars_; //std::vector<bool> is optimized
-};
-
-} // namespace Parser
-
-#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
noinst_LTLIBRARIES = libsquid-parser.la
libsquid_parser_la_SOURCES = \
- CharacterSet.h \
Tokenizer.h \
Tokenizer.cc
SBUF_SOURCE= \
+ $(top_srcdir)/base/CharacterSet.h \
$(top_srcdir)/src/SBuf.h \
$(top_srcdir)/src/SBuf.cc \
$(top_srcdir)/src/MemBlob.h \
testTokenizer_LDADD = \
libsquid-parser.la \
$(top_builddir)/lib/libmiscutil.la \
+ $(top_builddir)/src/base/libbase.la \
$(SQUID_CPPUNIT_LIBS) \
$(SQUID_CPPUNIT_LA) \
$(COMPAT_LIB)
namespace Parser {
-SBuf::size_type
-Tokenizer::findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos)
-{
- SBuf::size_type prefixLen = startAtPos;
- const SBuf::size_type len = buf_.length();
- while (prefixLen < len) {
- if (!tokenChars[buf_[prefixLen]])
- break;
- ++prefixLen;
- }
- return prefixLen;
-}
-
-SBuf::size_type
-Tokenizer::findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos)
-{
- SBuf::size_type i = startAtPos;
- const SBuf::size_type len = buf_.length();
- bool found = false;
- while (i < len) {
- if (tokenChars[buf_[i]]) {
- found = true;
- break;
- }
- ++i;
- }
- return found ? i : SBuf::npos ;
-}
-
bool
Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
{
- const SBuf::size_type endOfPreWhiteSpace = findFirstNotIn(whitespace);
- const SBuf::size_type endOfToken = findFirstIn(whitespace, endOfPreWhiteSpace);
+ const SBuf::size_type endOfPreWhiteSpace = buf_.findFirstNotOf(whitespace);
+ const SBuf::size_type endOfToken = buf_.findFirstOf(whitespace, endOfPreWhiteSpace);
if (endOfToken == SBuf::npos)
return false;
buf_.consume(endOfPreWhiteSpace);
bool
Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
{
- SBuf::size_type prefixLen = findFirstNotIn(tokenChars);
+ SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
if (prefixLen == 0)
return false;
returnedToken = buf_.consume(prefixLen);
bool
Tokenizer::skip(const CharacterSet &tokenChars)
{
- SBuf::size_type prefixLen = findFirstNotIn(tokenChars);
+ SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
if (prefixLen == 0)
return false;
buf_.consume(prefixLen);
#ifndef SQUID_PARSER_TOKENIZER_H_
#define SQUID_PARSER_TOKENIZER_H_
-#include "CharacterSet.h"
+#include "base/CharacterSet.h"
#include "SBuf.h"
namespace Parser {
/// Skips a given character (a token).
bool skip(const char tokenChar);
-protected:
- //obtain the length of the longest prefix in buf_ only made of chars in tokenChars
- SBuf::size_type findFirstNotIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0);
- SBuf::size_type findFirstIn(const CharacterSet& tokenChars, SBuf::size_type startAtPos = 0);
-
private:
SBuf buf_; ///< yet unparsed input
};
#include "squid.h"
#include "testTokenizer.h"
-#include "CharacterSet.h"
+#include "base/CharacterSet.h"
#include "Tokenizer.h"
CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
"Host: resource.com\r\n"
"Cookie: laijkpk3422r j1noin \r\n"
"\r\n");
-const Parser::CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
-const Parser::CharacterSet whitespace("whitespace"," \r\n");
-const Parser::CharacterSet crlf("crlf","\r\n");
-const Parser::CharacterSet tab("tab","\t");
-const Parser::CharacterSet numbers("numbers","0123456789");
+const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+const CharacterSet whitespace("whitespace"," \r\n");
+const CharacterSet crlf("crlf","\r\n");
+const CharacterSet tab("tab","\t");
+const CharacterSet numbers("numbers","0123456789");
void
testTokenizer::testTokenizerPrefix()
CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
// match until the end of the sample
- Parser::CharacterSet all(whitespace);
+ CharacterSet all(whitespace);
all += alpha;
all += crlf;
all += numbers;
//no separator found
CPPUNIT_ASSERT(!t.token(s,tab));
-
}
void