From: Francesco Chemolli
Date: Thu, 12 Dec 2013 16:52:02 +0000 (+0100)
Subject: interim: better adhere to CharacterSet API, first stubs of Tokenizer
X-Git-Tag: merge-candidate-3-v1~506^2~109
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=279383aa68bec86a35e73d9c464413d80908475e;p=thirdparty%2Fsquid.git

interim: better adhere to CharacterSet API, first stubs of Tokenizer
---

diff --git a/src/parser/CharacterSet.h b/src/parser/CharacterSet.h
index 5a24d8ef3f..cc8b1df4fc 100644
--- a/src/parser/CharacterSet.h
+++ b/src/parser/CharacterSet.h
@@ -8,9 +8,10 @@ namespace Parser {
 class CharacterSet
 {
 public:
+    //XXX: use unsigned chars?
     CharacterSet(const char *label, const char * const c) : name(label) {
-        const size_t = strlen(c);
-        for (size_t i = 0; i < len; ++i) {
+        size_t clen = strlen(c);
+        for (size_t i = 0; i < clen; ++i) {
             chars_[static_cast<uint8_t>(c[i])] = true;
         }
     }
@@ -24,8 +25,9 @@ public:
     /// add all characters from the given CharacterSet to this one
     const CharacterSet &operator +=(const CharacterSet &src) {
         // TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
-        for (size_t i = 0; i < 256; ++i)
-            chars_[static_cast<uint8_t>(c)] = true;
+        for (size_t i = 0; i < 256; ++i) // not uint8_t: it wraps at 255, so "i < 256" would never end
+            if (src.chars_[i])
+                chars_[i] = true;
         return *this;
     }
 
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
index e877f0bece..fbd995f812 100644
--- a/src/parser/Makefile.am
+++ b/src/parser/Makefile.am
@@ -3,4 +3,7 @@ include $(top_srcdir)/src/Common.am
 noinst_LTLIBRARIES = libsquid-parser.la
 
 libsquid_parser_la_SOURCES = \
-	CharacterSet.h
+	CharacterSet.h \
+	Tokenizer.h \
+	Tokenizer.cc
+
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
new file mode 100644
index 0000000000..632b4a348a
--- /dev/null
+++ b/src/parser/Tokenizer.cc
@@ -0,0 +1,65 @@
+#include "squid.h"
+#include "Tokenizer.h"
+
+namespace Parser {
+
+/* The public parsing methods below are stubs for now: each one returns false
+ * without consuming any input. */
+
+bool
+Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
+{
+    //TODO
+    return false;
+}
+
+bool
+Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
+{
+    //TODO
+    return false;
+}
+
+bool
+Tokenizer::skip(const CharacterSet &tokenChars)
+{
+    //TODO
+    return false;
+}
+
+bool
+Tokenizer::skip(const SBuf &tokenToSkip)
+{
+    //TODO
+    return false;
+}
+
+bool
+Tokenizer::skip(const char tokenChar)
+{
+    //TODO
+    return false;
+}
+
+/// Scans buf_ from its start and returns the offset of the first character
+/// that is in the given set, or SBuf::npos when no buffered character is.
+SBuf::size_type
+Tokenizer::find_first_in (const CharacterSet &set)
+{
+    SBuf::size_type rv;
+    const SBuf::size_type len=buf_.length();
+    for (rv = 0; rv < len; ++rv)
+        if (set[buf_[rv]])
+            return rv;
+    return SBuf::npos;
+}
+
+/// Not implemented yet: always returns SBuf::npos.
+SBuf::size_type
+Tokenizer::find_first_not_in (const CharacterSet &set)
+{
+    //TODO
+    return SBuf::npos;
+}
+
+} /* namespace Parser */
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
new file mode 100644
index 0000000000..955878b414
--- /dev/null
+++ b/src/parser/Tokenizer.h
@@ -0,0 +1,56 @@
+#ifndef SQUID_PARSER_TOKENIZER_H_
+#define SQUID_PARSER_TOKENIZER_H_
+
+#include "CharacterSet.h"
+#include "SBuf.h"
+
+namespace Parser {
+
+/// Keeps its own SBuf member of yet unparsed input and declares token-extraction methods on it.
+class Tokenizer {
+public:
+    explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {}
+
+    /// whether there is no unparsed input left
+    bool atEnd() const { return !buf_.length(); }
+    /// the yet unparsed input
+    const SBuf& remaining() const { return buf_; }
+    /// replaces the yet unparsed input with newBuf
+    void reset(const SBuf &newBuf) { buf_ = newBuf; }
+
+    /* The following methods start from the beginning of the input buffer.
+     * They return true and consume parsed chars if a non-empty token is found.
+     * Otherwise, they return false without any side-effects. */
+
+    /** Basic strtok(3):
+     *  Skips all leading delimiters (if any),
+     *  accumulates all characters up to the first delimiter (a token), and
+     *  skips all trailing delimiters (if any).
+     *  Want to extract delimiters? Use three prefix() calls instead.
+     */
+    bool token(SBuf &returnedToken, const CharacterSet &whitespace);
+
+    /// Accumulates all sequential permitted characters (a token).
+    bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars);
+
+    /// Skips all sequential permitted characters (a token).
+    bool skip(const CharacterSet &tokenChars);
+
+    /// Skips a given token.
+    bool skip(const SBuf &tokenToSkip);
+
+    /// Skips a given character (a token).
+    bool skip(const char tokenChar);
+
+private:
+    SBuf buf_; ///< yet unparsed input
+
+    /// find the position of the first character in the set. Return npos if not found
+    SBuf::size_type find_first_in (const CharacterSet &set);
+    /// presumably the position of the first character not in the set (judging by the name); not implemented yet
+    SBuf::size_type find_first_not_in (const CharacterSet &set);
+};
+
+
+} /* namespace Parser */
+#endif /* SQUID_PARSER_TOKENIZER_H_ */