class CharacterSet
{
public:
+ /// stores label as name and marks every character of the NUL-terminated string c as a member. XXX: use unsigned chars?
CharacterSet(const char *label, const char * const c) : name(label) {
- const size_t = strlen(c);
- for (size_t i = 0; i < len; ++i) {
+ size_t clen = strlen(c); // strlen() stops at the first NUL, so a NUL byte cannot be added through this constructor
+ for (size_t i = 0; i < clen; ++i) { // each c[i] is cast to uint8_t below before it is used as an index
chars_[static_cast<uint8_t>(c[i])] = true;
}
}
/// add all characters from the given CharacterSet to this one
const CharacterSet &operator +=(const CharacterSet &src) {
// TODO: iterate src.chars_ vector instead of walking the entire 8-bit space
- for (size_t i = 0; i < 256; ++i)
- chars_[static_cast<uint8_t>(c)] = true;
+ for (size_t i = 0; i < 256; ++i) // index must be wider than 8 bits: a uint8_t wraps at 255, so "i < 256" would never become false
+ if (src.chars_[i]) // copy only the members of src; entries already set here are left as they are
+ chars_[i] = true;
return *this;
}
--- /dev/null
+#include "squid.h"
+#include "Tokenizer.h"
+
+namespace Parser {
+
+bool
+Tokenizer::token(SBuf &returnedToken, const CharacterSet &whitespace)
+{
+ //TODO: not implemented yet. This placeholder ignores whitespace, leaves returnedToken and buf_ untouched and reports "no token found".
+ return false;
+}
+
+bool
+Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars)
+{
+ //TODO: not implemented yet. This placeholder ignores tokenChars, leaves returnedToken and buf_ untouched and reports "no token found".
+ return false;
+}
+
+bool
+Tokenizer::skip(const CharacterSet &tokenChars)
+{
+ //TODO: not implemented yet. This placeholder ignores tokenChars, consumes nothing from buf_ and reports "nothing skipped".
+ return false;
+}
+
+bool
+Tokenizer::skip(const SBuf &tokenToSkip)
+{
+ //TODO: not implemented yet. This placeholder ignores tokenToSkip, consumes nothing from buf_ and reports "nothing skipped".
+ return false;
+}
+
+bool
+Tokenizer::skip(const char tokenChar)
+{
+ //TODO: not implemented yet. This placeholder ignores tokenChar, consumes nothing from buf_ and reports "nothing skipped".
+ return false;
+}
+
+SBuf::size_type
+Tokenizer::find_first_in (const CharacterSet &set)
+{
+ // Walk the unparsed input front to back; its length is read once, before the scan starts.
+ const SBuf::size_type end = buf_.length();
+ for (SBuf::size_type pos = 0; pos < end; ++pos) {
+ if (set[buf_[pos]]) return pos; // first member of the set: report its offset
+ }
+ return SBuf::npos; // nothing in buf_ belongs to the set (or buf_ is empty)
+}
+
+SBuf::size_type
+Tokenizer::find_first_not_in (const CharacterSet &set)
+{
+ //TODO: not implemented yet. This placeholder ignores set, does not look at buf_ and always reports "not found".
+ return SBuf::npos;
+}
+
+} /* namespace Parser */
--- /dev/null
+#ifndef SQUID_PARSER_TOKENIZER_H_
+#define SQUID_PARSER_TOKENIZER_H_
+
+#include "CharacterSet.h"
+#include "SBuf.h"
+
+namespace Parser {
+
+class Tokenizer { // token-by-token consumer of an SBuf; the input is held by value in buf_
+public:
+ explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {} ///< start with inBuf as the unparsed input
+
+ bool atEnd() const { return !buf_.length(); } ///< true when the unparsed input has zero length
+ const SBuf& remaining() const { return buf_; } ///< read-only access to the yet unparsed input
+ void reset(const SBuf &newBuf) { buf_ = newBuf; } ///< replace the unparsed input with newBuf
+
+ /* The following methods start from the beginning of the input buffer.
+ * They return true and consume parsed chars if a non-empty token is found.
+ * Otherwise, they return false without any side-effects. */
+
+ /** Basic strtok(3):
+ * Skips all leading delimiters (if any),
+ * accumulates all characters up to the first delimiter (a token), and
+ * skips all trailing delimiters (if any).
+ * The whitespace set names the delimiters; the token is handed back in returnedToken.
+ * Want to extract delimiters? Use three prefix() calls instead.
+ */
+ bool token(SBuf &returnedToken, const CharacterSet &whitespace);
+
+ /// Accumulates all sequential characters permitted by tokenChars (a token) into returnedToken.
+ bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars);
+
+ /// Skips all sequential characters permitted by tokenChars (a token).
+ bool skip(const CharacterSet &tokenChars);
+
+ /// Skips a given token (tokenToSkip).
+ bool skip(const SBuf &tokenToSkip);
+
+ /// Skips a given character (a one-character token).
+ bool skip(const char tokenChar);
+
+private:
+ SBuf buf_; ///< yet unparsed input
+
+ /// Position in buf_ of the first character that belongs to the set, or SBuf::npos if there is none.
+ SBuf::size_type find_first_in (const CharacterSet &set);
+ SBuf::size_type find_first_not_in (const CharacterSet &set); ///< presumably the first character NOT in the set, npos if none -- TODO confirm once implemented
+};
+
+
+} /* namespace Parser */
+#endif /* SQUID_PARSER_TOKENIZER_H_ */