#endif
#endif
+
+/// convenience method: consumes up to n bytes, counts, and returns them
+SBuf
+Parser::Tokenizer::consume(const SBuf::size_type n) {
+ // careful: n may be npos!
+ const SBuf result = buf_.consume(n);
+ parsed_ += result.length();
+ return result;
+}
+
+/// convenience method: consume()s up to n bytes and returns their count
+SBuf::size_type
+Parser::Tokenizer::success(const SBuf::size_type n) {
+ return consume(n).length();
+}
+
bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
- const SBuf savebuf(buf_);
- skip(delimiters);
+ const Tokenizer saved(*this);
+ skipAll(delimiters);
const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
- if (tokenLen == SBuf::npos && !delimiters['\0']) {
- // no delimiter found, nor is NUL/EOS/npos acceptible as one
- buf_ = savebuf;
+ if (tokenLen == SBuf::npos) {
+ *this = saved;
return false;
}
- const SBuf retval = buf_.consume(tokenLen);
- skip(delimiters);
- returnedToken = retval;
+ returnedToken = consume(tokenLen); // cannot be empty
+ skipAll(delimiters);
return true;
}
const SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
if (prefixLen == 0)
return false;
- returnedToken = buf_.consume(prefixLen);
+ returnedToken = consume(prefixLen);
return true;
}
-bool
-Parser::Tokenizer::skip(const CharacterSet &tokenChars)
+SBuf::size_type
+Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
if (prefixLen == 0)
- return false;
- buf_.consume(prefixLen);
- return true;
+ return 0;
+ return success(prefixLen);
+}
+
+bool
+Parser::Tokenizer::skipOne(const CharacterSet &chars)
+{
+ if (!buf_.isEmpty() && chars[buf_[0]])
+ return success(1);
+ return false;
}
bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
- if (buf_.startsWith(tokenToSkip)) {
- buf_.consume(tokenToSkip.length());
- return true;
- }
+ if (buf_.startsWith(tokenToSkip))
+ return success(tokenToSkip.length());
return false;
}
bool
Parser::Tokenizer::skip(const char tokenChar)
{
- if (buf_[0] == tokenChar) {
- buf_.consume(1);
- return true;
- }
+ if (!buf_.isEmpty() && buf_[0] == tokenChar)
+ return success(1);
return false;
}
acc = -acc;
result = acc;
- buf_.consume(s - buf_.rawContent() -1);
- return true;
+ return success(s - buf_.rawContent() - 1);
}
class Tokenizer
{
public:
- explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {}
+ explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf), parsed_(0) {}
- // return a copy the current contents of the parse buffer
- const SBuf buf() const { return buf_; }
+ /// yet unparsed data
+ SBuf buf() const { return buf_; }
+
+ /// number of parsed bytes, including skipped ones
+ SBuf::size_type parsedSize() const { return parsed_; }
/// whether the end of the buffer has been reached
bool atEnd() const { return buf_.isEmpty(); }
const SBuf& remaining() const { return buf_; }
/// reinitialize processing for a new buffer
- void reset(const SBuf &newBuf) { buf_ = newBuf; }
+ void reset(const SBuf &newBuf) { buf_ = newBuf; parsed_ = 0; }
/** Basic strtok(3):
* Skips all leading delimiters (if any),
- * accumulates all characters up to the next delimiter (a token), and
- * skips all trailing delimiters.
+ * extracts all characters up to the next delimiter (a token), and
+ * skips all trailing delimiters (at least one must be present).
*
* Want to extract delimiters? Use prefix() instead.
*
- * At least one terminating delimiter is required. \0 may be passed
- * as a delimiter to treat end of buffer content as the end of token.
+ * Note that Tokenizer cannot tell whether the trailing delimiters will
+ * continue when/if more input data becomes available later.
*
- * \return false if no terminal delimiter is found.
+ * \return true if found a non-empty token followed by a delimiter
*/
bool token(SBuf &returnedToken, const CharacterSet &delimiters);
- /** Accumulates all sequential permitted characters up to an optional length limit.
+ /** Extracts all sequential permitted characters up to an optional length limit.
+ *
+ * Note that Tokenizer cannot tell whether the prefix will
+ * continue when/if more input data becomes available later.
*
* \retval true one or more characters were found, the sequence (string) is placed in returnedToken
* \retval false no characters from the permitted set were found
*/
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
- /** skips all sequential characters from the set, in any order
- *
- * \return whether one or more characters in the set were found
- */
- bool skip(const CharacterSet &tokenChars);
-
/** skips a given character sequence (string)
*
* \return whether the exact character sequence was found and skipped
/** skips a given single character
*
- * \return whether the character was found and skipped
+ * \return whether the character was skipped
*/
bool skip(const char tokenChar);
- /** parse an unsigned int64_t at the beginning of the buffer
+ /** Skips a single character from the set.
+ *
+ * \return whether a character was skipped
+ */
+ bool skipOne(const CharacterSet &discardables);
+
+ /** Skips all sequential characters from the set, in any order.
+ *
+ * \returns the number of skipped characters
+ */
+ SBuf::size_type skipAll(const CharacterSet &discardables);
+
+ /** Extracts an unsigned int64_t at the beginning of the buffer.
*
* strtoll(3)-alike function: tries to parse unsigned 64-bit integer
* at the beginning of the parse buffer, in the base specified by the user
*/
bool int64(int64_t &result, int base = 0);
+protected:
+ SBuf consume(const SBuf::size_type n);
+ SBuf::size_type success(const SBuf::size_type n);
+
private:
SBuf buf_; ///< yet unparsed input
+ SBuf::size_type parsed_; ///< bytes successfully parsed, including skipped
};
} /* namespace Parser */