git.ipfire.org Git - thirdparty/squid.git/blob - src/parser/Tokenizer.h
Implement Parser::Tokenizer::int64 and unit tests
[thirdparty/squid.git] / src / parser / Tokenizer.h
1 #ifndef SQUID_PARSER_TOKENIZER_H_
2 #define SQUID_PARSER_TOKENIZER_H_
3
4 #include "base/CharacterSet.h"
5 #include "SBuf.h"
6
7 /// Generic protocol-agnostic parsing tools
8 namespace Parser {
9
10 /**
11 * Lexical processor to tokenize a buffer.
12 *
13 * Allows arbitrary delimiters and token character sets to
14 * be provided by callers.
15 *
16 * All methods start from the beginning of the input buffer.
17 * Methods returning true consume bytes from the buffer.
18 * Methods returning false have no side-effects.
19 */
20 class Tokenizer {
21 public:
22 explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf) {}
23
24 // return a copy the current contents of the parse buffer
25 const SBuf buf() const { return buf_; }
26
27 /// whether the end of the buffer has been reached
28 bool atEnd() const { return buf_.isEmpty(); }
29
30 /// the remaining unprocessed section of buffer
31 const SBuf& remaining() const { return buf_; }
32
33 /// reinitialize processing for a new buffer
34 void reset(const SBuf &newBuf) { buf_ = newBuf; }
35
36 /** Basic strtok(3):
37 * Skips all leading delimiters (if any),
38 * accumulates all characters up to the next delimiter (a token), and
39 * skips all trailing delimiters (if any).
40 *
41 * Want to extract delimiters? Use prefix() instead.
42 */
43 bool token(SBuf &returnedToken, const CharacterSet &delimiters);
44
45 /** Accumulates all sequential permitted characters up to an optional length limit.
46 *
47 * \retval true one or more characters were found, the sequence (string) is placed in returnedToken
48 * \retval false no characters from the permitted set were found
49 */
50 bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
51
52 /** skips all sequential characters from the set, in any order
53 *
54 * \return whether one or more characters in the set were found
55 */
56 bool skip(const CharacterSet &tokenChars);
57
58 /** skips a given character sequence (string)
59 *
60 * \return whether the exact character sequence was found and skipped
61 */
62 bool skip(const SBuf &tokenToSkip);
63
64 /** skips a given single character
65 *
66 * \return whether the character was found and skipped
67 */
68 bool skip(const char tokenChar);
69
70 /** parse an unsigned int64_t at the beginning of the buffer
71 *
72 * strtoll(3)-alike function: tries to parse unsigned 64-bit integer
73 * at the beginning of the parse buffer, in the base specified by the user
74 * or guesstimated; consumes the parsed characters.
75 *
76 * \param result output value. Not touched if parseing is unsuccessful
77 * \param base specify base to do the parsing in. Admitted values are
78 * 8, 10, 16 and 0, the latter specifying to use C syntax for guessing
79 * the base
80 * \return true if the parsing was successful
81 */
82 bool int64 (int64_t &result, int base = 0);
83
84 private:
85 SBuf buf_; ///< yet unparsed input
86 };
87
88 } /* namespace Parser */
89
90 #endif /* SQUID_PARSER_TOKENIZER_H_ */