]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/parser/Tokenizer.h
7bae1ccbb481d282cd035c2d8f16e6cba0c87e55
/*
 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
9 #ifndef SQUID_PARSER_TOKENIZER_H_
10 #define SQUID_PARSER_TOKENIZER_H_
12 #include "base/CharacterSet.h"
13 #include "sbuf/SBuf.h"
/// Generic protocol-agnostic parsing tools
/**
 * Lexical processor to tokenize a buffer.
 *
 * Allows arbitrary delimiters and token character sets to
 * be provided by callers.
 *
 * Unless documented otherwise, all methods start from the beginning
 * of the input buffer.
 * Methods returning true consume bytes from the buffer.
 * Methods returning false have no side-effects.
 */
32 explicit Tokenizer(const SBuf
&inBuf
) : buf_(inBuf
), parsed_(0) {}
35 SBuf
buf() const { return buf_
; }
37 /// number of parsed bytes, including skipped ones
38 SBuf::size_type
parsedSize() const { return parsed_
; }
40 /// whether the end of the buffer has been reached
41 bool atEnd() const { return buf_
.isEmpty(); }
43 /// the remaining unprocessed section of buffer
44 const SBuf
& remaining() const { return buf_
; }
46 /// reinitialize processing for a new buffer
47 void reset(const SBuf
&newBuf
) { undoParse(newBuf
, 0); }
50 * Skips all leading delimiters (if any),
51 * extracts all characters up to the next delimiter (a token), and
52 * skips all trailing delimiters (at least one must be present).
54 * Want to extract delimiters? Use prefix() instead.
56 * Note that Tokenizer cannot tell whether the trailing delimiters will
57 * continue when/if more input data becomes available later.
59 * \return true if found a non-empty token followed by a delimiter
61 bool token(SBuf
&returnedToken
, const CharacterSet
&delimiters
);
63 /** Extracts all sequential permitted characters up to an optional length limit.
65 * Note that Tokenizer cannot tell whether the prefix will
66 * continue when/if more input data becomes available later.
68 * \retval true one or more characters were found, the sequence (string) is placed in returnedToken
69 * \retval false no characters from the permitted set were found
71 bool prefix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, SBuf::size_type limit
= SBuf::npos
);
73 /** Extracts all sequential permitted characters up to an optional length limit.
74 * Operates on the trailing end of the buffer.
76 * Note that Tokenizer cannot tell whether the buffer will
77 * gain more data when/if more input becomes available later.
79 * \retval true one or more characters were found, the sequence (string) is placed in returnedToken
80 * \retval false no characters from the permitted set were found
82 bool suffix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, SBuf::size_type limit
= SBuf::npos
);
84 /** skips a given suffix character sequence (string)
85 * Operates on the trailing end of the buffer.
87 * Note that Tokenizer cannot tell whether the buffer will
88 * gain more data when/if more input becomes available later.
90 * \return whether the exact character sequence was found and skipped
92 bool skipSuffix(const SBuf
&tokenToSkip
);
94 /** skips a given character sequence (string)
96 * \return whether the exact character sequence was found and skipped
98 bool skip(const SBuf
&tokenToSkip
);
/** skips a given single character
 *
 * \param tokenChar the character to look for
 * \return whether the character was skipped
 */
bool skip(const char tokenChar);
106 /** Skips a single character from the set.
108 * \return whether a character was skipped
110 bool skipOne(const CharacterSet
&discardables
);
112 /** Skips all sequential characters from the set, in any order.
114 * \returns the number of skipped characters
116 SBuf::size_type
skipAll(const CharacterSet
&discardables
);
118 /** Removes a single trailing character from the set.
120 * \return whether a character was removed
122 bool skipOneTrailing(const CharacterSet
&discardables
);
124 /** Removes all sequential trailing characters from the set, in any order.
126 * \returns the number of characters removed
128 SBuf::size_type
skipAllTrailing(const CharacterSet
&discardables
);
130 /** Extracts an unsigned int64_t at the beginning of the buffer.
132 * strtoll(3)-alike function: tries to parse unsigned 64-bit integer
133 * at the beginning of the parse buffer, in the base specified by the user
134 * or guesstimated; consumes the parsed characters.
136 * \param result Output value. Not touched if parsing is unsuccessful.
137 * \param base Specify base to do the parsing in, with the same restrictions
138 * as strtoll. Defaults to 0 (meaning guess)
139 * \param allowSign Whether to accept a '+' or '-' sign prefix.
140 * \param limit Maximum count of characters to convert.
142 * \return whether the parsing was successful
144 bool int64(int64_t &result
, int base
= 0, bool allowSign
= true, SBuf::size_type limit
= SBuf::npos
);
/*
 * The methods below mimic their counterparts documented above, but they
 * throw on errors, including InsufficientInput. The field description
 * parameter is used for error reporting and debugging.
 */
152 /// prefix() wrapper but throws InsufficientInput if input contains
153 /// nothing but the prefix (i.e. if the prefix is not "terminated")
154 SBuf
prefix(const char *description
, const CharacterSet
&tokenChars
, SBuf::size_type limit
= SBuf::npos
);
156 /// int64() wrapper but limited to unsigned decimal integers (for now)
157 int64_t udec64(const char *description
, SBuf::size_type limit
= SBuf::npos
);
160 SBuf
consume(const SBuf::size_type n
);
161 SBuf::size_type
success(const SBuf::size_type n
);
162 SBuf
consumeTrailing(const SBuf::size_type n
);
163 SBuf::size_type
successTrailing(const SBuf::size_type n
);
165 /// reset the buffer and parsed stats to a saved checkpoint
166 void undoParse(const SBuf
&newBuf
, SBuf::size_type cParsed
) { buf_
= newBuf
; parsed_
= cParsed
; }
169 SBuf buf_
; ///< yet unparsed input
170 SBuf::size_type parsed_
; ///< bytes successfully parsed, including skipped
173 } /* namespace Parser */
175 #endif /* SQUID_PARSER_TOKENIZER_H_ */