]>
Commit | Line | Data |
---|---|---|
bbc27441 | 1 | /* |
bde978a6 | 2 | * Copyright (C) 1996-2015 The Squid Software Foundation and contributors |
bbc27441 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c9a4e310 FC |
9 | #ifndef SQUID_PARSER_TOKENIZER_H_ |
10 | #define SQUID_PARSER_TOKENIZER_H_ | |
11 | ||
12 | #include "base/CharacterSet.h" | |
13 | #include "SBuf.h" | |
14 | ||
90bba30a | 15 | /// Generic protocol-agnostic parsing tools |
11bd4370 A |
16 | namespace Parser |
17 | { | |
c9a4e310 | 18 | |
0351d153 AJ |
19 | /** |
20 | * Lexical processor to tokenize a buffer. | |
21 | * | |
5d811da5 AJ |
22 | * Allows arbitrary delimiters and token character sets to |
23 | * be provided by callers. | |
24 | * | |
25 | * All methods start from the beginning of the input buffer. | |
26 | * Methods returning true consume bytes from the buffer. | |
27 | * Methods returning false have no side-effects. | |
0351d153 | 28 | */ |
11bd4370 A |
29 | class Tokenizer |
30 | { | |
c9a4e310 | 31 | public: |
0c67864a | 32 | explicit Tokenizer(const SBuf &inBuf) : buf_(inBuf), parsed_(0) {} |
11bd4370 | 33 | |
0c67864a AR |
34 | /// yet unparsed data |
35 | SBuf buf() const { return buf_; } | |
36 | ||
37 | /// number of parsed bytes, including skipped ones | |
38 | SBuf::size_type parsedSize() const { return parsed_; } | |
11bd4370 A |
39 | |
40 | /// whether the end of the buffer has been reached | |
41 | bool atEnd() const { return buf_.isEmpty(); } | |
42 | ||
43 | /// the remaining unprocessed section of buffer | |
44 | const SBuf& remaining() const { return buf_; } | |
45 | ||
46 | /// reinitialize processing for a new buffer | |
f29718b0 | 47 | void reset(const SBuf &newBuf) { undoParse(newBuf, 0); } |
11bd4370 A |
48 | |
49 | /** Basic strtok(3): | |
50 | * Skips all leading delimiters (if any), | |
0c67864a AR |
51 | * extracts all characters up to the next delimiter (a token), and |
52 | * skips all trailing delimiters (at least one must be present). | |
11bd4370 A |
53 | * |
54 | * Want to extract delimiters? Use prefix() instead. | |
55 | * | |
0c67864a AR |
56 | * Note that Tokenizer cannot tell whether the trailing delimiters will |
57 | * continue when/if more input data becomes available later. | |
11bd4370 | 58 | * |
0c67864a | 59 | * \return true if found a non-empty token followed by a delimiter |
11bd4370 A |
60 | */ |
61 | bool token(SBuf &returnedToken, const CharacterSet &delimiters); | |
62 | ||
0c67864a AR |
63 | /** Extracts all sequential permitted characters up to an optional length limit. |
64 | * | |
65 | * Note that Tokenizer cannot tell whether the prefix will | |
66 | * continue when/if more input data becomes available later. | |
11bd4370 A |
67 | * |
68 | * \retval true one or more characters were found, the sequence (string) is placed in returnedToken | |
69 | * \retval false no characters from the permitted set were found | |
70 | */ | |
71 | bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos); | |
72 | ||
bac851c0 AJ |
73 | /** Extracts all sequential permitted characters up to an optional length limit. |
74 | * Operates on the trailing end of the buffer. | |
75 | * | |
76 | * Note that Tokenizer cannot tell whether the buffer will | |
77 | * gain more data when/if more input becomes available later. | |
78 | * | |
79 | * \retval true one or more characters were found, the sequence (string) is placed in returnedToken | |
80 | * \retval false no characters from the permitted set were found | |
81 | */ | |
82 | bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos); | |
83 | ||
84 | /** skips a given suffix character sequence (string) | |
85 | * Operates on the trailing end of the buffer. | |
86 | * | |
87 | * Note that Tokenizer cannot tell whether the buffer will | |
88 | * gain more data when/if more input becomes available later. | |
89 | * | |
90 | * \return whether the exact character sequence was found and skipped | |
91 | */ | |
92 | bool skipSuffix(const SBuf &tokenToSkip); | |
93 | ||
11bd4370 A |
94 | /** skips a given character sequence (string) |
95 | * | |
96 | * \return whether the exact character sequence was found and skipped | |
97 | */ | |
98 | bool skip(const SBuf &tokenToSkip); | |
99 | ||
100 | /** skips a given single character | |
101 | * | |
0c67864a | 102 | * \return whether the character was skipped |
11bd4370 A |
103 | */ |
104 | bool skip(const char tokenChar); | |
105 | ||
0c67864a AR |
106 | /** Skips a single character from the set. |
107 | * | |
108 | * \return whether a character was skipped | |
109 | */ | |
110 | bool skipOne(const CharacterSet &discardables); | |
111 | ||
112 | /** Skips all sequential characters from the set, in any order. | |
113 | * | |
114 | * \returns the number of skipped characters | |
115 | */ | |
116 | SBuf::size_type skipAll(const CharacterSet &discardables); | |
117 | ||
118 | /** Extracts an unsigned int64_t at the beginning of the buffer. | |
11bd4370 A |
119 | * |
120 | * strtoll(3)-alike function: tries to parse unsigned 64-bit integer | |
121 | * at the beginning of the parse buffer, in the base specified by the user | |
122 | * or guesstimated; consumes the parsed characters. | |
123 | * | |
124 | * \param result Output value. Not touched if parsing is unsuccessful. | |
125 | * \param base Specify base to do the parsing in, with the same restrictions | |
126 | * as strtoll. Defaults to 0 (meaning guess) | |
b54f4137 AJ |
127 | * \param allowSign Whether to accept a '+' or '-' sign prefix. |
128 | * \param limit Maximum count of characters to convert. | |
11bd4370 A |
129 | * |
130 | * \return whether the parsing was successful | |
131 | */ | |
b54f4137 | 132 | bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos); |
957143e6 | 133 | |
0c67864a AR |
134 | protected: |
135 | SBuf consume(const SBuf::size_type n); | |
136 | SBuf::size_type success(const SBuf::size_type n); | |
137 | ||
f29718b0 AJ |
138 | /// reset the buffer and parsed stats to a saved checkpoint |
139 | void undoParse(const SBuf &newBuf, SBuf::size_type cParsed) { buf_ = newBuf; parsed_ = cParsed; } | |
140 | ||
c9a4e310 | 141 | private: |
11bd4370 | 142 | SBuf buf_; ///< yet unparsed input |
0c67864a | 143 | SBuf::size_type parsed_; ///< bytes successfully parsed, including skipped |
c9a4e310 FC |
144 | }; |
145 | ||
c9a4e310 | 146 | } /* namespace Parser */ |
0351d153 | 147 | |
c9a4e310 | 148 | #endif /* SQUID_PARSER_TOKENIZER_H_ */ |
f53969cc | 149 |