/*
 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */

#include "squid.h"
#include "Debug.h"
#include "parser/Tokenizer.h"

#include <cerrno>
#if HAVE_CTYPE_H
#include <ctype.h>
#endif
/// convenience method: consumes up to n bytes, counts, and returns them
SBuf
Parser::Tokenizer::consume(const SBuf::size_type n)
{
    // careful: n may be npos!
    debugs(24, 5, "consuming " << n << " bytes");
    const SBuf result = buf_.consume(n);
    parsed_ += result.length();
    return result;
}

/// convenience method: consume()s up to n bytes and returns their count
SBuf::size_type
Parser::Tokenizer::success(const SBuf::size_type n)
{
    return consume(n).length();
}

/// convenience method: consumes up to n last bytes and returns them
SBuf
Parser::Tokenizer::consumeTrailing(const SBuf::size_type n)
{
    debugs(24, 5, "consuming " << n << " bytes");

    // If n is npos, we consume everything from buf_ (and nothing from result).
    const SBuf::size_type parsed = (n == SBuf::npos) ? buf_.length() : n;

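    // Split trick: copy the whole buffer, then consume() the leading
    // (length - parsed) bytes off the copy. consume() returns that leading
    // remainder, which becomes the new buf_, while the copy keeps just the
    // trailing `parsed` bytes for the caller.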
    SBuf result = buf_;
    buf_ = result.consume(buf_.length() - parsed);
    parsed_ += parsed;
    return result;
}

/// convenience method: consumes up to n last bytes and returns their count
SBuf::size_type
Parser::Tokenizer::successTrailing(const SBuf::size_type n)
{
    return consumeTrailing(n).length();
}

bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
    const Tokenizer saved(*this);
    skipAll(delimiters);
    const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
    if (tokenLen == SBuf::npos) {
        debugs(24, 8, "no token found for delimiters " << delimiters.name);
        *this = saved;
        return false;
    }
    returnedToken = consume(tokenLen); // cannot be empty
    skipAll(delimiters);
    debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
           returnedToken << '\'');
    return true;
}
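
// Hypothetical token() usage sketch; the two-argument (label, characters)
// CharacterSet constructor and the input are illustrative assumptions:
//
//   static const CharacterSet ws("whitespace", " \t");
//   Parser::Tokenizer tok(SBuf(" alpha beta "));
//   SBuf word;
//   while (tok.token(word, ws))
//       ... // yields "alpha", then "beta"
//
// Note that token() backtracks and fails unless some delimiter byte follows
// the token, so the last token of a possibly-incomplete buffer stays
// unconsumed until more input (or a delimiter) arrives.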

bool
Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf::size_type prefixLen = buf_.substr(0, limit).findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no prefix for set " << tokenChars.name);
        return false;
    }
    if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
        debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
        return false;
    }
    if (prefixLen == SBuf::npos && limit > 0) {
        debugs(24, 8, "whole haystack matched");
        prefixLen = limit;
    }
    debugs(24, 8, "found with length " << prefixLen);
    returnedToken = consume(prefixLen); // cannot be empty after the npos check
    return true;
}
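
// Hypothetical prefix() usage sketch; CharacterSet::DIGIT is assumed to be
// one of the predefined RFC 5234 core-rule sets:
//
//   SBuf digits;
//   if (tok.prefix(digits, CharacterSet::DIGIT))
//       ... // the leading run of [0-9] is consumed into `digits`
//
// Unlike token(), prefix() needs no terminating delimiter: it consumes the
// longest leading run of tokenChars (up to limit bytes), even when that run
// reaches the end of the buffer.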

bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf span = buf_;

    if (limit < buf_.length())
        span.consume(buf_.length() - limit); // ignore the N prefix characters

    auto i = span.rbegin();
    SBuf::size_type found = 0;
    while (i != span.rend() && tokenChars[*i]) {
        ++i;
        ++found;
    }
    if (!found)
        return false;
    returnedToken = consumeTrailing(found);
    return true;
}
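
// suffix() scans backwards from the end of the (limit-restricted) window,
// counting contiguous tokenChars bytes, then hands that run to
// consumeTrailing(). A hypothetical use, trimming a trailing CRLF run
// (again assuming the (label, characters) CharacterSet constructor):
//
//   static const CharacterSet crlf("CRLF", "\r\n");
//   SBuf eol;
//   (void)tok.suffix(eol, crlf, SBuf::npos);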

SBuf::size_type
Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
        return 0;
    }
    debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
    return success(prefixLen);
}

bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
    if (!buf_.isEmpty() && chars[buf_[0]]) {
        debugs(24, 8, "skipping one-of " << chars.name);
        return success(1);
    }
    debugs(24, 8, "no match while skipping one-of " << chars.name);
    return false;
}

bool
Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
{
    if (buf_.length() < tokenToSkip.length())
        return false;

    SBuf::size_type offset = 0;
    if (tokenToSkip.length() < buf_.length())
        offset = buf_.length() - tokenToSkip.length();

    if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return successTrailing(tokenToSkip.length());
    }
    return false;
}
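
// A hypothetical skipSuffix() use, dropping a known trailer:
//
//   if (tok.skipSuffix(SBuf("\r\n")))
//       ... // the buffer ended with CRLF, now consumed
//
// The comparison is anchored at the buffer end: substr(offset) spans exactly
// tokenToSkip.length() bytes, so a match in the middle of the buffer can
// never trigger a skip.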

bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
    if (buf_.startsWith(tokenToSkip)) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return success(tokenToSkip.length());
    }
    debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
    return false;
}

bool
Parser::Tokenizer::skip(const char tokenChar)
{
    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
        debugs(24, 8, "skipping char '" << tokenChar << '\'');
        return success(1);
    }
    debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
    return false;
}

bool
Parser::Tokenizer::skipOneTrailing(const CharacterSet &skippable)
{
    if (!buf_.isEmpty() && skippable[buf_[buf_.length()-1]]) {
        debugs(24, 8, "skipping one-of " << skippable.name);
        return successTrailing(1);
    }
    debugs(24, 8, "no match while skipping one-of " << skippable.name);
    return false;
}

SBuf::size_type
Parser::Tokenizer::skipAllTrailing(const CharacterSet &skippable)
{
    const SBuf::size_type prefixEnd = buf_.findLastNotOf(skippable);
    const SBuf::size_type prefixLen = prefixEnd == SBuf::npos ?
                                      0 : (prefixEnd + 1);
    const SBuf::size_type suffixLen = buf_.length() - prefixLen;
    if (suffixLen == 0) {
        debugs(24, 8, "no match when trying to skip " << skippable.name);
        return 0;
    }
    debugs(24, 8, "skipping in " << skippable.name << " len " << suffixLen);
    return successTrailing(suffixLen);
}
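
// skipAllTrailing() derives the suffix length from the position of the last
// byte NOT in `skippable`: everything after that byte is skippable. When
// findLastNotOf() returns npos, the whole buffer is skippable, prefixLen is
// 0, and the entire buffer gets consumed.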

/* reworked from compat/strtoll.c */
bool
Parser::Tokenizer::int64(int64_t &result, int base, bool allowSign, const SBuf::size_type limit)
{
    if (atEnd() || limit == 0)
        return false;

    const SBuf range(buf_.substr(0, limit));

    //fixme: account for buf_.size()
    bool neg = false;
    const char *s = range.rawContent();
    const char *end = range.rawContent() + range.length();

    if (allowSign) {
        if (*s == '-') {
            neg = true;
            ++s;
        } else if (*s == '+') {
            ++s;
        }
        if (s >= end) return false;
    }
    if ((base == 0 || base == 16) && *s == '0' && (s + 1 < end) &&
            tolower(*(s + 1)) == 'x') {
        s += 2;
        base = 16;
    }
    if (base == 0) {
        if (*s == '0') {
            base = 8;
            ++s;
        } else {
            base = 10;
        }
    }
    if (s >= end) return false;

    uint64_t cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
    const int cutlim = cutoff % static_cast<int64_t>(base);
    cutoff /= static_cast<uint64_t>(base);
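    // Overflow guard, strtoll-style: with bound being the magnitude limit for
    // the sign in effect (-INT64_MIN when negative, INT64_MAX otherwise),
    // acc*base + digit would overflow exactly when acc > bound/base, or when
    // acc == bound/base and digit > bound%base. cutoff and cutlim now hold
    // those two thresholds.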

    int any = 0, c;
    int64_t acc = 0;
    do {
        c = *s;
        if (xisdigit(c)) {
            c -= '0';
        } else if (xisalpha(c)) {
            c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }
        if (c >= base)
            break;
        if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim)) {
            any = -1;
        } else {
            any = 1;
            acc *= base;
            acc += c;
        }
    } while (++s < end);

    if (any == 0) // nothing was parsed
        return false;
    if (any < 0) {
        acc = neg ? INT64_MIN : INT64_MAX;
        errno = ERANGE;
        return false;
    } else if (neg)
        acc = -acc;

    result = acc;
    return success(s - range.rawContent());
}
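
// Hypothetical int64() usage sketch; the input value is made up:
//
//   Parser::Tokenizer tok(SBuf("0x1Fpx"));
//   int64_t n = 0;
//   if (tok.int64(n, 0, true, SBuf::npos))
//       ... // n == 31; "px" stays in the buffer for further parsing
//
// With base == 0 the radix is auto-detected, strtol-style: a "0x" prefix
// selects 16, a remaining leading "0" selects 8, and anything else 10.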