/*
 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */
#include "squid.h"
#include "Debug.h"
#include "parser/forward.h"
#include "parser/Tokenizer.h"
#include "sbuf/Stream.h"

#include <cerrno>
/// convenience method: consumes up to n bytes, counts, and returns them
SBuf
Parser::Tokenizer::consume(const SBuf::size_type n)
{
    // careful: n may be npos!
    debugs(24, 5, "consuming " << n << " bytes");
    const SBuf result = buf_.consume(n);
    parsed_ += result.length();
    return result;
}
/// convenience method: consume()s up to n bytes and returns their count
SBuf::size_type
Parser::Tokenizer::success(const SBuf::size_type n)
{
    return consume(n).length();
}
/// convenience method: consumes up to n last bytes and returns them
SBuf
Parser::Tokenizer::consumeTrailing(const SBuf::size_type n)
{
    debugs(24, 5, "consuming " << n << " bytes");

    // If n is npos, we consume everything from buf_ (and nothing from result).
    const SBuf::size_type parsed = (n == SBuf::npos) ? buf_.length() : n;

    SBuf result = buf_;
    buf_ = result.consume(buf_.length() - parsed);

    parsed_ += parsed;
    return result;
}
/// convenience method: consumes up to n last bytes and returns their count
SBuf::size_type
Parser::Tokenizer::successTrailing(const SBuf::size_type n)
{
    return consumeTrailing(n).length();
}
bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
    const Tokenizer saved(*this);
    skipAll(delimiters);
    const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
    if (tokenLen == SBuf::npos) {
        debugs(24, 8, "no token found for delimiters " << delimiters.name);
        *this = saved;
        return false;
    }
    returnedToken = consume(tokenLen); // cannot be empty
    skipAll(delimiters);
    debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
           returnedToken << '\'');
    return true;
}
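/// Usage sketch for token(): a hypothetical caller splitting a request line
/// on whitespace. The input and variable names are illustrative only;
/// CharacterSet::WSP is the predefined whitespace set from base/CharacterSet.h.
/// \code
///     Parser::Tokenizer tok(SBuf("GET /index.html HTTP/1.1"));
///     SBuf method;
///     if (tok.token(method, CharacterSet::WSP)) {
///         // method == "GET"; surrounding WSP delimiters were consumed,
///         // so tok now starts at "/index.html HTTP/1.1"
///     }
/// \endcode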
bool
Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no prefix for set " << tokenChars.name);
        return false;
    }
    if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
        debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
        return false;
    }
    if (prefixLen == SBuf::npos && limit > 0) {
        debugs(24, 8, "whole haystack matched");
        prefixLen = limit;
    }
    debugs(24, 8, "found with length " << prefixLen);
    returnedToken = consume(prefixLen); // cannot be empty after the npos check
    return true;
}
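/// Usage sketch for the non-throwing prefix(): a hypothetical caller reading
/// a run of digits. Input and names are illustrative; CharacterSet::DIGIT is
/// the predefined decimal-digit set from base/CharacterSet.h.
/// \code
///     Parser::Tokenizer tok(SBuf("42abc"));
///     SBuf digits;
///     if (tok.prefix(digits, CharacterSet::DIGIT)) {
///         // digits == "42"; tok now starts at "abc"
///     }
/// \endcode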
SBuf
Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    if (atEnd())
        throw InsufficientInput();

    SBuf result;

    if (!prefix(result, tokenChars, limit))
        throw TexcHere(ToSBuf("cannot parse ", description));

    if (atEnd())
        throw InsufficientInput();

    return result;
}
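/// Usage sketch for the throwing prefix() overload, assuming an incremental
/// caller that buffers more input on InsufficientInput; the try/catch shape
/// is illustrative, not a required calling convention. CharacterSet::TCHAR
/// is the predefined HTTP token-character set.
/// \code
///     try {
///         Parser::Tokenizer tok(receivedBytes); // hypothetical SBuf of input
///         const SBuf name = tok.prefix("header name", CharacterSet::TCHAR);
///     } catch (const Parser::InsufficientInput &) {
///         // buffer ended mid-token: wait for more bytes, then re-parse
///     }
///     // TexcHere() exceptions propagate as hard parse failures
/// \endcode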
bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf span = buf_;

    if (limit < buf_.length())
        span.consume(buf_.length() - limit); // ignore the N prefix characters

    auto i = span.rbegin();
    SBuf::size_type found = 0;
    while (i != span.rend() && tokenChars[*i]) {
        ++i;
        ++found;
    }
    if (!found)
        return false;
    returnedToken = consumeTrailing(found);
    return true;
}
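/// Usage sketch for suffix(): a hypothetical caller peeling trailing digits
/// off the buffer end; input and names are illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("chunk17"));
///     SBuf number;
///     if (tok.suffix(number, CharacterSet::DIGIT)) {
///         // number == "17"; tok is left holding "chunk"
///     }
/// \endcode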
SBuf::size_type
Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
        return 0;
    }
    debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
    return success(prefixLen);
}
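/// Usage sketch for skipAll(), e.g. to tolerate optional leading whitespace;
/// the input is illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("   value"));
///     const auto skipped = tok.skipAll(CharacterSet::WSP); // skipped == 3
/// \endcode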
bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
    if (!buf_.isEmpty() && chars[buf_[0]]) {
        debugs(24, 8, "skipping one-of " << chars.name);
        return success(1);
    }
    debugs(24, 8, "no match while skipping one-of " << chars.name);
    return false;
}
bool
Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
{
    if (buf_.length() < tokenToSkip.length())
        return false;

    SBuf::size_type offset = 0;
    if (tokenToSkip.length() < buf_.length())
        offset = buf_.length() - tokenToSkip.length();

    if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return successTrailing(tokenToSkip.length());
    }
    return false;
}
bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
    if (buf_.startsWith(tokenToSkip)) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return success(tokenToSkip.length());
    }
    debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
    return false;
}
bool
Parser::Tokenizer::skip(const char tokenChar)
{
    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
        debugs(24, 8, "skipping char '" << tokenChar << '\'');
        return success(1);
    }
    debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
    return false;
}
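/// Usage sketch for the literal skip() overloads, e.g. matching a fixed
/// protocol prefix; the input is illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("HTTP/1.1 200 OK"));
///     if (tok.skip(SBuf("HTTP/")) && tok.skip('1')) {
///         // tok now starts at ".1 200 OK"
///     }
/// \endcode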
bool
Parser::Tokenizer::skipOneTrailing(const CharacterSet &skippable)
{
    if (!buf_.isEmpty() && skippable[buf_[buf_.length()-1]]) {
        debugs(24, 8, "skipping one-of " << skippable.name);
        return successTrailing(1);
    }
    debugs(24, 8, "no match while skipping one-of " << skippable.name);
    return false;
}
SBuf::size_type
Parser::Tokenizer::skipAllTrailing(const CharacterSet &skippable)
{
    const SBuf::size_type prefixEnd = buf_.findLastNotOf(skippable);
    const SBuf::size_type prefixLen = prefixEnd == SBuf::npos ?
                                      0 : (prefixEnd + 1);
    const SBuf::size_type suffixLen = buf_.length() - prefixLen;
    if (suffixLen == 0) {
        debugs(24, 8, "no match when trying to skip " << skippable.name);
        return 0;
    }
    debugs(24, 8, "skipping in " << skippable.name << " len " << suffixLen);
    return successTrailing(suffixLen);
}
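/// Usage sketch for skipAllTrailing(), e.g. trimming trailing whitespace
/// before interpreting a field value; the input is illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("value  \t"));
///     tok.skipAllTrailing(CharacterSet::WSP); // tok is left holding "value"
/// \endcode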
/* reworked from compat/strtoll.c */
bool
Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
{
    if (atEnd() || limit == 0)
        return false;

    const SBuf range(buf_.substr(0,limit));

    // XXX: account for buf_.size()
    bool neg = false;
    const char *s = range.rawContent();
    const char *end = range.rawContent() + range.length();

    if (allowSign) {
        if (*s == '-') {
            neg = true;
            ++s;
        } else if (*s == '+') {
            ++s;
        }
        if (s >= end) return false;
    }
    if (( base == 0 || base == 16) && *s == '0' && (s+1 < end ) &&
            tolower(*(s+1)) == 'x') {
        s += 2;
        base = 16;
    }
    if (base == 0) {
        if (*s == '0') {
            base = 8;
            ++s;
        } else {
            base = 10;
        }
    }
    if (s >= end) return false;

    uint64_t cutoff;

    cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
    const int cutlim = cutoff % static_cast<int64_t>(base);
    cutoff /= static_cast<uint64_t>(base);

    int any = 0, c;
    int64_t acc = 0;
    do {
        c = *s;
        if (xisdigit(c)) {
            c -= '0';
        } else if (xisalpha(c)) {
            c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }
        if (c >= base)
            break;
        if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
            any = -1;
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    } while (++s < end);

    if (any == 0) // nothing was parsed
        return false;
    if (any < 0) { // overflow or underflow
        acc = neg ? INT64_MIN : INT64_MAX;
        errno = ERANGE;
        return false;
    } else if (neg)
        acc = -acc;

    result = acc;
    return success(s - range.rawContent());
}
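/// Usage sketch for int64(): a hypothetical caller parsing a hex value with
/// automatic base detection (base=0); names and input are illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("0x1F rest"));
///     int64_t value = 0;
///     if (tok.int64(value, 0, false)) {
///         // value == 31; tok now starts at " rest"
///     }
/// \endcode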
int64_t
Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
{
    if (atEnd())
        throw InsufficientInput();

    int64_t result = 0;

    // Since we only support unsigned decimals, a parsing failure with a
    // non-empty input always implies invalid/malformed input (or a buggy
    // limit=0 caller). TODO: Support signed and non-decimal integers by
    // refactoring int64() to detect insufficient input.
    if (!int64(result, 10, false, limit))
        throw TexcHere(ToSBuf("cannot parse ", description));

    if (atEnd())
        throw InsufficientInput(); // more digits may be coming

    return result;
}
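/// Usage sketch for udec64(), assuming an incremental caller that buffers
/// more input on InsufficientInput; names and input are illustrative only.
/// \code
///     Parser::Tokenizer tok(SBuf("1234;"));
///     try {
///         const int64_t len = tok.udec64("chunk size"); // len == 1234
///     } catch (const Parser::InsufficientInput &) {
///         // all buffered bytes were digits; more digits may follow
///     }
/// \endcode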