/*
 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */
#include "squid.h"

#include "debug/Stream.h"
#include "parser/forward.h"
#include "parser/Tokenizer.h"
#include "sbuf/Stream.h"

#include <cerrno>
20 /// convenience method: consumes up to n bytes, counts, and returns them
22 Parser::Tokenizer::consume(const SBuf::size_type n
)
24 // careful: n may be npos!
25 debugs(24, 5, "consuming " << n
<< " bytes");
26 const SBuf result
= buf_
.consume(n
);
27 parsed_
+= result
.length();
31 /// convenience method: consume()s up to n bytes and returns their count
33 Parser::Tokenizer::success(const SBuf::size_type n
)
35 return consume(n
).length();
38 /// convenience method: consumes up to n last bytes and returns them
40 Parser::Tokenizer::consumeTrailing(const SBuf::size_type n
)
42 debugs(24, 5, "consuming " << n
<< " bytes");
44 // If n is npos, we consume everything from buf_ (and nothing from result).
45 const SBuf::size_type parsed
= (n
== SBuf::npos
) ? buf_
.length() : n
;
48 buf_
= result
.consume(buf_
.length() - parsed
);
53 /// convenience method: consumes up to n last bytes and returns their count
55 Parser::Tokenizer::successTrailing(const SBuf::size_type n
)
57 return consumeTrailing(n
).length();
61 Parser::Tokenizer::token(SBuf
&returnedToken
, const CharacterSet
&delimiters
)
63 const Tokenizer
saved(*this);
65 const SBuf::size_type tokenLen
= buf_
.findFirstOf(delimiters
); // not found = npos => consume to end
66 if (tokenLen
== SBuf::npos
) {
67 debugs(24, 8, "no token found for delimiters " << delimiters
.name
);
71 returnedToken
= consume(tokenLen
); // cannot be empty
73 debugs(24, DBG_DATA
, "token found for delimiters " << delimiters
.name
<< ": '" <<
74 returnedToken
<< '\'');
79 Parser::Tokenizer::prefix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, const SBuf::size_type limit
)
81 SBuf::size_type prefixLen
= buf_
.substr(0,limit
).findFirstNotOf(tokenChars
);
83 debugs(24, 8, "no prefix for set " << tokenChars
.name
);
86 if (prefixLen
== SBuf::npos
&& (atEnd() || limit
== 0)) {
87 debugs(24, 8, "no char in set " << tokenChars
.name
<< " while looking for prefix");
90 if (prefixLen
== SBuf::npos
&& limit
> 0) {
91 debugs(24, 8, "whole haystack matched");
94 debugs(24, 8, "found with length " << prefixLen
);
95 returnedToken
= consume(prefixLen
); // cannot be empty after the npos check
100 Parser::Tokenizer::prefix(const char *description
, const CharacterSet
&tokenChars
, const SBuf::size_type limit
)
103 throw InsufficientInput();
107 if (!prefix(result
, tokenChars
, limit
))
108 throw TexcHere(ToSBuf("cannot parse ", description
));
111 throw InsufficientInput();
117 Parser::Tokenizer::suffix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, const SBuf::size_type limit
)
121 if (limit
< buf_
.length())
122 span
.consume(buf_
.length() - limit
); // ignore the N prefix characters
124 auto i
= span
.rbegin();
125 SBuf::size_type found
= 0;
126 while (i
!= span
.rend() && tokenChars
[*i
]) {
132 returnedToken
= consumeTrailing(found
);
137 Parser::Tokenizer::skipAll(const CharacterSet
&tokenChars
)
139 const SBuf::size_type prefixLen
= buf_
.findFirstNotOf(tokenChars
);
140 if (prefixLen
== 0) {
141 debugs(24, 8, "no match when trying to skipAll " << tokenChars
.name
);
144 debugs(24, 8, "skipping all in " << tokenChars
.name
<< " len " << prefixLen
);
145 return success(prefixLen
);
149 Parser::Tokenizer::skipRequired(const char *description
, const SBuf
&tokenToSkip
)
151 if (skip(tokenToSkip
) || tokenToSkip
.isEmpty())
154 if (tokenToSkip
.startsWith(buf_
))
155 throw InsufficientInput();
157 throw TextException(ToSBuf("cannot skip ", description
), Here());
161 Parser::Tokenizer::skipOne(const CharacterSet
&chars
)
163 if (!buf_
.isEmpty() && chars
[buf_
[0]]) {
164 debugs(24, 8, "skipping one-of " << chars
.name
);
167 debugs(24, 8, "no match while skipping one-of " << chars
.name
);
172 Parser::Tokenizer::skipSuffix(const SBuf
&tokenToSkip
)
174 if (buf_
.length() < tokenToSkip
.length())
177 SBuf::size_type offset
= 0;
178 if (tokenToSkip
.length() < buf_
.length())
179 offset
= buf_
.length() - tokenToSkip
.length();
181 if (buf_
.substr(offset
, SBuf::npos
).cmp(tokenToSkip
) == 0) {
182 debugs(24, 8, "skipping " << tokenToSkip
.length());
183 return successTrailing(tokenToSkip
.length());
189 Parser::Tokenizer::skip(const SBuf
&tokenToSkip
)
191 if (buf_
.startsWith(tokenToSkip
)) {
192 debugs(24, 8, "skipping " << tokenToSkip
.length());
193 return success(tokenToSkip
.length());
195 debugs(24, 8, "no match, not skipping '" << tokenToSkip
<< '\'');
200 Parser::Tokenizer::skip(const char tokenChar
)
202 if (!buf_
.isEmpty() && buf_
[0] == tokenChar
) {
203 debugs(24, 8, "skipping char '" << tokenChar
<< '\'');
206 debugs(24, 8, "no match, not skipping char '" << tokenChar
<< '\'');
211 Parser::Tokenizer::skipOneTrailing(const CharacterSet
&skippable
)
213 if (!buf_
.isEmpty() && skippable
[buf_
[buf_
.length()-1]]) {
214 debugs(24, 8, "skipping one-of " << skippable
.name
);
215 return successTrailing(1);
217 debugs(24, 8, "no match while skipping one-of " << skippable
.name
);
222 Parser::Tokenizer::skipAllTrailing(const CharacterSet
&skippable
)
224 const SBuf::size_type prefixEnd
= buf_
.findLastNotOf(skippable
);
225 const SBuf::size_type prefixLen
= prefixEnd
== SBuf::npos
?
227 const SBuf::size_type suffixLen
= buf_
.length() - prefixLen
;
228 if (suffixLen
== 0) {
229 debugs(24, 8, "no match when trying to skip " << skippable
.name
);
232 debugs(24, 8, "skipping in " << skippable
.name
<< " len " << suffixLen
);
233 return successTrailing(suffixLen
);
236 /* reworked from compat/strtoll.c */
238 Parser::Tokenizer::int64(int64_t & result
, int base
, bool allowSign
, const SBuf::size_type limit
)
240 if (atEnd() || limit
== 0)
243 const SBuf
range(buf_
.substr(0,limit
));
245 // XXX: account for buf_.size()
247 const char *s
= range
.rawContent();
248 const char *end
= range
.rawContent() + range
.length();
254 } else if (*s
== '+') {
257 if (s
>= end
) return false;
259 if (( base
== 0 || base
== 16) && *s
== '0' && (s
+1 < end
) &&
260 tolower(*(s
+1)) == 'x') {
272 if (s
>= end
) return false;
276 cutoff
= neg
? -static_cast<uint64_t>(INT64_MIN
) : INT64_MAX
;
277 const int cutlim
= cutoff
% static_cast<int64_t>(base
);
278 cutoff
/= static_cast<uint64_t>(base
);
286 } else if (xisalpha(c
)) {
287 c
-= xisupper(c
) ? 'A' - 10 : 'a' - 10;
293 if (any
< 0 || static_cast<uint64_t>(acc
) > cutoff
|| (static_cast<uint64_t>(acc
) == cutoff
&& c
> cutlim
))
302 if (any
== 0) // nothing was parsed
305 acc
= neg
? INT64_MIN
: INT64_MAX
;
312 return success(s
- range
.rawContent());
316 Parser::Tokenizer::udec64(const char *description
, const SBuf::size_type limit
)
319 throw InsufficientInput();
323 // Since we only support unsigned decimals, a parsing failure with a
324 // non-empty input always implies invalid/malformed input (or a buggy
325 // limit=0 caller). TODO: Support signed and non-decimal integers by
326 // refactoring int64() to detect insufficient input.
327 if (!int64(result
, 10, false, limit
))
328 throw TexcHere(ToSBuf("cannot parse ", description
));
331 throw InsufficientInput(); // more digits may be coming