]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/parser/Tokenizer.cc
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 24 SBuf */
13 #include "parser/Tokenizer.h"
// Fallback definition of INT64_MIN.
// NOTE(review): the guard lines around these directives (original lines 26-27
// and 29 onwards) are missing from this listing; presumably an
// #ifndef/#else/#endif wrapper -- confirm against the full file.
23 /* Native 64 bit system without strtoll() */
// When LONG_MIN is defined and SIZEOF_LONG is 8, reuse LONG_MIN.
24 #if defined(LONG_MIN) && (SIZEOF_LONG == 8)
25 #define INT64_MIN LONG_MIN
// Literal fallback, written as "-(max) - 1" because the magnitude
// 9223372036854775808 does not fit in a signed long long literal.
28 #define INT64_MIN (-9223372036854775807LL-1LL)
// Fallback definition of INT64_MAX, mirroring the INT64_MIN logic above.
33 /* Native 64 bit system without strtoll() */
34 #if defined(LONG_MAX) && (SIZEOF_LONG == 8)
35 #define INT64_MAX LONG_MAX
// Literal fallback for the largest signed 64-bit value.
38 #define INT64_MAX 9223372036854775807LL
42 /// convenience method: consumes up to n bytes, counts, and returns them
// Takes up to n leading bytes off buf_ and adds the number of bytes actually
// taken to the parsed_ counter.
// NOTE(review): the return type, braces and return statement sit on lines
// that are missing from this listing (the embedded line numbers skip).
44 Parser::Tokenizer::consume(const SBuf::size_type n
)
46 // careful: n may be npos!
47 debugs(24, 5, "consuming " << n
<< " bytes");
// result holds whatever buf_.consume() handed back for this request.
48 const SBuf result
= buf_
.consume(n
);
// Count by the length of what was returned, not by n: n may be npos.
49 parsed_
+= result
.length();
53 /// convenience method: consume()s up to n bytes and returns their count
// Thin wrapper over consume(): discards the consumed bytes and reports only
// how many there were. The skip*() methods and int64() below return this value.
55 Parser::Tokenizer::success(const SBuf::size_type n
)
57 return consume(n
).length();
// Extracts the bytes that precede the first delimiter character in buf_ and
// stores them in returnedToken.
// NOTE(review): several lines of this function (return type, braces, return
// statements, original lines 64, 68-70, 72, 75-77) are missing from this
// listing; only the visible statements are described.
61 Parser::Tokenizer::token(SBuf
&returnedToken
, const CharacterSet
&delimiters
)
// Copy of the complete tokenizer state taken before any parsing; presumably
// used to roll back on failure -- the restoring code is not visible here.
63 const Tokenizer
saved(*this);
// Offset of the first delimiter character in buf_.
65 const SBuf::size_type tokenLen
= buf_
.findFirstOf(delimiters
); // not found = npos => consume to end
// No delimiter anywhere in buf_: only the debug message is visible for this branch.
66 if (tokenLen
== SBuf::npos
) {
67 debugs(24, 8, "no token found for delimiters " << delimiters
.name
);
// A delimiter was found: take exactly the tokenLen bytes in front of it.
71 returnedToken
= consume(tokenLen
); // cannot be empty
73 debugs(24, DBG_DATA
, "token found for delimiters " << delimiters
.name
<< ": '" <<
74 returnedToken
<< '\'');
// Extracts a leading run of tokenChars characters, looking at no more than
// the first `limit` bytes of buf_, and stores it in returnedToken.
// NOTE(review): the return type, braces, return statements and at least one
// condition (original line 82) are missing from this listing.
79 Parser::Tokenizer::prefix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, const SBuf::size_type limit
)
// Offset, within the first `limit` bytes, of the first byte NOT in tokenChars;
// compared against SBuf::npos below.
81 SBuf::size_type prefixLen
= buf_
.substr(0,limit
).findFirstNotOf(tokenChars
);
// The guard for this message is on a missing line (presumably prefixLen == 0
// -- confirm).
83 debugs(24, 8, "no prefix for set " << tokenChars
.name
);
// npos with nothing to examine (buffer exhausted or a zero limit).
86 if (prefixLen
== SBuf::npos
&& (atEnd() || limit
== 0)) {
87 debugs(24, 8, "no char in set " << tokenChars
.name
<< " while looking for prefix");
// npos with a non-zero limit: every examined byte belongs to tokenChars.
// How prefixLen is adjusted for this case is on a missing line.
90 if (prefixLen
== SBuf::npos
&& limit
> 0) {
91 debugs(24, 8, "whole haystack matched");
94 debugs(24, 8, "found with length " << prefixLen
);
// Take the matched prefix off the front of buf_.
95 returnedToken
= consume(prefixLen
); // cannot be empty after the npos check
// Extracts a trailing run of tokenChars characters from buf_, looking at no
// more than the last `limit` bytes, and stores it in returnedToken.
// NOTE(review): the declaration of `span`, the loop body and the return
// statements are on lines missing from this listing; `span` is presumably a
// copy of buf_ -- confirm.
100 Parser::Tokenizer::suffix(SBuf
&returnedToken
, const CharacterSet
&tokenChars
, const SBuf::size_type limit
)
// Restrict the scan window to the last `limit` bytes by dropping the leading
// (length - limit) bytes from span.
104 if (limit
< buf_
.length())
105 span
.consume(buf_
.length() - limit
); // ignore the N prefix characters
// Walk span backwards while the characters belong to tokenChars.
// `found` is presumably incremented in the (missing) loop body -- confirm.
107 auto i
= span
.rbegin();
108 SBuf::size_type found
= 0;
109 while (i
!= span
.rend() && tokenChars
[*i
]) {
// Split buf_ in two: returnedToken starts as a copy of buf_, then its leading
// (length - found) bytes are consumed and assigned back to buf_. Given
// consume() returns the bytes it removes (as its use in consume() above
// suggests), returnedToken is left holding the trailing `found` bytes.
115 returnedToken
= buf_
;
116 buf_
= returnedToken
.consume(buf_
.length() - found
);
// Skips every leading character of buf_ that belongs to tokenChars and
// returns the value of success() for the skipped length.
// NOTE(review): the return type, braces and the return statement of the
// "no match" branch are on lines missing from this listing.
121 Parser::Tokenizer::skipAll(const CharacterSet
&tokenChars
)
// Offset of the first byte NOT in tokenChars, i.e. the length of the run to skip.
123 const SBuf::size_type prefixLen
= buf_
.findFirstNotOf(tokenChars
);
// Zero means the very first byte is already outside the set.
124 if (prefixLen
== 0) {
125 debugs(24, 8, "no match when trying to skipAll " << tokenChars
.name
);
128 debugs(24, 8, "skipping all in " << tokenChars
.name
<< " len " << prefixLen
);
// prefixLen is passed through unchanged; consume() notes that n may be npos.
129 return success(prefixLen
);
// Tests whether the first byte of buf_ belongs to `chars`.
// NOTE(review): the statements that perform the skip and produce the return
// value are on lines missing from this listing; only the test and the two
// debug messages are visible.
133 Parser::Tokenizer::skipOne(const CharacterSet
&chars
)
// The emptiness test comes first so buf_[0] is never evaluated on an empty buffer.
135 if (!buf_
.isEmpty() && chars
[buf_
[0]]) {
136 debugs(24, 8, "skipping one-of " << chars
.name
);
139 debugs(24, 8, "no match while skipping one-of " << chars
.name
);
// Removes tokenToSkip from the END of buf_ when buf_ ends with it.
// NOTE(review): the return type, braces and all return statements are on
// lines missing from this listing.
144 Parser::Tokenizer::skipSuffix(const SBuf
&tokenToSkip
)
// A buffer shorter than the token cannot end with it; the statement run by
// this guard is on a missing line.
146 if (buf_
.length() < tokenToSkip
.length())
// offset is where the candidate suffix starts in buf_; it stays 0 when both
// lengths are equal.
149 SBuf::size_type offset
= 0;
150 if (tokenToSkip
.length() < buf_
.length())
151 offset
= buf_
.length() - tokenToSkip
.length();
// Compare the tail of buf_ with the token; on a match keep only the bytes in
// front of offset.
153 if (buf_
.substr(offset
, SBuf::npos
).cmp(tokenToSkip
) == 0) {
154 buf_
= buf_
.substr(0,offset
);
// Skips tokenToSkip when buf_ starts with it, returning the value of
// success() for the token length.
// NOTE(review): the return type, braces and the return statement of the
// "no match" path are on lines missing from this listing.
161 Parser::Tokenizer::skip(const SBuf
&tokenToSkip
)
163 if (buf_
.startsWith(tokenToSkip
)) {
164 debugs(24, 8, "skipping " << tokenToSkip
.length());
// Consume exactly the token's length.
165 return success(tokenToSkip
.length());
167 debugs(24, 8, "no match, not skipping '" << tokenToSkip
<< '\'');
// Single-character overload of skip(): tests whether buf_ starts with tokenChar.
// NOTE(review): the statements that perform the skip and produce the return
// value are on lines missing from this listing.
172 Parser::Tokenizer::skip(const char tokenChar
)
// The emptiness test comes first so buf_[0] is never evaluated on an empty buffer.
174 if (!buf_
.isEmpty() && buf_
[0] == tokenChar
) {
175 debugs(24, 8, "skipping char '" << tokenChar
<< '\'');
178 debugs(24, 8, "no match, not skipping char '" << tokenChar
<< '\'');
182 /* reworked from compat/strtoll.c */
// Parses a signed 64-bit integer in the given base from the front of buf_,
// following the classic strtoll() algorithm, and on success consumes the
// parsed bytes via success().
// NOTE(review): many lines of this function are missing from this listing
// (the embedded line numbers skip), including the declarations of neg,
// cutoff, any, c and acc and the assignment to `result`. Only the visible
// statements are described below.
184 Parser::Tokenizer::int64(int64_t & result
, int base
)
189 //fixme: account for buf_.size()
// s walks the raw bytes; end points one past the last valid byte of buf_.
191 const char *s
= buf_
.rawContent();
192 const char *end
= buf_
.rawContent() + buf_
.length();
// A leading '+' is recognized here; the branch in front of this else-if is
// on a missing line.
197 } else if (*s
== '+') {
// Nothing left to parse at this point: fail.
200 if (s
>= end
) return false;
// Optional "0x"/"0X" prefix, honoured only when base is 0 or 16.
// NOTE(review): `s+1 <= end` also admits s+1 == end, in which case *(s+1)
// reads the byte AT end, one past the range computed above. `s+1 < end`
// looks like the intended bound -- confirm.
201 if (( base
== 0 || base
== 16) && *s
== '0' && (s
+1 <= end
) &&
202 tolower(*(s
+1)) == 'x') {
// Second exhaustion check; the statements between the two checks are on
// missing lines.
214 if (s
>= end
) return false;
// Overflow limits in the strtoll() style: cutoff starts as the largest
// magnitude allowed for the sign, cutlim is that magnitude's last digit in
// `base`, and cutoff is then reduced to the magnitude without that digit.
218 cutoff
= neg
? -static_cast<uint64_t>(INT64_MIN
) : INT64_MAX
;
219 const int cutlim
= cutoff
% static_cast<int64_t>(base
);
220 cutoff
/= static_cast<uint64_t>(base
);
// Digit loop. s is post-incremented when each character is fetched, so inside
// the body s already points one past the character held in c.
// NOTE(review): the increment expression `c = *s++` is evaluated BEFORE the
// `s <= end` test, so when s == end it reads one byte past the buffer before
// the loop terminates -- confirm whether that byte is guaranteed readable.
224 for (c
= *s
++; s
<= end
; c
= *s
++) {
// Letters are mapped to digit values 10 and above ('A'/'a' -> 10).
227 } else if (xisalpha(c
)) {
228 c
-= xisupper(c
) ? 'A' - 10 : 'a' - 10;
// Overflow test: already overflowed (any < 0), or appending digit c would
// push the accumulator beyond the limit. The branch bodies are on missing lines.
234 if (any
< 0 || static_cast<uint64_t>(acc
) > cutoff
|| (static_cast<uint64_t>(acc
) == cutoff
&& c
> cutlim
))
243 if (any
== 0) // nothing was parsed
// Saturate acc to the limit that matches the sign; the guarding condition is
// on a missing line (presumably the overflow case, any < 0 -- confirm).
246 acc
= neg
? INT64_MIN
: INT64_MAX
;
// The -1 undoes the loop's final post-increment of s, so only the bytes that
// were actually parsed get consumed.
253 return success(s
- buf_
.rawContent() - 1);