/*
 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */

#include "squid.h"
#include "Debug.h"
#include "parser/Tokenizer.h"

#include <cerrno>
#if HAVE_CTYPE_H
#include <ctype.h>
#endif
#if HAVE_STDINT_H
#include <stdint.h>
#endif
#ifndef INT64_MIN
/* Native 64 bit system without strtoll() */
#if defined(LONG_MIN) && (SIZEOF_LONG == 8)
#define INT64_MIN LONG_MIN
#else
/* 32 bit system */
#define INT64_MIN (-9223372036854775807LL-1LL)
#endif
#endif

#ifndef INT64_MAX
/* Native 64 bit system without strtoll() */
#if defined(LONG_MAX) && (SIZEOF_LONG == 8)
#define INT64_MAX LONG_MAX
#else
/* 32 bit system */
#define INT64_MAX 9223372036854775807LL
#endif
#endif

/// convenience method: consumes up to n bytes, counts, and returns them
SBuf
Parser::Tokenizer::consume(const SBuf::size_type n)
{
    // careful: n may be npos!
    debugs(24, 5, "consuming " << n << " bytes");
    const SBuf result = buf_.consume(n);
    parsed_ += result.length();
    return result;
}

/// convenience method: consume()s up to n bytes and returns their count
SBuf::size_type
Parser::Tokenizer::success(const SBuf::size_type n)
{
    return consume(n).length();
}

bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
    const Tokenizer saved(*this);
    skipAll(delimiters);
    const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // npos: no trailing delimiter found
    if (tokenLen == SBuf::npos) {
        debugs(24, 8, "no token found for delimiters " << delimiters.name);
        *this = saved;
        return false;
    }
    returnedToken = consume(tokenLen); // cannot be empty
    skipAll(delimiters);
    debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
           returnedToken << '\'');
    return true;
}

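/*
 * Usage sketch for token(): extract a word that is bounded by delimiters on
 * both sides, consuming the surrounding delimiters as well. The "input"
 * buffer is hypothetical; CharacterSet::WSP is assumed to be the predefined
 * whitespace set from base/CharacterSet.h.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "  alpha beta"
 *   SBuf word;
 *   if (tok.token(word, CharacterSet::WSP)) {
 *       // word == "alpha"; "beta" stays buffered because it has no
 *       // trailing delimiter, so a second token() call would fail
 *   }
 */
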
bool
Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no prefix for set " << tokenChars.name);
        return false;
    }
    if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
        debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
        return false;
    }
    if (prefixLen == SBuf::npos && limit > 0) {
        debugs(24, 8, "whole haystack matched");
        prefixLen = limit;
    }
    debugs(24, 8, "found with length " << prefixLen);
    returnedToken = consume(prefixLen); // cannot be empty after the npos check
    return true;
}

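/*
 * Usage sketch for prefix(): grab the run of characters from a given set at
 * the start of the buffer, optionally capped by a length limit. The "input"
 * buffer is hypothetical; CharacterSet::DIGIT is assumed to be the
 * predefined 0-9 set.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "1234abc"
 *   SBuf digits;
 *   if (tok.prefix(digits, CharacterSet::DIGIT)) {
 *       // digits == "1234"; "abc" remains in the buffer for later calls
 *   }
 */
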
bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf span = buf_;

    if (limit < buf_.length())
        span.consume(buf_.length() - limit); // ignore the N prefix characters

    auto i = span.rbegin();
    SBuf::size_type found = 0;
    while (i != span.rend() && tokenChars[*i]) {
        ++i;
        ++found;
    }
    if (!found)
        return false;
    returnedToken = buf_;
    buf_ = returnedToken.consume(buf_.length() - found);
    return true;
}

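/*
 * Usage sketch for suffix(): peel a trailing run of characters off the end
 * of the buffer, leaving everything before it for further parsing. The
 * "input" buffer is hypothetical; CharacterSet::DIGIT is assumed as above.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "HTTP/1.0"
 *   SBuf minorVersion;
 *   if (tok.suffix(minorVersion, CharacterSet::DIGIT)) {
 *       // minorVersion == "0"; the buffer now ends with "HTTP/1."
 *   }
 */
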
SBuf::size_type
Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
        return 0;
    }
    debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
    return success(prefixLen);
}

bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
    if (!buf_.isEmpty() && chars[buf_[0]]) {
        debugs(24, 8, "skipping one-of " << chars.name);
        return success(1);
    }
    debugs(24, 8, "no match while skipping one-of " << chars.name);
    return false;
}

bool
Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
{
    if (buf_.length() < tokenToSkip.length())
        return false;

    SBuf::size_type offset = 0;
    if (tokenToSkip.length() < buf_.length())
        offset = buf_.length() - tokenToSkip.length();

    if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
        buf_ = buf_.substr(0,offset);
        return true;
    }
    return false;
}

bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
    if (buf_.startsWith(tokenToSkip)) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return success(tokenToSkip.length());
    }
    debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
    return false;
}

bool
Parser::Tokenizer::skip(const char tokenChar)
{
    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
        debugs(24, 8, "skipping char '" << tokenChar << '\'');
        return success(1);
    }
    debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
    return false;
}

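/*
 * Usage sketch for the skip*() helpers: each consumes matching input and
 * reports whether anything was skipped, leaving the buffer untouched on a
 * mismatch. skipOne() eats a single character from a set; skipSuffix() trims
 * a literal from the end instead of the start. The "input" buffer and the
 * request line below are hypothetical; CharacterSet::SP is assumed to be the
 * predefined single-space set.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "GET /index"
 *   if (tok.skip(SBuf("GET"))) {             // exact-match skip of a literal
 *       tok.skipAll(CharacterSet::SP);       // any run of spaces (may be zero)
 *       if (tok.skip('/')) {                 // single-character skip
 *           // buffer now holds "index"
 *       }
 *   }
 */
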
/* reworked from compat/strtoll.c */
bool
Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
{
    if (atEnd() || limit == 0)
        return false;

    const SBuf range(buf_.substr(0,limit));

    //fixme: account for buf_.size()
    bool neg = false;
    const char *s = range.rawContent();
    const char *end = range.rawContent() + range.length();

    if (allowSign) {
        if (*s == '-') {
            neg = true;
            ++s;
        } else if (*s == '+') {
            ++s;
        }
        if (s >= end) return false;
    }

    // an optional "0x"/"0X" prefix selects (or confirms) base 16
    if ((base == 0 || base == 16) && *s == '0' && (s + 1 < end) &&
            tolower(*(s+1)) == 'x') {
        s += 2;
        base = 16;
    }

    // base autodetection: a leading '0' means octal, otherwise decimal
    if (base == 0) {
        if (*s == '0') {
            base = 8;
            ++s;
        } else {
            base = 10;
        }
    }
    if (s >= end) return false;

    // overflow detection: accumulating one more digit past cutoff/cutlim
    // would no longer fit into int64_t
    uint64_t cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
    const int cutlim = cutoff % static_cast<int64_t>(base);
    cutoff /= static_cast<uint64_t>(base);

    int any = 0;
    int64_t acc = 0;
    while (s < end) {
        int c = *s;
        if (xisdigit(c)) {
            c -= '0';
        } else if (xisalpha(c)) {
            c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break; // not a digit at all
        }
        if (c >= base)
            break; // not a valid digit in this base
        ++s; // consume the digit
        if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
            any = -1; // keep scanning digits, but remember the overflow
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }

    if (any == 0) // nothing was parsed
        return false;
    if (any < 0) { // overflow
        acc = neg ? INT64_MIN : INT64_MAX;
        errno = ERANGE;
        return false;
    } else if (neg)
        acc = -acc;

    result = acc;
    return success(s - range.rawContent());
}
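
/*
 * Usage sketch for int64(): parse a leading integer in the requested base
 * (base 0 autodetects octal/hex/decimal) and consume only the characters
 * that formed it. The "input" buffer is hypothetical.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "8080/tcp"
 *   int64_t port = 0;
 *   if (tok.int64(port, 10, false, SBuf::npos)) {
 *       // port == 8080; "/tcp" remains in the buffer
 *   }
 */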