]> git.ipfire.org Git - thirdparty/squid.git/blob - src/tests/testTokenizer.cc
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / tests / testTokenizer.cc
1 /*
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 #include "parser/Tokenizer.h"
12 #include "tests/testTokenizer.h"
13 #include "unitTestMain.h"
14
// Register the testTokenizer suite so the CppUnit test runner picks it up.
CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );

// Sample HTTP request used as tokenizer input by the prefix, skip, token
// and suffix test cases in this file.
SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
          "Host: resource.com\r\n"
          "Cookie: laijkpk3422r j1noin \r\n"
          "\r\n");
// Character sets shared by the test cases in this file.
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
const CharacterSet whitespace("whitespace"," \r\n");
const CharacterSet crlf("crlf","\r\n");
const CharacterSet tab("tab","\t");
const CharacterSet numbers("numbers","0123456789");
26
27 void
28 testTokenizer::testTokenizerPrefix()
29 {
30 const SBuf canary("This text should not be changed.");
31
32 Parser::Tokenizer t(text);
33 SBuf s;
34
35 CharacterSet all(whitespace);
36 all += alpha;
37 all += crlf;
38 all += numbers;
39 all.add(':').add('.').add('/');
40
41 // an empty prefix should return false (the full output buffer case)
42 s = canary;
43 const SBuf before = t.remaining();
44 CPPUNIT_ASSERT(!t.prefix(s, all, 0));
45 // ... and a false return value means no parameter changes
46 CPPUNIT_ASSERT_EQUAL(canary, s);
47 // ... and a false return value means no input buffer changes
48 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
49
50 // successful prefix tokenization
51 CPPUNIT_ASSERT(t.prefix(s,alpha));
52 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
53 CPPUNIT_ASSERT(t.prefix(s,whitespace));
54 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
55
56 //no match (first char is not in the prefix set)
57 CPPUNIT_ASSERT(!t.prefix(s,whitespace));
58 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
59
60 // one more match to set S to something meaningful
61 CPPUNIT_ASSERT(t.prefix(s,alpha));
62 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
63
64 //no match (no characters from the character set in the prefix)
65 CPPUNIT_ASSERT(!t.prefix(s,tab));
66 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
67
68 // match until the end of the sample
69 CPPUNIT_ASSERT(t.prefix(s,all));
70 CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
71
72 // empty prefix should return false (the empty input buffer case)
73 s = canary;
74 CPPUNIT_ASSERT(!t.prefix(s, all));
75 // ... and a false return value means no parameter changes
76 CPPUNIT_ASSERT_EQUAL(canary, s);
77 }
78
79 void
80 testTokenizer::testTokenizerSkip()
81 {
82 Parser::Tokenizer t(text);
83 SBuf s;
84
85 // first scenario: patterns match
86 // prep for test
87 CPPUNIT_ASSERT(t.prefix(s,alpha));
88 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
89
90 // test skipping one character from a character set
91 CPPUNIT_ASSERT(t.skipOne(whitespace));
92 // check that skip was right
93 CPPUNIT_ASSERT(t.prefix(s,alpha));
94 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
95
96 //check skip prefix
97 CPPUNIT_ASSERT(t.skip(SBuf("://")));
98 // verify
99 CPPUNIT_ASSERT(t.prefix(s,alpha));
100 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
101
102 // no skip
103 CPPUNIT_ASSERT(!t.skipOne(alpha));
104 CPPUNIT_ASSERT(!t.skip(SBuf("://")));
105 CPPUNIT_ASSERT(!t.skip('a'));
106
107 // test skipping all characters from a character set while looking at .com
108 CPPUNIT_ASSERT(t.skip('.'));
109 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
110 CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
111 }
112
113 void
114 testTokenizer::testTokenizerToken()
115 {
116 Parser::Tokenizer t(text);
117 SBuf s;
118
119 // first scenario: patterns match
120 CPPUNIT_ASSERT(t.token(s,whitespace));
121 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
122 CPPUNIT_ASSERT(t.token(s,whitespace));
123 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
124 CPPUNIT_ASSERT(t.token(s,whitespace));
125 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
126 CPPUNIT_ASSERT(t.token(s,whitespace));
127 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
128
129 }
130
131 void
132 testTokenizer::testTokenizerSuffix()
133 {
134 const SBuf canary("This text should not be changed.");
135
136 Parser::Tokenizer t(text);
137 SBuf s;
138
139 CharacterSet all(whitespace);
140 all += alpha;
141 all += crlf;
142 all += numbers;
143 all.add(':').add('.').add('/');
144
145 // an empty suffix should return false (the full output buffer case)
146 s = canary;
147 const SBuf before = t.remaining();
148 CPPUNIT_ASSERT(!t.suffix(s, all, 0));
149 // ... and a false return value means no parameter changes
150 CPPUNIT_ASSERT_EQUAL(canary, s);
151 // ... and a false return value means no input buffer changes
152 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
153
154 // consume suffix until the last CRLF, including that last CRLF
155 SBuf::size_type remaining = t.remaining().length();
156 while (t.remaining().findLastOf(crlf) != SBuf::npos) {
157 CPPUNIT_ASSERT(t.remaining().length() > 0);
158 CPPUNIT_ASSERT(t.skipOneTrailing(all));
159 // ensure steady progress
160 CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
161 --remaining;
162 }
163
164 // no match (last char is not in the suffix set)
165 CPPUNIT_ASSERT(!t.suffix(s, crlf));
166 CPPUNIT_ASSERT(!t.suffix(s, whitespace));
167
168 // successful suffix tokenization
169 CPPUNIT_ASSERT(t.suffix(s, numbers));
170 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
171 CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
172 CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
173 CPPUNIT_ASSERT(t.suffix(s, alpha));
174 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
175 CPPUNIT_ASSERT(t.suffix(s, whitespace));
176 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
177
178 // match until the end of the sample
179 CPPUNIT_ASSERT(t.suffix(s, all));
180 CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
181
182 // an empty buffer does not end with a token
183 s = canary;
184 CPPUNIT_ASSERT(!t.suffix(s, all));
185 CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
186
187 // we cannot skip an empty suffix, even in an empty buffer
188 CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
189 }
190
// Placeholder test case: the body is empty, so no checks are performed here.
void
testTokenizer::testCharacterSet()
{

}
196
197 void
198 testTokenizer::testTokenizerInt64()
199 {
200 // successful parse in base 10
201 {
202 int64_t rv;
203 Parser::Tokenizer t(SBuf("1234"));
204 const int64_t benchmark = 1234;
205 CPPUNIT_ASSERT(t.int64(rv, 10));
206 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
207 CPPUNIT_ASSERT(t.buf().isEmpty());
208 }
209
210 // successful parse, autodetect base
211 {
212 int64_t rv;
213 Parser::Tokenizer t(SBuf("1234"));
214 const int64_t benchmark = 1234;
215 CPPUNIT_ASSERT(t.int64(rv));
216 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
217 CPPUNIT_ASSERT(t.buf().isEmpty());
218 }
219
220 // successful parse, autodetect base
221 {
222 int64_t rv;
223 Parser::Tokenizer t(SBuf("01234"));
224 const int64_t benchmark = 01234;
225 CPPUNIT_ASSERT(t.int64(rv));
226 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
227 CPPUNIT_ASSERT(t.buf().isEmpty());
228 }
229
230 // successful parse, autodetect base
231 {
232 int64_t rv;
233 Parser::Tokenizer t(SBuf("0x12f4"));
234 const int64_t benchmark = 0x12f4;
235 CPPUNIT_ASSERT(t.int64(rv));
236 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
237 CPPUNIT_ASSERT(t.buf().isEmpty());
238 }
239
240 // API mismatch: don't eat leading space
241 {
242 int64_t rv;
243 Parser::Tokenizer t(SBuf(" 1234"));
244 CPPUNIT_ASSERT(!t.int64(rv));
245 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
246 }
247
248 // API mismatch: don't eat multiple leading spaces
249 {
250 int64_t rv;
251 Parser::Tokenizer t(SBuf(" 1234"));
252 CPPUNIT_ASSERT(!t.int64(rv));
253 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
254 }
255
256 // trailing spaces
257 {
258 int64_t rv;
259 Parser::Tokenizer t(SBuf("1234 foo"));
260 const int64_t benchmark = 1234;
261 CPPUNIT_ASSERT(t.int64(rv));
262 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
263 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
264 }
265
266 // trailing nonspaces
267 {
268 int64_t rv;
269 Parser::Tokenizer t(SBuf("1234foo"));
270 const int64_t benchmark = 1234;
271 CPPUNIT_ASSERT(t.int64(rv));
272 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
273 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
274 }
275
276 // trailing nonspaces
277 {
278 int64_t rv;
279 Parser::Tokenizer t(SBuf("0x1234foo"));
280 const int64_t benchmark = 0x1234f;
281 CPPUNIT_ASSERT(t.int64(rv));
282 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
283 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
284 }
285
286 // overflow
287 {
288 int64_t rv;
289 Parser::Tokenizer t(SBuf("1029397752385698678762234"));
290 CPPUNIT_ASSERT(!t.int64(rv));
291 CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
292 }
293
294 // buffered sub-string parsing
295 {
296 int64_t rv;
297 SBuf base("1029397752385698678762234");
298 const int64_t benchmark = 22;
299 Parser::Tokenizer t(base.substr(base.length()-4,2));
300 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
301 CPPUNIT_ASSERT(t.int64(rv));
302 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
303 CPPUNIT_ASSERT(t.buf().isEmpty());
304 }
305
306 // base-16, prefix
307 {
308 int64_t rv;
309 SBuf base("deadbeefrow");
310 const int64_t benchmark=0xdeadbeef;
311 Parser::Tokenizer t(base);
312 CPPUNIT_ASSERT(t.int64(rv,16));
313 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
314 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
315
316 }
317 }
318