]> git.ipfire.org Git - thirdparty/squid.git/blame - src/tests/testTokenizer.cc
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / src / tests / testTokenizer.cc
CommitLineData
bbc27441 1/*
5b74111a 2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
bbc27441
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
c9a4e310 9#include "squid.h"
c9a4e310 10#include "base/CharacterSet.h"
5d4cfe02 11#include "parser/Tokenizer.h"
37122e21 12#include "tests/testTokenizer.h"
7f861c77
AJ
13#include "unitTestMain.h"
14
c9a4e310
FC
15CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
16
17SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
11bd4370
A
18 "Host: resource.com\r\n"
19 "Cookie: laijkpk3422r j1noin \r\n"
20 "\r\n");
c9a4e310
FC
21const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
22const CharacterSet whitespace("whitespace"," \r\n");
23const CharacterSet crlf("crlf","\r\n");
24const CharacterSet tab("tab","\t");
25const CharacterSet numbers("numbers","0123456789");
26
27void
28testTokenizer::testTokenizerPrefix()
29{
1d53a60a
AR
30 const SBuf canary("This text should not be changed.");
31
c9a4e310
FC
32 Parser::Tokenizer t(text);
33 SBuf s;
34
1d53a60a
AR
35 CharacterSet all(whitespace);
36 all += alpha;
37 all += crlf;
38 all += numbers;
39 all.add(':').add('.').add('/');
40
41 // an empty prefix should return false (the full output buffer case)
42 s = canary;
43 const SBuf before = t.remaining();
44 CPPUNIT_ASSERT(!t.prefix(s, all, 0));
45 // ... and a false return value means no parameter changes
46 CPPUNIT_ASSERT_EQUAL(canary, s);
47 // ... and a false return value means no input buffer changes
48 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
49
c9a4e310
FC
50 // successful prefix tokenization
51 CPPUNIT_ASSERT(t.prefix(s,alpha));
52 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
53 CPPUNIT_ASSERT(t.prefix(s,whitespace));
54 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
55
56 //no match (first char is not in the prefix set)
57 CPPUNIT_ASSERT(!t.prefix(s,whitespace));
58 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
59
60 // one more match to set S to something meaningful
61 CPPUNIT_ASSERT(t.prefix(s,alpha));
62 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
63
64 //no match (no characters from the character set in the prefix)
65 CPPUNIT_ASSERT(!t.prefix(s,tab));
66 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
67
68 // match until the end of the sample
c9a4e310
FC
69 CPPUNIT_ASSERT(t.prefix(s,all));
70 CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
1d53a60a
AR
71
72 // empty prefix should return false (the empty input buffer case)
73 s = canary;
74 CPPUNIT_ASSERT(!t.prefix(s, all));
75 // ... and a false return value means no parameter changes
76 CPPUNIT_ASSERT_EQUAL(canary, s);
c9a4e310
FC
77}
78
79void
80testTokenizer::testTokenizerSkip()
81{
82 Parser::Tokenizer t(text);
83 SBuf s;
84
85 // first scenario: patterns match
86 // prep for test
87 CPPUNIT_ASSERT(t.prefix(s,alpha));
88 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
89
1ab04517
AR
90 // test skipping one character from a character set
91 CPPUNIT_ASSERT(t.skipOne(whitespace));
c9a4e310
FC
92 // check that skip was right
93 CPPUNIT_ASSERT(t.prefix(s,alpha));
94 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
95
96 //check skip prefix
97 CPPUNIT_ASSERT(t.skip(SBuf("://")));
98 // verify
99 CPPUNIT_ASSERT(t.prefix(s,alpha));
100 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
101
102 // no skip
1ab04517 103 CPPUNIT_ASSERT(!t.skipOne(alpha));
c9a4e310
FC
104 CPPUNIT_ASSERT(!t.skip(SBuf("://")));
105 CPPUNIT_ASSERT(!t.skip('a'));
106
1ab04517
AR
107 // test skipping all characters from a character set while looking at .com
108 CPPUNIT_ASSERT(t.skip('.'));
109 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
110 CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
c9a4e310
FC
111}
112
113void
114testTokenizer::testTokenizerToken()
115{
116 Parser::Tokenizer t(text);
117 SBuf s;
118
119 // first scenario: patterns match
120 CPPUNIT_ASSERT(t.token(s,whitespace));
121 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
122 CPPUNIT_ASSERT(t.token(s,whitespace));
123 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
124 CPPUNIT_ASSERT(t.token(s,whitespace));
125 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
126 CPPUNIT_ASSERT(t.token(s,whitespace));
127 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
128
c9a4e310
FC
129}
130
c571034b
AR
131void
132testTokenizer::testTokenizerSuffix()
133{
134 const SBuf canary("This text should not be changed.");
135
136 Parser::Tokenizer t(text);
137 SBuf s;
138
139 CharacterSet all(whitespace);
140 all += alpha;
141 all += crlf;
142 all += numbers;
143 all.add(':').add('.').add('/');
144
145 // an empty suffix should return false (the full output buffer case)
146 s = canary;
147 const SBuf before = t.remaining();
148 CPPUNIT_ASSERT(!t.suffix(s, all, 0));
149 // ... and a false return value means no parameter changes
150 CPPUNIT_ASSERT_EQUAL(canary, s);
151 // ... and a false return value means no input buffer changes
152 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
153
154 // consume suffix until the last CRLF, including that last CRLF
155 SBuf::size_type remaining = t.remaining().length();
156 while (t.remaining().findLastOf(crlf) != SBuf::npos) {
157 CPPUNIT_ASSERT(t.remaining().length() > 0);
158 CPPUNIT_ASSERT(t.skipOneTrailing(all));
159 // ensure steady progress
160 CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
161 --remaining;
162 }
163
164 // no match (last char is not in the suffix set)
165 CPPUNIT_ASSERT(!t.suffix(s, crlf));
166 CPPUNIT_ASSERT(!t.suffix(s, whitespace));
167
168 // successful suffix tokenization
169 CPPUNIT_ASSERT(t.suffix(s, numbers));
170 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
171 CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
172 CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
173 CPPUNIT_ASSERT(t.suffix(s, alpha));
174 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
175 CPPUNIT_ASSERT(t.suffix(s, whitespace));
176 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
177
178 // match until the end of the sample
179 CPPUNIT_ASSERT(t.suffix(s, all));
180 CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
181
182 // an empty buffer does not end with a token
183 s = canary;
184 CPPUNIT_ASSERT(!t.suffix(s, all));
185 CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
186
187 // we cannot skip an empty suffix, even in an empty buffer
188 CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
189}
190
c9a4e310
FC
191void
192testTokenizer::testCharacterSet()
193{
194
195}
a56b469c
FC
196
197void
198testTokenizer::testTokenizerInt64()
199{
a56b469c
FC
200 // successful parse in base 10
201 {
e48aef3e 202 int64_t rv;
a56b469c
FC
203 Parser::Tokenizer t(SBuf("1234"));
204 const int64_t benchmark = 1234;
205 CPPUNIT_ASSERT(t.int64(rv, 10));
206 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
566c0b2b 207 CPPUNIT_ASSERT(t.buf().isEmpty());
a56b469c
FC
208 }
209
210 // successful parse, autodetect base
211 {
e48aef3e 212 int64_t rv;
a56b469c
FC
213 Parser::Tokenizer t(SBuf("1234"));
214 const int64_t benchmark = 1234;
215 CPPUNIT_ASSERT(t.int64(rv));
216 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
566c0b2b 217 CPPUNIT_ASSERT(t.buf().isEmpty());
a56b469c
FC
218 }
219
220 // successful parse, autodetect base
221 {
e48aef3e 222 int64_t rv;
a56b469c
FC
223 Parser::Tokenizer t(SBuf("01234"));
224 const int64_t benchmark = 01234;
225 CPPUNIT_ASSERT(t.int64(rv));
226 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
566c0b2b 227 CPPUNIT_ASSERT(t.buf().isEmpty());
a56b469c
FC
228 }
229
230 // successful parse, autodetect base
231 {
e48aef3e 232 int64_t rv;
a56b469c
FC
233 Parser::Tokenizer t(SBuf("0x12f4"));
234 const int64_t benchmark = 0x12f4;
235 CPPUNIT_ASSERT(t.int64(rv));
236 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
566c0b2b 237 CPPUNIT_ASSERT(t.buf().isEmpty());
a56b469c
FC
238 }
239
240 // API mismatch: don't eat leading space
241 {
e48aef3e 242 int64_t rv;
a56b469c
FC
243 Parser::Tokenizer t(SBuf(" 1234"));
244 CPPUNIT_ASSERT(!t.int64(rv));
566c0b2b 245 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
a56b469c
FC
246 }
247
248 // API mismatch: don't eat multiple leading spaces
249 {
e48aef3e 250 int64_t rv;
a56b469c
FC
251 Parser::Tokenizer t(SBuf(" 1234"));
252 CPPUNIT_ASSERT(!t.int64(rv));
566c0b2b 253 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
a56b469c
FC
254 }
255
256 // trailing spaces
257 {
e48aef3e 258 int64_t rv;
a56b469c
FC
259 Parser::Tokenizer t(SBuf("1234 foo"));
260 const int64_t benchmark = 1234;
261 CPPUNIT_ASSERT(t.int64(rv));
262 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
263 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
264 }
265
266 // trailing nonspaces
267 {
e48aef3e 268 int64_t rv;
a56b469c
FC
269 Parser::Tokenizer t(SBuf("1234foo"));
270 const int64_t benchmark = 1234;
271 CPPUNIT_ASSERT(t.int64(rv));
272 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
273 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
274 }
275
276 // trailing nonspaces
277 {
e48aef3e 278 int64_t rv;
a56b469c
FC
279 Parser::Tokenizer t(SBuf("0x1234foo"));
280 const int64_t benchmark = 0x1234f;
281 CPPUNIT_ASSERT(t.int64(rv));
282 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
283 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
284 }
e48aef3e
FC
285
286 // overflow
287 {
288 int64_t rv;
289 Parser::Tokenizer t(SBuf("1029397752385698678762234"));
290 CPPUNIT_ASSERT(!t.int64(rv));
566c0b2b 291 CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
e48aef3e 292 }
0f5e5bb3
AJ
293
294 // buffered sub-string parsing
295 {
296 int64_t rv;
297 SBuf base("1029397752385698678762234");
298 const int64_t benchmark = 22;
299 Parser::Tokenizer t(base.substr(base.length()-4,2));
300 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
301 CPPUNIT_ASSERT(t.int64(rv));
302 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
566c0b2b 303 CPPUNIT_ASSERT(t.buf().isEmpty());
0f5e5bb3 304 }
01f2137d
FC
305
306 // base-16, prefix
307 {
308 int64_t rv;
309 SBuf base("deadbeefrow");
310 const int64_t benchmark=0xdeadbeef;
311 Parser::Tokenizer t(base);
312 CPPUNIT_ASSERT(t.int64(rv,16));
313 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
314 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
315
316 }
a56b469c 317}
f53969cc 318