/*
 * Extracted from the git.ipfire.org web view of
 * thirdparty/squid.git, blob src/tests/testTokenizer.cc
 *
 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
10 #include "base/CharacterSet.h"
11 #include "parser/Tokenizer.h"
12 #include "tests/testTokenizer.h"
13 #include "unitTestMain.h"
15 CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer
);
17 SBuf
text("GET http://resource.com/path HTTP/1.1\r\n"
18 "Host: resource.com\r\n"
19 "Cookie: laijkpk3422r j1noin \r\n"
21 const CharacterSet
alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
22 const CharacterSet
whitespace("whitespace"," \r\n");
23 const CharacterSet
crlf("crlf","\r\n");
24 const CharacterSet
tab("tab","\t");
25 const CharacterSet
numbers("numbers","0123456789");
28 testTokenizer::testTokenizerPrefix()
30 const SBuf
canary("This text should not be changed.");
32 Parser::Tokenizer
t(text
);
35 CharacterSet
all(whitespace
);
39 all
.add(':').add('.').add('/');
41 // an empty prefix should return false (the full output buffer case)
43 const SBuf before
= t
.remaining();
44 CPPUNIT_ASSERT(!t
.prefix(s
, all
, 0));
45 // ... and a false return value means no parameter changes
46 CPPUNIT_ASSERT_EQUAL(canary
, s
);
47 // ... and a false return value means no input buffer changes
48 CPPUNIT_ASSERT_EQUAL(before
, t
.remaining());
50 // successful prefix tokenization
51 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
52 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
53 CPPUNIT_ASSERT(t
.prefix(s
,whitespace
));
54 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s
);
56 //no match (first char is not in the prefix set)
57 CPPUNIT_ASSERT(!t
.prefix(s
,whitespace
));
58 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s
);
60 // one more match to set S to something meaningful
61 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
62 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
);
64 //no match (no characters from the character set in the prefix)
65 CPPUNIT_ASSERT(!t
.prefix(s
,tab
));
66 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
); //output SBuf left untouched
68 // match until the end of the sample
69 CPPUNIT_ASSERT(t
.prefix(s
,all
));
70 CPPUNIT_ASSERT_EQUAL(SBuf(),t
.remaining());
72 // empty prefix should return false (the empty input buffer case)
74 CPPUNIT_ASSERT(!t
.prefix(s
, all
));
75 // ... and a false return value means no parameter changes
76 CPPUNIT_ASSERT_EQUAL(canary
, s
);
80 testTokenizer::testTokenizerSkip()
82 Parser::Tokenizer
t(text
);
85 // first scenario: patterns match
87 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
88 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
90 // test skipping one character from a character set
91 CPPUNIT_ASSERT(t
.skipOne(whitespace
));
92 // check that skip was right
93 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
94 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
);
97 CPPUNIT_ASSERT(t
.skip(SBuf("://")));
99 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
100 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s
);
103 CPPUNIT_ASSERT(!t
.skipOne(alpha
));
104 CPPUNIT_ASSERT(!t
.skip(SBuf("://")));
105 CPPUNIT_ASSERT(!t
.skip('a'));
107 // test skipping all characters from a character set while looking at .com
108 CPPUNIT_ASSERT(t
.skip('.'));
109 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type
>(3), t
.skipAll(alpha
));
110 CPPUNIT_ASSERT(t
.remaining().startsWith(SBuf("/path")));
114 testTokenizer::testTokenizerToken()
116 Parser::Tokenizer
t(text
);
119 // first scenario: patterns match
120 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
121 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
122 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
123 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s
);
124 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
125 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s
);
126 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
127 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s
);
132 testTokenizer::testTokenizerSuffix()
134 const SBuf
canary("This text should not be changed.");
136 Parser::Tokenizer
t(text
);
139 CharacterSet
all(whitespace
);
143 all
.add(':').add('.').add('/');
145 // an empty suffix should return false (the full output buffer case)
147 const SBuf before
= t
.remaining();
148 CPPUNIT_ASSERT(!t
.suffix(s
, all
, 0));
149 // ... and a false return value means no parameter changes
150 CPPUNIT_ASSERT_EQUAL(canary
, s
);
151 // ... and a false return value means no input buffer changes
152 CPPUNIT_ASSERT_EQUAL(before
, t
.remaining());
154 // consume suffix until the last CRLF, including that last CRLF
155 SBuf::size_type remaining
= t
.remaining().length();
156 while (t
.remaining().findLastOf(crlf
) != SBuf::npos
) {
157 CPPUNIT_ASSERT(t
.remaining().length() > 0);
158 CPPUNIT_ASSERT(t
.skipOneTrailing(all
));
159 // ensure steady progress
160 CPPUNIT_ASSERT_EQUAL(remaining
, t
.remaining().length() + 1);
164 // no match (last char is not in the suffix set)
165 CPPUNIT_ASSERT(!t
.suffix(s
, crlf
));
166 CPPUNIT_ASSERT(!t
.suffix(s
, whitespace
));
168 // successful suffix tokenization
169 CPPUNIT_ASSERT(t
.suffix(s
, numbers
));
170 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s
);
171 CPPUNIT_ASSERT(t
.skipSuffix(SBuf("1.")));
172 CPPUNIT_ASSERT(t
.skipSuffix(SBuf("/")));
173 CPPUNIT_ASSERT(t
.suffix(s
, alpha
));
174 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s
);
175 CPPUNIT_ASSERT(t
.suffix(s
, whitespace
));
176 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s
);
178 // match until the end of the sample
179 CPPUNIT_ASSERT(t
.suffix(s
, all
));
180 CPPUNIT_ASSERT_EQUAL(SBuf(), t
.remaining());
182 // an empty buffer does not end with a token
184 CPPUNIT_ASSERT(!t
.suffix(s
, all
));
185 CPPUNIT_ASSERT_EQUAL(canary
, s
); // no parameter changes
187 // we cannot skip an empty suffix, even in an empty buffer
188 CPPUNIT_ASSERT(!t
.skipSuffix(SBuf()));
192 testTokenizer::testCharacterSet()
198 testTokenizer::testTokenizerInt64()
200 // successful parse in base 10
203 Parser::Tokenizer
t(SBuf("1234"));
204 const int64_t benchmark
= 1234;
205 CPPUNIT_ASSERT(t
.int64(rv
, 10));
206 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
207 CPPUNIT_ASSERT(t
.buf().isEmpty());
210 // successful parse, autodetect base
213 Parser::Tokenizer
t(SBuf("1234"));
214 const int64_t benchmark
= 1234;
215 CPPUNIT_ASSERT(t
.int64(rv
));
216 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
217 CPPUNIT_ASSERT(t
.buf().isEmpty());
220 // successful parse, autodetect base
223 Parser::Tokenizer
t(SBuf("01234"));
224 const int64_t benchmark
= 01234;
225 CPPUNIT_ASSERT(t
.int64(rv
));
226 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
227 CPPUNIT_ASSERT(t
.buf().isEmpty());
230 // successful parse, autodetect base
233 Parser::Tokenizer
t(SBuf("0x12f4"));
234 const int64_t benchmark
= 0x12f4;
235 CPPUNIT_ASSERT(t
.int64(rv
));
236 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
237 CPPUNIT_ASSERT(t
.buf().isEmpty());
240 // API mismatch: don't eat leading space
243 Parser::Tokenizer
t(SBuf(" 1234"));
244 CPPUNIT_ASSERT(!t
.int64(rv
));
245 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t
.buf());
248 // API mismatch: don't eat multiple leading spaces
251 Parser::Tokenizer
t(SBuf(" 1234"));
252 CPPUNIT_ASSERT(!t
.int64(rv
));
253 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t
.buf());
259 Parser::Tokenizer
t(SBuf("1234 foo"));
260 const int64_t benchmark
= 1234;
261 CPPUNIT_ASSERT(t
.int64(rv
));
262 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
263 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t
.buf());
266 // trailing nonspaces
269 Parser::Tokenizer
t(SBuf("1234foo"));
270 const int64_t benchmark
= 1234;
271 CPPUNIT_ASSERT(t
.int64(rv
));
272 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
273 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t
.buf());
276 // trailing nonspaces
279 Parser::Tokenizer
t(SBuf("0x1234foo"));
280 const int64_t benchmark
= 0x1234f;
281 CPPUNIT_ASSERT(t
.int64(rv
));
282 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
283 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t
.buf());
289 Parser::Tokenizer
t(SBuf("1029397752385698678762234"));
290 CPPUNIT_ASSERT(!t
.int64(rv
));
291 CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t
.buf());
294 // buffered sub-string parsing
297 SBuf
base("1029397752385698678762234");
298 const int64_t benchmark
= 22;
299 Parser::Tokenizer
t(base
.substr(base
.length()-4,2));
300 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t
.buf());
301 CPPUNIT_ASSERT(t
.int64(rv
));
302 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
303 CPPUNIT_ASSERT(t
.buf().isEmpty());
309 SBuf
base("deadbeefrow");
310 const int64_t benchmark
=0xdeadbeef;
311 Parser::Tokenizer
t(base
);
312 CPPUNIT_ASSERT(t
.int64(rv
,16));
313 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
314 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t
.buf());