]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/tests/testTokenizer.cc
/*
 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
10 #include "base/CharacterSet.h"
11 #include "parser/Tokenizer.h"
12 #include "tests/testTokenizer.h"
13 #include "unitTestMain.h"
15 CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer
);
17 SBuf
text("GET http://resource.com/path HTTP/1.1\r\n"
18 "Host: resource.com\r\n"
19 "Cookie: laijkpk3422r j1noin \r\n"
21 const CharacterSet
alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
22 const CharacterSet
whitespace("whitespace"," \r\n");
23 const CharacterSet
crlf("crlf","\r\n");
24 const CharacterSet
tab("tab","\t");
25 const CharacterSet
numbers("numbers","0123456789");
28 testTokenizer::testTokenizerPrefix()
30 const SBuf
canary("This text should not be changed.");
32 Parser::Tokenizer
t(text
);
35 CharacterSet
all(whitespace
);
39 all
.add(':').add('.').add('/');
41 // an empty prefix should return false (the full output buffer case)
43 const SBuf before
= t
.remaining();
44 CPPUNIT_ASSERT(!t
.prefix(s
, all
, 0));
45 // ... and a false return value means no parameter changes
46 CPPUNIT_ASSERT_EQUAL(canary
, s
);
47 // ... and a false return value means no input buffer changes
48 CPPUNIT_ASSERT_EQUAL(before
, t
.remaining());
50 // successful prefix tokenization
51 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
52 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
53 CPPUNIT_ASSERT(t
.prefix(s
,whitespace
));
54 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s
);
56 //no match (first char is not in the prefix set)
57 CPPUNIT_ASSERT(!t
.prefix(s
,whitespace
));
58 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s
);
60 // one more match to set S to something meaningful
61 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
62 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
);
64 //no match (no characters from the character set in the prefix)
65 CPPUNIT_ASSERT(!t
.prefix(s
,tab
));
66 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
); //output SBuf left untouched
68 // match until the end of the sample
69 CPPUNIT_ASSERT(t
.prefix(s
,all
));
70 CPPUNIT_ASSERT_EQUAL(SBuf(),t
.remaining());
72 // empty prefix should return false (the empty input buffer case)
74 CPPUNIT_ASSERT(!t
.prefix(s
, all
));
75 // ... and a false return value means no parameter changes
76 CPPUNIT_ASSERT_EQUAL(canary
, s
);
80 testTokenizer::testTokenizerSkip()
82 Parser::Tokenizer
t(text
);
85 // first scenario: patterns match
87 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
88 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
90 // test skipping one character from a character set
91 CPPUNIT_ASSERT(t
.skipOne(whitespace
));
92 // check that skip was right
93 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
94 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s
);
97 CPPUNIT_ASSERT(t
.skip(SBuf("://")));
99 CPPUNIT_ASSERT(t
.prefix(s
,alpha
));
100 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s
);
103 CPPUNIT_ASSERT(!t
.skipOne(alpha
));
104 CPPUNIT_ASSERT(!t
.skip(SBuf("://")));
105 CPPUNIT_ASSERT(!t
.skip('a'));
107 // test skipping all characters from a character set while looking at .com
108 CPPUNIT_ASSERT(t
.skip('.'));
109 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type
>(3), t
.skipAll(alpha
));
110 CPPUNIT_ASSERT(t
.remaining().startsWith(SBuf("/path")));
114 testTokenizer::testTokenizerToken()
116 Parser::Tokenizer
t(text
);
119 // first scenario: patterns match
120 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
121 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s
);
122 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
123 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s
);
124 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
125 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s
);
126 CPPUNIT_ASSERT(t
.token(s
,whitespace
));
127 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s
);
132 testTokenizer::testTokenizerSuffix()
134 const SBuf
canary("This text should not be changed.");
136 Parser::Tokenizer
t(text
);
139 CharacterSet
all(whitespace
);
143 all
.add(':').add('.').add('/');
145 // an empty suffix should return false (the full output buffer case)
147 const SBuf before
= t
.remaining();
148 CPPUNIT_ASSERT(!t
.suffix(s
, all
, 0));
149 // ... and a false return value means no parameter changes
150 CPPUNIT_ASSERT_EQUAL(canary
, s
);
151 // ... and a false return value means no input buffer changes
152 CPPUNIT_ASSERT_EQUAL(before
, t
.remaining());
154 // consume suffix until the last CRLF, including that last CRLF
155 SBuf::size_type remaining
= t
.remaining().length();
156 while (t
.remaining().findLastOf(crlf
) != SBuf::npos
) {
157 CPPUNIT_ASSERT(t
.remaining().length() > 0);
158 CPPUNIT_ASSERT(t
.skipOneTrailing(all
));
159 // ensure steady progress
160 CPPUNIT_ASSERT_EQUAL(remaining
, t
.remaining().length() + 1);
164 // no match (last char is not in the suffix set)
165 CPPUNIT_ASSERT(!t
.suffix(s
, crlf
));
166 CPPUNIT_ASSERT(!t
.suffix(s
, whitespace
));
168 // successful suffix tokenization
169 CPPUNIT_ASSERT(t
.suffix(s
, numbers
));
170 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s
);
171 CPPUNIT_ASSERT(t
.skipSuffix(SBuf("1.")));
172 CPPUNIT_ASSERT(t
.skipSuffix(SBuf("/")));
173 CPPUNIT_ASSERT(t
.suffix(s
, alpha
));
174 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s
);
175 CPPUNIT_ASSERT(t
.suffix(s
, whitespace
));
176 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s
);
178 // match until the end of the sample
179 CPPUNIT_ASSERT(t
.suffix(s
, all
));
180 CPPUNIT_ASSERT_EQUAL(SBuf(), t
.remaining());
182 // an empty buffer does not end with a token
184 CPPUNIT_ASSERT(!t
.suffix(s
, all
));
185 CPPUNIT_ASSERT_EQUAL(canary
, s
); // no parameter changes
187 // we cannot skip an empty suffix, even in an empty buffer
188 CPPUNIT_ASSERT(!t
.skipSuffix(SBuf()));
// Test case named for CharacterSet.
// NOTE(review): only the declarator line is visible in this view; the return
// type and the body are missing here, so what (if anything) this test checks
// cannot be stated -- confirm against the upstream file.
192 testTokenizer::testCharacterSet()
198 testTokenizer::testTokenizerInt64()
200 // successful parse in base 10
203 Parser::Tokenizer
t(SBuf("1234"));
204 const int64_t benchmark
= 1234;
205 CPPUNIT_ASSERT(t
.int64(rv
, 10));
206 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
209 // successful parse, autodetect base
212 Parser::Tokenizer
t(SBuf("1234"));
213 const int64_t benchmark
= 1234;
214 CPPUNIT_ASSERT(t
.int64(rv
));
215 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
218 // successful parse, autodetect base
221 Parser::Tokenizer
t(SBuf("01234"));
222 const int64_t benchmark
= 01234;
223 CPPUNIT_ASSERT(t
.int64(rv
));
224 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
227 // successful parse, autodetect base
230 Parser::Tokenizer
t(SBuf("0x12f4"));
231 const int64_t benchmark
= 0x12f4;
232 CPPUNIT_ASSERT(t
.int64(rv
));
233 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
236 // API mismatch: don't eat leading space
239 Parser::Tokenizer
t(SBuf(" 1234"));
240 CPPUNIT_ASSERT(!t
.int64(rv
));
243 // API mismatch: don't eat multiple leading spaces
246 Parser::Tokenizer
t(SBuf(" 1234"));
247 CPPUNIT_ASSERT(!t
.int64(rv
));
253 Parser::Tokenizer
t(SBuf("1234 foo"));
254 const int64_t benchmark
= 1234;
255 CPPUNIT_ASSERT(t
.int64(rv
));
256 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
257 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t
.buf());
260 // trailing nonspaces
263 Parser::Tokenizer
t(SBuf("1234foo"));
264 const int64_t benchmark
= 1234;
265 CPPUNIT_ASSERT(t
.int64(rv
));
266 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
267 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t
.buf());
270 // trailing nonspaces
273 Parser::Tokenizer
t(SBuf("0x1234foo"));
274 const int64_t benchmark
= 0x1234f;
275 CPPUNIT_ASSERT(t
.int64(rv
));
276 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
277 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t
.buf());
283 Parser::Tokenizer
t(SBuf("1029397752385698678762234"));
284 CPPUNIT_ASSERT(!t
.int64(rv
));
287 // buffered sub-string parsing
290 SBuf
base("1029397752385698678762234");
291 const int64_t benchmark
= 22;
292 Parser::Tokenizer
t(base
.substr(base
.length()-4,2));
293 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t
.buf());
294 CPPUNIT_ASSERT(t
.int64(rv
));
295 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
301 SBuf
base("deadbeefrow");
302 const int64_t benchmark
=0xdeadbeef;
303 Parser::Tokenizer
t(base
);
304 CPPUNIT_ASSERT(t
.int64(rv
,16));
305 CPPUNIT_ASSERT_EQUAL(benchmark
,rv
);
306 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t
.buf());