]>
Commit | Line | Data |
---|---|---|
bbc27441 | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
bbc27441 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c9a4e310 | 9 | #include "squid.h" |
c9a4e310 | 10 | #include "base/CharacterSet.h" |
5d4cfe02 | 11 | #include "parser/Tokenizer.h" |
37122e21 | 12 | #include "tests/testTokenizer.h" |
7f861c77 AJ |
13 | #include "unitTestMain.h" |
14 | ||
c9a4e310 FC |
15 | CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); |
16 | ||
17 | SBuf text("GET http://resource.com/path HTTP/1.1\r\n" | |
11bd4370 A |
18 | "Host: resource.com\r\n" |
19 | "Cookie: laijkpk3422r j1noin \r\n" | |
20 | "\r\n"); | |
c9a4e310 FC |
21 | const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); |
22 | const CharacterSet whitespace("whitespace"," \r\n"); | |
23 | const CharacterSet crlf("crlf","\r\n"); | |
24 | const CharacterSet tab("tab","\t"); | |
25 | const CharacterSet numbers("numbers","0123456789"); | |
26 | ||
27 | void | |
28 | testTokenizer::testTokenizerPrefix() | |
29 | { | |
1d53a60a AR |
30 | const SBuf canary("This text should not be changed."); |
31 | ||
c9a4e310 FC |
32 | Parser::Tokenizer t(text); |
33 | SBuf s; | |
34 | ||
1d53a60a AR |
35 | CharacterSet all(whitespace); |
36 | all += alpha; | |
37 | all += crlf; | |
38 | all += numbers; | |
39 | all.add(':').add('.').add('/'); | |
40 | ||
41 | // an empty prefix should return false (the full output buffer case) | |
42 | s = canary; | |
43 | const SBuf before = t.remaining(); | |
44 | CPPUNIT_ASSERT(!t.prefix(s, all, 0)); | |
45 | // ... and a false return value means no parameter changes | |
46 | CPPUNIT_ASSERT_EQUAL(canary, s); | |
47 | // ... and a false return value means no input buffer changes | |
48 | CPPUNIT_ASSERT_EQUAL(before, t.remaining()); | |
49 | ||
c9a4e310 FC |
50 | // successful prefix tokenization |
51 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
52 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
53 | CPPUNIT_ASSERT(t.prefix(s,whitespace)); | |
54 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
55 | ||
56 | //no match (first char is not in the prefix set) | |
57 | CPPUNIT_ASSERT(!t.prefix(s,whitespace)); | |
58 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
59 | ||
60 | // one more match to set S to something meaningful | |
61 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
62 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
63 | ||
64 | //no match (no characters from the character set in the prefix) | |
65 | CPPUNIT_ASSERT(!t.prefix(s,tab)); | |
66 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched | |
67 | ||
68 | // match until the end of the sample | |
c9a4e310 FC |
69 | CPPUNIT_ASSERT(t.prefix(s,all)); |
70 | CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining()); | |
1d53a60a AR |
71 | |
72 | // empty prefix should return false (the empty input buffer case) | |
73 | s = canary; | |
74 | CPPUNIT_ASSERT(!t.prefix(s, all)); | |
75 | // ... and a false return value means no parameter changes | |
76 | CPPUNIT_ASSERT_EQUAL(canary, s); | |
c9a4e310 FC |
77 | } |
78 | ||
79 | void | |
80 | testTokenizer::testTokenizerSkip() | |
81 | { | |
82 | Parser::Tokenizer t(text); | |
83 | SBuf s; | |
84 | ||
85 | // first scenario: patterns match | |
86 | // prep for test | |
87 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
88 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
89 | ||
1ab04517 AR |
90 | // test skipping one character from a character set |
91 | CPPUNIT_ASSERT(t.skipOne(whitespace)); | |
c9a4e310 FC |
92 | // check that skip was right |
93 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
94 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
95 | ||
96 | //check skip prefix | |
97 | CPPUNIT_ASSERT(t.skip(SBuf("://"))); | |
98 | // verify | |
99 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
100 | CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s); | |
101 | ||
102 | // no skip | |
1ab04517 | 103 | CPPUNIT_ASSERT(!t.skipOne(alpha)); |
c9a4e310 FC |
104 | CPPUNIT_ASSERT(!t.skip(SBuf("://"))); |
105 | CPPUNIT_ASSERT(!t.skip('a')); | |
106 | ||
1ab04517 AR |
107 | // test skipping all characters from a character set while looking at .com |
108 | CPPUNIT_ASSERT(t.skip('.')); | |
109 | CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha)); | |
110 | CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path"))); | |
c9a4e310 FC |
111 | } |
112 | ||
113 | void | |
114 | testTokenizer::testTokenizerToken() | |
115 | { | |
116 | Parser::Tokenizer t(text); | |
117 | SBuf s; | |
118 | ||
119 | // first scenario: patterns match | |
120 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
121 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
122 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
123 | CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s); | |
124 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
125 | CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s); | |
126 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
127 | CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s); | |
128 | ||
c9a4e310 FC |
129 | } |
130 | ||
c571034b AR |
131 | void |
132 | testTokenizer::testTokenizerSuffix() | |
133 | { | |
134 | const SBuf canary("This text should not be changed."); | |
135 | ||
136 | Parser::Tokenizer t(text); | |
137 | SBuf s; | |
138 | ||
139 | CharacterSet all(whitespace); | |
140 | all += alpha; | |
141 | all += crlf; | |
142 | all += numbers; | |
143 | all.add(':').add('.').add('/'); | |
144 | ||
145 | // an empty suffix should return false (the full output buffer case) | |
146 | s = canary; | |
147 | const SBuf before = t.remaining(); | |
148 | CPPUNIT_ASSERT(!t.suffix(s, all, 0)); | |
149 | // ... and a false return value means no parameter changes | |
150 | CPPUNIT_ASSERT_EQUAL(canary, s); | |
151 | // ... and a false return value means no input buffer changes | |
152 | CPPUNIT_ASSERT_EQUAL(before, t.remaining()); | |
153 | ||
154 | // consume suffix until the last CRLF, including that last CRLF | |
155 | SBuf::size_type remaining = t.remaining().length(); | |
156 | while (t.remaining().findLastOf(crlf) != SBuf::npos) { | |
157 | CPPUNIT_ASSERT(t.remaining().length() > 0); | |
158 | CPPUNIT_ASSERT(t.skipOneTrailing(all)); | |
159 | // ensure steady progress | |
160 | CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1); | |
161 | --remaining; | |
162 | } | |
163 | ||
164 | // no match (last char is not in the suffix set) | |
165 | CPPUNIT_ASSERT(!t.suffix(s, crlf)); | |
166 | CPPUNIT_ASSERT(!t.suffix(s, whitespace)); | |
167 | ||
168 | // successful suffix tokenization | |
169 | CPPUNIT_ASSERT(t.suffix(s, numbers)); | |
170 | CPPUNIT_ASSERT_EQUAL(SBuf("1"), s); | |
171 | CPPUNIT_ASSERT(t.skipSuffix(SBuf("1."))); | |
172 | CPPUNIT_ASSERT(t.skipSuffix(SBuf("/"))); | |
173 | CPPUNIT_ASSERT(t.suffix(s, alpha)); | |
174 | CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s); | |
175 | CPPUNIT_ASSERT(t.suffix(s, whitespace)); | |
176 | CPPUNIT_ASSERT_EQUAL(SBuf(" "), s); | |
177 | ||
178 | // match until the end of the sample | |
179 | CPPUNIT_ASSERT(t.suffix(s, all)); | |
180 | CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining()); | |
181 | ||
182 | // an empty buffer does not end with a token | |
183 | s = canary; | |
184 | CPPUNIT_ASSERT(!t.suffix(s, all)); | |
185 | CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes | |
186 | ||
187 | // we cannot skip an empty suffix, even in an empty buffer | |
188 | CPPUNIT_ASSERT(!t.skipSuffix(SBuf())); | |
189 | } | |
190 | ||
c9a4e310 FC |
191 | void |
192 | testTokenizer::testCharacterSet() | |
193 | { | |
194 | ||
195 | } | |
a56b469c FC |
196 | |
197 | void | |
198 | testTokenizer::testTokenizerInt64() | |
199 | { | |
a56b469c FC |
200 | // successful parse in base 10 |
201 | { | |
e48aef3e | 202 | int64_t rv; |
a56b469c FC |
203 | Parser::Tokenizer t(SBuf("1234")); |
204 | const int64_t benchmark = 1234; | |
205 | CPPUNIT_ASSERT(t.int64(rv, 10)); | |
206 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
566c0b2b | 207 | CPPUNIT_ASSERT(t.buf().isEmpty()); |
a56b469c FC |
208 | } |
209 | ||
210 | // successful parse, autodetect base | |
211 | { | |
e48aef3e | 212 | int64_t rv; |
a56b469c FC |
213 | Parser::Tokenizer t(SBuf("1234")); |
214 | const int64_t benchmark = 1234; | |
215 | CPPUNIT_ASSERT(t.int64(rv)); | |
216 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
566c0b2b | 217 | CPPUNIT_ASSERT(t.buf().isEmpty()); |
a56b469c FC |
218 | } |
219 | ||
220 | // successful parse, autodetect base | |
221 | { | |
e48aef3e | 222 | int64_t rv; |
a56b469c FC |
223 | Parser::Tokenizer t(SBuf("01234")); |
224 | const int64_t benchmark = 01234; | |
225 | CPPUNIT_ASSERT(t.int64(rv)); | |
226 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
566c0b2b | 227 | CPPUNIT_ASSERT(t.buf().isEmpty()); |
a56b469c FC |
228 | } |
229 | ||
230 | // successful parse, autodetect base | |
231 | { | |
e48aef3e | 232 | int64_t rv; |
a56b469c FC |
233 | Parser::Tokenizer t(SBuf("0x12f4")); |
234 | const int64_t benchmark = 0x12f4; | |
235 | CPPUNIT_ASSERT(t.int64(rv)); | |
236 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
566c0b2b | 237 | CPPUNIT_ASSERT(t.buf().isEmpty()); |
a56b469c FC |
238 | } |
239 | ||
240 | // API mismatch: don't eat leading space | |
241 | { | |
e48aef3e | 242 | int64_t rv; |
a56b469c FC |
243 | Parser::Tokenizer t(SBuf(" 1234")); |
244 | CPPUNIT_ASSERT(!t.int64(rv)); | |
566c0b2b | 245 | CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf()); |
a56b469c FC |
246 | } |
247 | ||
248 | // API mismatch: don't eat multiple leading spaces | |
249 | { | |
e48aef3e | 250 | int64_t rv; |
a56b469c FC |
251 | Parser::Tokenizer t(SBuf(" 1234")); |
252 | CPPUNIT_ASSERT(!t.int64(rv)); | |
566c0b2b | 253 | CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf()); |
a56b469c FC |
254 | } |
255 | ||
256 | // trailing spaces | |
257 | { | |
e48aef3e | 258 | int64_t rv; |
a56b469c FC |
259 | Parser::Tokenizer t(SBuf("1234 foo")); |
260 | const int64_t benchmark = 1234; | |
261 | CPPUNIT_ASSERT(t.int64(rv)); | |
262 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
263 | CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf()); | |
264 | } | |
265 | ||
266 | // trailing nonspaces | |
267 | { | |
e48aef3e | 268 | int64_t rv; |
a56b469c FC |
269 | Parser::Tokenizer t(SBuf("1234foo")); |
270 | const int64_t benchmark = 1234; | |
271 | CPPUNIT_ASSERT(t.int64(rv)); | |
272 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
273 | CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf()); | |
274 | } | |
275 | ||
276 | // trailing nonspaces | |
277 | { | |
e48aef3e | 278 | int64_t rv; |
a56b469c FC |
279 | Parser::Tokenizer t(SBuf("0x1234foo")); |
280 | const int64_t benchmark = 0x1234f; | |
281 | CPPUNIT_ASSERT(t.int64(rv)); | |
282 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
283 | CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf()); | |
284 | } | |
e48aef3e FC |
285 | |
286 | // overflow | |
287 | { | |
288 | int64_t rv; | |
289 | Parser::Tokenizer t(SBuf("1029397752385698678762234")); | |
290 | CPPUNIT_ASSERT(!t.int64(rv)); | |
566c0b2b | 291 | CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf()); |
e48aef3e | 292 | } |
0f5e5bb3 AJ |
293 | |
294 | // buffered sub-string parsing | |
295 | { | |
296 | int64_t rv; | |
297 | SBuf base("1029397752385698678762234"); | |
298 | const int64_t benchmark = 22; | |
299 | Parser::Tokenizer t(base.substr(base.length()-4,2)); | |
300 | CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf()); | |
301 | CPPUNIT_ASSERT(t.int64(rv)); | |
302 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
566c0b2b | 303 | CPPUNIT_ASSERT(t.buf().isEmpty()); |
0f5e5bb3 | 304 | } |
01f2137d FC |
305 | |
306 | // base-16, prefix | |
307 | { | |
308 | int64_t rv; | |
309 | SBuf base("deadbeefrow"); | |
310 | const int64_t benchmark=0xdeadbeef; | |
311 | Parser::Tokenizer t(base); | |
312 | CPPUNIT_ASSERT(t.int64(rv,16)); | |
313 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
314 | CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf()); | |
315 | ||
316 | } | |
a56b469c | 317 | } |
f53969cc | 318 |