]>
Commit | Line | Data |
---|---|---|
bbc27441 AJ |
1 | /* |
2 | * Copyright (C) 1996-2014 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c9a4e310 | 9 | #include "squid.h" |
c9a4e310 | 10 | #include "base/CharacterSet.h" |
5d4cfe02 AJ |
11 | #include "parser/Tokenizer.h" |
12 | #include "testTokenizer.h" | |
c9a4e310 FC |
13 | |
14 | CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); | |
15 | ||
16 | SBuf text("GET http://resource.com/path HTTP/1.1\r\n" | |
11bd4370 A |
17 | "Host: resource.com\r\n" |
18 | "Cookie: laijkpk3422r j1noin \r\n" | |
19 | "\r\n"); | |
c9a4e310 FC |
20 | const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); |
21 | const CharacterSet whitespace("whitespace"," \r\n"); | |
22 | const CharacterSet crlf("crlf","\r\n"); | |
23 | const CharacterSet tab("tab","\t"); | |
24 | const CharacterSet numbers("numbers","0123456789"); | |
25 | ||
26 | void | |
27 | testTokenizer::testTokenizerPrefix() | |
28 | { | |
1d53a60a AR |
29 | const SBuf canary("This text should not be changed."); |
30 | ||
c9a4e310 FC |
31 | Parser::Tokenizer t(text); |
32 | SBuf s; | |
33 | ||
1d53a60a AR |
34 | CharacterSet all(whitespace); |
35 | all += alpha; | |
36 | all += crlf; | |
37 | all += numbers; | |
38 | all.add(':').add('.').add('/'); | |
39 | ||
40 | // an empty prefix should return false (the full output buffer case) | |
41 | s = canary; | |
42 | const SBuf before = t.remaining(); | |
43 | CPPUNIT_ASSERT(!t.prefix(s, all, 0)); | |
44 | // ... and a false return value means no parameter changes | |
45 | CPPUNIT_ASSERT_EQUAL(canary, s); | |
46 | // ... and a false return value means no input buffer changes | |
47 | CPPUNIT_ASSERT_EQUAL(before, t.remaining()); | |
48 | ||
c9a4e310 FC |
49 | // successful prefix tokenization |
50 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
51 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
52 | CPPUNIT_ASSERT(t.prefix(s,whitespace)); | |
53 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
54 | ||
55 | //no match (first char is not in the prefix set) | |
56 | CPPUNIT_ASSERT(!t.prefix(s,whitespace)); | |
57 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
58 | ||
59 | // one more match to set S to something meaningful | |
60 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
61 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
62 | ||
63 | //no match (no characters from the character set in the prefix) | |
64 | CPPUNIT_ASSERT(!t.prefix(s,tab)); | |
65 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched | |
66 | ||
67 | // match until the end of the sample | |
c9a4e310 FC |
68 | CPPUNIT_ASSERT(t.prefix(s,all)); |
69 | CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining()); | |
1d53a60a AR |
70 | |
71 | // empty prefix should return false (the empty input buffer case) | |
72 | s = canary; | |
73 | CPPUNIT_ASSERT(!t.prefix(s, all)); | |
74 | // ... and a false return value means no parameter changes | |
75 | CPPUNIT_ASSERT_EQUAL(canary, s); | |
c9a4e310 FC |
76 | } |
77 | ||
78 | void | |
79 | testTokenizer::testTokenizerSkip() | |
80 | { | |
81 | Parser::Tokenizer t(text); | |
82 | SBuf s; | |
83 | ||
84 | // first scenario: patterns match | |
85 | // prep for test | |
86 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
87 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
88 | ||
1ab04517 AR |
89 | // test skipping one character from a character set |
90 | CPPUNIT_ASSERT(t.skipOne(whitespace)); | |
c9a4e310 FC |
91 | // check that skip was right |
92 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
93 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
94 | ||
95 | //check skip prefix | |
96 | CPPUNIT_ASSERT(t.skip(SBuf("://"))); | |
97 | // verify | |
98 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
99 | CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s); | |
100 | ||
101 | // no skip | |
1ab04517 | 102 | CPPUNIT_ASSERT(!t.skipOne(alpha)); |
c9a4e310 FC |
103 | CPPUNIT_ASSERT(!t.skip(SBuf("://"))); |
104 | CPPUNIT_ASSERT(!t.skip('a')); | |
105 | ||
1ab04517 AR |
106 | // test skipping all characters from a character set while looking at .com |
107 | CPPUNIT_ASSERT(t.skip('.')); | |
108 | CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha)); | |
109 | CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path"))); | |
c9a4e310 FC |
110 | } |
111 | ||
112 | void | |
113 | testTokenizer::testTokenizerToken() | |
114 | { | |
115 | Parser::Tokenizer t(text); | |
116 | SBuf s; | |
117 | ||
118 | // first scenario: patterns match | |
119 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
120 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
121 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
122 | CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s); | |
123 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
124 | CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s); | |
125 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
126 | CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s); | |
127 | ||
c9a4e310 FC |
128 | } |
129 | ||
130 | void | |
131 | testTokenizer::testCharacterSet() | |
132 | { | |
133 | ||
134 | } | |
a56b469c FC |
135 | |
136 | void | |
137 | testTokenizer::testTokenizerInt64() | |
138 | { | |
a56b469c FC |
139 | // successful parse in base 10 |
140 | { | |
e48aef3e | 141 | int64_t rv; |
a56b469c FC |
142 | Parser::Tokenizer t(SBuf("1234")); |
143 | const int64_t benchmark = 1234; | |
144 | CPPUNIT_ASSERT(t.int64(rv, 10)); | |
145 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
146 | } | |
147 | ||
148 | // successful parse, autodetect base | |
149 | { | |
e48aef3e | 150 | int64_t rv; |
a56b469c FC |
151 | Parser::Tokenizer t(SBuf("1234")); |
152 | const int64_t benchmark = 1234; | |
153 | CPPUNIT_ASSERT(t.int64(rv)); | |
154 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
155 | } | |
156 | ||
157 | // successful parse, autodetect base | |
158 | { | |
e48aef3e | 159 | int64_t rv; |
a56b469c FC |
160 | Parser::Tokenizer t(SBuf("01234")); |
161 | const int64_t benchmark = 01234; | |
162 | CPPUNIT_ASSERT(t.int64(rv)); | |
163 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
164 | } | |
165 | ||
166 | // successful parse, autodetect base | |
167 | { | |
e48aef3e | 168 | int64_t rv; |
a56b469c FC |
169 | Parser::Tokenizer t(SBuf("0x12f4")); |
170 | const int64_t benchmark = 0x12f4; | |
171 | CPPUNIT_ASSERT(t.int64(rv)); | |
172 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
173 | } | |
174 | ||
175 | // API mismatch: don't eat leading space | |
176 | { | |
e48aef3e | 177 | int64_t rv; |
a56b469c FC |
178 | Parser::Tokenizer t(SBuf(" 1234")); |
179 | CPPUNIT_ASSERT(!t.int64(rv)); | |
180 | } | |
181 | ||
182 | // API mismatch: don't eat multiple leading spaces | |
183 | { | |
e48aef3e | 184 | int64_t rv; |
a56b469c FC |
185 | Parser::Tokenizer t(SBuf(" 1234")); |
186 | CPPUNIT_ASSERT(!t.int64(rv)); | |
187 | } | |
188 | ||
189 | // trailing spaces | |
190 | { | |
e48aef3e | 191 | int64_t rv; |
a56b469c FC |
192 | Parser::Tokenizer t(SBuf("1234 foo")); |
193 | const int64_t benchmark = 1234; | |
194 | CPPUNIT_ASSERT(t.int64(rv)); | |
195 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
196 | CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf()); | |
197 | } | |
198 | ||
199 | // trailing nonspaces | |
200 | { | |
e48aef3e | 201 | int64_t rv; |
a56b469c FC |
202 | Parser::Tokenizer t(SBuf("1234foo")); |
203 | const int64_t benchmark = 1234; | |
204 | CPPUNIT_ASSERT(t.int64(rv)); | |
205 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
206 | CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf()); | |
207 | } | |
208 | ||
209 | // trailing nonspaces | |
210 | { | |
e48aef3e | 211 | int64_t rv; |
a56b469c FC |
212 | Parser::Tokenizer t(SBuf("0x1234foo")); |
213 | const int64_t benchmark = 0x1234f; | |
214 | CPPUNIT_ASSERT(t.int64(rv)); | |
215 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
216 | CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf()); | |
217 | } | |
e48aef3e FC |
218 | |
219 | // overflow | |
220 | { | |
221 | int64_t rv; | |
222 | Parser::Tokenizer t(SBuf("1029397752385698678762234")); | |
223 | CPPUNIT_ASSERT(!t.int64(rv)); | |
224 | } | |
0f5e5bb3 AJ |
225 | |
226 | // buffered sub-string parsing | |
227 | { | |
228 | int64_t rv; | |
229 | SBuf base("1029397752385698678762234"); | |
230 | const int64_t benchmark = 22; | |
231 | Parser::Tokenizer t(base.substr(base.length()-4,2)); | |
232 | CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf()); | |
233 | CPPUNIT_ASSERT(t.int64(rv)); | |
234 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
235 | } | |
01f2137d FC |
236 | |
237 | // base-16, prefix | |
238 | { | |
239 | int64_t rv; | |
240 | SBuf base("deadbeefrow"); | |
241 | const int64_t benchmark=0xdeadbeef; | |
242 | Parser::Tokenizer t(base); | |
243 | CPPUNIT_ASSERT(t.int64(rv,16)); | |
244 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
245 | CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf()); | |
246 | ||
247 | } | |
a56b469c | 248 | } |