]> git.ipfire.org Git - thirdparty/squid.git/blob - src/parser/testTokenizer.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / parser / testTokenizer.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 #include "parser/Tokenizer.h"
12 #include "testTokenizer.h"
13
14 CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer );
15
16 SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
17 "Host: resource.com\r\n"
18 "Cookie: laijkpk3422r j1noin \r\n"
19 "\r\n");
20 const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
21 const CharacterSet whitespace("whitespace"," \r\n");
22 const CharacterSet crlf("crlf","\r\n");
23 const CharacterSet tab("tab","\t");
24 const CharacterSet numbers("numbers","0123456789");
25
26 void
27 testTokenizer::testTokenizerPrefix()
28 {
29 const SBuf canary("This text should not be changed.");
30
31 Parser::Tokenizer t(text);
32 SBuf s;
33
34 CharacterSet all(whitespace);
35 all += alpha;
36 all += crlf;
37 all += numbers;
38 all.add(':').add('.').add('/');
39
40 // an empty prefix should return false (the full output buffer case)
41 s = canary;
42 const SBuf before = t.remaining();
43 CPPUNIT_ASSERT(!t.prefix(s, all, 0));
44 // ... and a false return value means no parameter changes
45 CPPUNIT_ASSERT_EQUAL(canary, s);
46 // ... and a false return value means no input buffer changes
47 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
48
49 // successful prefix tokenization
50 CPPUNIT_ASSERT(t.prefix(s,alpha));
51 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
52 CPPUNIT_ASSERT(t.prefix(s,whitespace));
53 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
54
55 //no match (first char is not in the prefix set)
56 CPPUNIT_ASSERT(!t.prefix(s,whitespace));
57 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
58
59 // one more match to set S to something meaningful
60 CPPUNIT_ASSERT(t.prefix(s,alpha));
61 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
62
63 //no match (no characters from the character set in the prefix)
64 CPPUNIT_ASSERT(!t.prefix(s,tab));
65 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
66
67 // match until the end of the sample
68 CPPUNIT_ASSERT(t.prefix(s,all));
69 CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
70
71 // empty prefix should return false (the empty input buffer case)
72 s = canary;
73 CPPUNIT_ASSERT(!t.prefix(s, all));
74 // ... and a false return value means no parameter changes
75 CPPUNIT_ASSERT_EQUAL(canary, s);
76 }
77
78 void
79 testTokenizer::testTokenizerSkip()
80 {
81 Parser::Tokenizer t(text);
82 SBuf s;
83
84 // first scenario: patterns match
85 // prep for test
86 CPPUNIT_ASSERT(t.prefix(s,alpha));
87 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
88
89 // test skipping one character from a character set
90 CPPUNIT_ASSERT(t.skipOne(whitespace));
91 // check that skip was right
92 CPPUNIT_ASSERT(t.prefix(s,alpha));
93 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
94
95 //check skip prefix
96 CPPUNIT_ASSERT(t.skip(SBuf("://")));
97 // verify
98 CPPUNIT_ASSERT(t.prefix(s,alpha));
99 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
100
101 // no skip
102 CPPUNIT_ASSERT(!t.skipOne(alpha));
103 CPPUNIT_ASSERT(!t.skip(SBuf("://")));
104 CPPUNIT_ASSERT(!t.skip('a'));
105
106 // test skipping all characters from a character set while looking at .com
107 CPPUNIT_ASSERT(t.skip('.'));
108 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
109 CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
110 }
111
112 void
113 testTokenizer::testTokenizerToken()
114 {
115 Parser::Tokenizer t(text);
116 SBuf s;
117
118 // first scenario: patterns match
119 CPPUNIT_ASSERT(t.token(s,whitespace));
120 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
121 CPPUNIT_ASSERT(t.token(s,whitespace));
122 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
123 CPPUNIT_ASSERT(t.token(s,whitespace));
124 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
125 CPPUNIT_ASSERT(t.token(s,whitespace));
126 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
127
128 }
129
130 void
131 testTokenizer::testCharacterSet()
132 {
133
134 }
135
136 void
137 testTokenizer::testTokenizerInt64()
138 {
139 // successful parse in base 10
140 {
141 int64_t rv;
142 Parser::Tokenizer t(SBuf("1234"));
143 const int64_t benchmark = 1234;
144 CPPUNIT_ASSERT(t.int64(rv, 10));
145 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
146 }
147
148 // successful parse, autodetect base
149 {
150 int64_t rv;
151 Parser::Tokenizer t(SBuf("1234"));
152 const int64_t benchmark = 1234;
153 CPPUNIT_ASSERT(t.int64(rv));
154 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
155 }
156
157 // successful parse, autodetect base
158 {
159 int64_t rv;
160 Parser::Tokenizer t(SBuf("01234"));
161 const int64_t benchmark = 01234;
162 CPPUNIT_ASSERT(t.int64(rv));
163 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
164 }
165
166 // successful parse, autodetect base
167 {
168 int64_t rv;
169 Parser::Tokenizer t(SBuf("0x12f4"));
170 const int64_t benchmark = 0x12f4;
171 CPPUNIT_ASSERT(t.int64(rv));
172 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
173 }
174
175 // API mismatch: don't eat leading space
176 {
177 int64_t rv;
178 Parser::Tokenizer t(SBuf(" 1234"));
179 CPPUNIT_ASSERT(!t.int64(rv));
180 }
181
182 // API mismatch: don't eat multiple leading spaces
183 {
184 int64_t rv;
185 Parser::Tokenizer t(SBuf(" 1234"));
186 CPPUNIT_ASSERT(!t.int64(rv));
187 }
188
189 // trailing spaces
190 {
191 int64_t rv;
192 Parser::Tokenizer t(SBuf("1234 foo"));
193 const int64_t benchmark = 1234;
194 CPPUNIT_ASSERT(t.int64(rv));
195 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
196 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
197 }
198
199 // trailing nonspaces
200 {
201 int64_t rv;
202 Parser::Tokenizer t(SBuf("1234foo"));
203 const int64_t benchmark = 1234;
204 CPPUNIT_ASSERT(t.int64(rv));
205 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
206 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
207 }
208
209 // trailing nonspaces
210 {
211 int64_t rv;
212 Parser::Tokenizer t(SBuf("0x1234foo"));
213 const int64_t benchmark = 0x1234f;
214 CPPUNIT_ASSERT(t.int64(rv));
215 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
216 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
217 }
218
219 // overflow
220 {
221 int64_t rv;
222 Parser::Tokenizer t(SBuf("1029397752385698678762234"));
223 CPPUNIT_ASSERT(!t.int64(rv));
224 }
225
226 // buffered sub-string parsing
227 {
228 int64_t rv;
229 SBuf base("1029397752385698678762234");
230 const int64_t benchmark = 22;
231 Parser::Tokenizer t(base.substr(base.length()-4,2));
232 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
233 CPPUNIT_ASSERT(t.int64(rv));
234 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
235 }
236
237 // base-16, prefix
238 {
239 int64_t rv;
240 SBuf base("deadbeefrow");
241 const int64_t benchmark=0xdeadbeef;
242 Parser::Tokenizer t(base);
243 CPPUNIT_ASSERT(t.int64(rv,16));
244 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
245 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
246
247 }
248 }
249