]>
Commit | Line | Data |
---|---|---|
c9a4e310 | 1 | #include "squid.h" |
c9a4e310 | 2 | #include "base/CharacterSet.h" |
5d4cfe02 AJ |
3 | #include "parser/Tokenizer.h" |
4 | #include "testTokenizer.h" | |
c9a4e310 FC |
5 | |
6 | CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); | |
7 | ||
8 | SBuf text("GET http://resource.com/path HTTP/1.1\r\n" | |
11bd4370 A |
9 | "Host: resource.com\r\n" |
10 | "Cookie: laijkpk3422r j1noin \r\n" | |
11 | "\r\n"); | |
c9a4e310 FC |
12 | const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); |
13 | const CharacterSet whitespace("whitespace"," \r\n"); | |
14 | const CharacterSet crlf("crlf","\r\n"); | |
15 | const CharacterSet tab("tab","\t"); | |
16 | const CharacterSet numbers("numbers","0123456789"); | |
17 | ||
18 | void | |
19 | testTokenizer::testTokenizerPrefix() | |
20 | { | |
21 | Parser::Tokenizer t(text); | |
22 | SBuf s; | |
23 | ||
24 | // successful prefix tokenization | |
25 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
26 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
27 | CPPUNIT_ASSERT(t.prefix(s,whitespace)); | |
28 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
29 | ||
30 | //no match (first char is not in the prefix set) | |
31 | CPPUNIT_ASSERT(!t.prefix(s,whitespace)); | |
32 | CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); | |
33 | ||
34 | // one more match to set S to something meaningful | |
35 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
36 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
37 | ||
38 | //no match (no characters from the character set in the prefix) | |
39 | CPPUNIT_ASSERT(!t.prefix(s,tab)); | |
40 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched | |
41 | ||
42 | // match until the end of the sample | |
43 | CharacterSet all(whitespace); | |
44 | all += alpha; | |
45 | all += crlf; | |
46 | all += numbers; | |
47 | all.add(':').add('.').add('/'); | |
48 | CPPUNIT_ASSERT(t.prefix(s,all)); | |
49 | CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining()); | |
50 | } | |
51 | ||
52 | void | |
53 | testTokenizer::testTokenizerSkip() | |
54 | { | |
55 | Parser::Tokenizer t(text); | |
56 | SBuf s; | |
57 | ||
58 | // first scenario: patterns match | |
59 | // prep for test | |
60 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
61 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
62 | ||
1ab04517 AR |
63 | // test skipping one character from a character set |
64 | CPPUNIT_ASSERT(t.skipOne(whitespace)); | |
c9a4e310 FC |
65 | // check that skip was right |
66 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
67 | CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); | |
68 | ||
69 | //check skip prefix | |
70 | CPPUNIT_ASSERT(t.skip(SBuf("://"))); | |
71 | // verify | |
72 | CPPUNIT_ASSERT(t.prefix(s,alpha)); | |
73 | CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s); | |
74 | ||
75 | // no skip | |
1ab04517 | 76 | CPPUNIT_ASSERT(!t.skipOne(alpha)); |
c9a4e310 FC |
77 | CPPUNIT_ASSERT(!t.skip(SBuf("://"))); |
78 | CPPUNIT_ASSERT(!t.skip('a')); | |
79 | ||
1ab04517 AR |
80 | // test skipping all characters from a character set while looking at .com |
81 | CPPUNIT_ASSERT(t.skip('.')); | |
82 | CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha)); | |
83 | CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path"))); | |
c9a4e310 FC |
84 | } |
85 | ||
86 | void | |
87 | testTokenizer::testTokenizerToken() | |
88 | { | |
89 | Parser::Tokenizer t(text); | |
90 | SBuf s; | |
91 | ||
92 | // first scenario: patterns match | |
93 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
94 | CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); | |
95 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
96 | CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s); | |
97 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
98 | CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s); | |
99 | CPPUNIT_ASSERT(t.token(s,whitespace)); | |
100 | CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s); | |
101 | ||
c9a4e310 FC |
102 | } |
103 | ||
104 | void | |
105 | testTokenizer::testCharacterSet() | |
106 | { | |
107 | ||
108 | } | |
a56b469c FC |
109 | |
110 | void | |
111 | testTokenizer::testTokenizerInt64() | |
112 | { | |
a56b469c FC |
113 | // successful parse in base 10 |
114 | { | |
e48aef3e | 115 | int64_t rv; |
a56b469c FC |
116 | Parser::Tokenizer t(SBuf("1234")); |
117 | const int64_t benchmark = 1234; | |
118 | CPPUNIT_ASSERT(t.int64(rv, 10)); | |
119 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
120 | } | |
121 | ||
122 | // successful parse, autodetect base | |
123 | { | |
e48aef3e | 124 | int64_t rv; |
a56b469c FC |
125 | Parser::Tokenizer t(SBuf("1234")); |
126 | const int64_t benchmark = 1234; | |
127 | CPPUNIT_ASSERT(t.int64(rv)); | |
128 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
129 | } | |
130 | ||
131 | // successful parse, autodetect base | |
132 | { | |
e48aef3e | 133 | int64_t rv; |
a56b469c FC |
134 | Parser::Tokenizer t(SBuf("01234")); |
135 | const int64_t benchmark = 01234; | |
136 | CPPUNIT_ASSERT(t.int64(rv)); | |
137 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
138 | } | |
139 | ||
140 | // successful parse, autodetect base | |
141 | { | |
e48aef3e | 142 | int64_t rv; |
a56b469c FC |
143 | Parser::Tokenizer t(SBuf("0x12f4")); |
144 | const int64_t benchmark = 0x12f4; | |
145 | CPPUNIT_ASSERT(t.int64(rv)); | |
146 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
147 | } | |
148 | ||
149 | // API mismatch: don't eat leading space | |
150 | { | |
e48aef3e | 151 | int64_t rv; |
a56b469c FC |
152 | Parser::Tokenizer t(SBuf(" 1234")); |
153 | CPPUNIT_ASSERT(!t.int64(rv)); | |
154 | } | |
155 | ||
156 | // API mismatch: don't eat multiple leading spaces | |
157 | { | |
e48aef3e | 158 | int64_t rv; |
a56b469c FC |
159 | Parser::Tokenizer t(SBuf(" 1234")); |
160 | CPPUNIT_ASSERT(!t.int64(rv)); | |
161 | } | |
162 | ||
163 | // trailing spaces | |
164 | { | |
e48aef3e | 165 | int64_t rv; |
a56b469c FC |
166 | Parser::Tokenizer t(SBuf("1234 foo")); |
167 | const int64_t benchmark = 1234; | |
168 | CPPUNIT_ASSERT(t.int64(rv)); | |
169 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
170 | CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf()); | |
171 | } | |
172 | ||
173 | // trailing nonspaces | |
174 | { | |
e48aef3e | 175 | int64_t rv; |
a56b469c FC |
176 | Parser::Tokenizer t(SBuf("1234foo")); |
177 | const int64_t benchmark = 1234; | |
178 | CPPUNIT_ASSERT(t.int64(rv)); | |
179 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
180 | CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf()); | |
181 | } | |
182 | ||
183 | // trailing nonspaces | |
184 | { | |
e48aef3e | 185 | int64_t rv; |
a56b469c FC |
186 | Parser::Tokenizer t(SBuf("0x1234foo")); |
187 | const int64_t benchmark = 0x1234f; | |
188 | CPPUNIT_ASSERT(t.int64(rv)); | |
189 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
190 | CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf()); | |
191 | } | |
e48aef3e FC |
192 | |
193 | // overflow | |
194 | { | |
195 | int64_t rv; | |
196 | Parser::Tokenizer t(SBuf("1029397752385698678762234")); | |
197 | CPPUNIT_ASSERT(!t.int64(rv)); | |
198 | } | |
0f5e5bb3 AJ |
199 | |
200 | // buffered sub-string parsing | |
201 | { | |
202 | int64_t rv; | |
203 | SBuf base("1029397752385698678762234"); | |
204 | const int64_t benchmark = 22; | |
205 | Parser::Tokenizer t(base.substr(base.length()-4,2)); | |
206 | CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf()); | |
207 | CPPUNIT_ASSERT(t.int64(rv)); | |
208 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
209 | } | |
01f2137d FC |
210 | |
211 | // base-16, prefix | |
212 | { | |
213 | int64_t rv; | |
214 | SBuf base("deadbeefrow"); | |
215 | const int64_t benchmark=0xdeadbeef; | |
216 | Parser::Tokenizer t(base); | |
217 | CPPUNIT_ASSERT(t.int64(rv,16)); | |
218 | CPPUNIT_ASSERT_EQUAL(benchmark,rv); | |
219 | CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf()); | |
220 | ||
221 | } | |
a56b469c | 222 | } |