]>
Commit | Line | Data |
---|---|---|
582c2af2 | 1 | #include "squid.h" |
3d93a84d | 2 | #include "base/TextException.h" |
774c051c | 3 | #include "ChunkedCodingParser.h" |
602d9612 | 4 | #include "Debug.h" |
774c051c | 5 | #include "MemBuf.h" |
602d9612 | 6 | #include "Parsing.h" |
774c051c | 7 | |
5c550f5f AR |
8 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize; |
9 | ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension; | |
10 | ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension; | |
774c051c | 11 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody; |
12 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd; | |
13 | ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer; | |
14 | ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd; | |
15 | ||
16 | ChunkedCodingParser::ChunkedCodingParser() | |
17 | { | |
18 | reset(); | |
19 | } | |
20 | ||
21 | void ChunkedCodingParser::reset() | |
22 | { | |
5c550f5f | 23 | theStep = psChunkSize; |
774c051c | 24 | theChunkSize = theLeftBodySize = 0; |
25 | doNeedMoreData = false; | |
774c051c | 26 | theIn = theOut = NULL; |
83c51da9 | 27 | useOriginBody = -1; |
5c550f5f | 28 | inQuoted = inSlashed = false; |
774c051c | 29 | } |
30 | ||
31 | bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent) | |
32 | { | |
33 | Must(rawData && parsedContent); | |
34 | theIn = rawData; | |
35 | theOut = parsedContent; | |
36 | ||
37 | // we must reset this all the time so that mayContinue() lets us | |
38 | // output more content if we stopped due to needsMoreSpace() before | |
39 | doNeedMoreData = !theIn->hasContent(); | |
40 | ||
41 | while (mayContinue()) { | |
42 | (this->*theStep)(); | |
43 | } | |
44 | ||
45 | return theStep == psMessageEnd; | |
46 | } | |
47 | ||
48 | bool ChunkedCodingParser::needsMoreData() const | |
49 | { | |
50 | return doNeedMoreData; | |
51 | } | |
52 | ||
53 | bool ChunkedCodingParser::needsMoreSpace() const | |
54 | { | |
55 | assert(theOut); | |
56 | return theStep == psChunkBody && !theOut->hasPotentialSpace(); | |
57 | } | |
58 | ||
59 | bool ChunkedCodingParser::mayContinue() const | |
60 | { | |
61 | return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd; | |
62 | } | |
63 | ||
5c550f5f | 64 | void ChunkedCodingParser::parseChunkSize() |
774c051c | 65 | { |
66 | Must(theChunkSize <= 0); // Should(), really | |
67 | ||
5c550f5f AR |
68 | const char *p = theIn->content(); |
69 | while (p < theIn->space() && xisxdigit(*p)) ++p; | |
70 | if (p >= theIn->space()) { | |
71 | doNeedMoreData = true; | |
72 | return; | |
73 | } | |
83c51da9 | 74 | |
5c550f5f AR |
75 | int64_t size = -1; |
76 | if (StringToInt64(theIn->content(), size, &p, 16)) { | |
77 | if (size < 0) | |
78 | throw TexcHere("negative chunk size"); | |
79 | ||
80 | theChunkSize = theLeftBodySize = size; | |
81 | debugs(94,7, "found chunk: " << theChunkSize); | |
82 | // parse chunk extensions only in the last-chunk | |
83 | if (theChunkSize) | |
84 | theStep = psUnusedChunkExtension; | |
85 | else { | |
86 | theIn->consume(p - theIn->content()); | |
87 | theStep = psLastChunkExtension; | |
774c051c | 88 | } |
5c550f5f | 89 | } else |
774c051c | 90 | throw TexcHere("corrupted chunk size"); |
5c550f5f | 91 | } |
774c051c | 92 | |
5c550f5f AR |
93 | void ChunkedCodingParser::parseUnusedChunkExtension() |
94 | { | |
95 | size_t crlfBeg = 0; | |
96 | size_t crlfEnd = 0; | |
97 | if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) { | |
98 | inQuoted = inSlashed = false; | |
99 | theIn->consume(crlfEnd); | |
100 | theStep = theChunkSize ? psChunkBody : psTrailer; | |
101 | } else { | |
102 | theIn->consume(theIn->contentSize()); | |
103 | doNeedMoreData = true; | |
104 | } | |
774c051c | 105 | } |
106 | ||
107 | void ChunkedCodingParser::parseChunkBody() | |
108 | { | |
109 | Must(theLeftBodySize > 0); // Should, really | |
110 | ||
d85c3078 AJ |
111 | const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize()); |
112 | const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize()); | |
774c051c | 113 | |
114 | doNeedMoreData = availSize < theLeftBodySize; | |
115 | // and we may also need more space | |
116 | ||
117 | theOut->append(theIn->content(), safeSize); | |
118 | theIn->consume(safeSize); | |
119 | theLeftBodySize -= safeSize; | |
120 | ||
121 | if (theLeftBodySize == 0) | |
122 | theStep = psChunkEnd; | |
123 | else | |
124 | Must(needsMoreData() || needsMoreSpace()); | |
125 | } | |
126 | ||
127 | void ChunkedCodingParser::parseChunkEnd() | |
128 | { | |
129 | Must(theLeftBodySize == 0); // Should(), really | |
130 | ||
131 | size_t crlfBeg = 0; | |
132 | size_t crlfEnd = 0; | |
133 | ||
134 | if (findCrlf(crlfBeg, crlfEnd)) { | |
135 | if (crlfBeg != 0) { | |
5e956603 | 136 | throw TexcHere("found data between chunk end and CRLF"); |
774c051c | 137 | return; |
138 | } | |
139 | ||
140 | theIn->consume(crlfEnd); | |
141 | theChunkSize = 0; // done with the current chunk | |
5c550f5f | 142 | theStep = psChunkSize; |
774c051c | 143 | return; |
144 | } | |
145 | ||
146 | doNeedMoreData = true; | |
147 | } | |
148 | ||
149 | void ChunkedCodingParser::parseTrailer() | |
150 | { | |
151 | Must(theChunkSize == 0); // Should(), really | |
152 | ||
153 | while (mayContinue()) | |
154 | parseTrailerHeader(); | |
155 | } | |
156 | ||
157 | void ChunkedCodingParser::parseTrailerHeader() | |
158 | { | |
159 | size_t crlfBeg = 0; | |
160 | size_t crlfEnd = 0; | |
161 | ||
162 | if (findCrlf(crlfBeg, crlfEnd)) { | |
774c051c | 163 | |
d7e8bdf7 AR |
164 | #if TRAILERS_ARE_SUPPORTED |
165 | if (crlfBeg > 0) | |
166 | theTrailer.append(theIn->content(), crlfEnd); | |
d8b258a9 | 167 | #endif |
774c051c | 168 | |
169 | theIn->consume(crlfEnd); | |
170 | ||
171 | if (crlfBeg == 0) | |
172 | theStep = psMessageEnd; | |
173 | ||
174 | return; | |
175 | } | |
176 | ||
177 | doNeedMoreData = true; | |
178 | } | |
179 | ||
180 | void ChunkedCodingParser::parseMessageEnd() | |
181 | { | |
182 | // termination step, should not be called | |
183 | Must(false); // Should(), really | |
184 | } | |
185 | ||
5c550f5f | 186 | /// Finds next CRLF. Does not store parsing state. |
774c051c | 187 | bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) |
5c550f5f AR |
188 | { |
189 | bool quoted = false; | |
190 | bool slashed = false; | |
191 | return findCrlf(crlfBeg, crlfEnd, quoted, slashed); | |
192 | } | |
193 | ||
194 | /// Finds next CRLF. Parsing state stored in quoted and slashed | |
195 | /// parameters. Incremental: can resume when more data is available. | |
196 | bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed) | |
774c051c | 197 | { |
198 | // XXX: This code was copied, with permission, from another software. | |
199 | // There is a similar and probably better code inside httpHeaderParse | |
200 | // but it seems difficult to isolate due to parsing-unrelated bloat. | |
201 | // Such isolation should probably be done before this class is used | |
202 | // for handling of traffic "more external" than ICAP. | |
203 | ||
204 | const char *buf = theIn->content(); | |
205 | size_t size = theIn->contentSize(); | |
206 | ||
207 | ssize_t crOff = -1; | |
774c051c | 208 | |
209 | for (size_t i = 0; i < size; ++i) { | |
210 | if (slashed) { | |
211 | slashed = false; | |
212 | continue; | |
213 | } | |
214 | ||
215 | const char c = buf[i]; | |
216 | ||
217 | // handle quoted strings | |
218 | if (quoted) { | |
219 | if (c == '\\') | |
220 | slashed = true; | |
e1381638 AJ |
221 | else if (c == '"') |
222 | quoted = false; | |
774c051c | 223 | |
224 | continue; | |
e1381638 AJ |
225 | } else if (c == '"') { |
226 | quoted = true; | |
227 | crOff = -1; | |
228 | continue; | |
229 | } | |
774c051c | 230 | |
231 | if (crOff < 0) { // looking for the first CR or LF | |
232 | ||
233 | if (c == '\n') { | |
234 | crlfBeg = i; | |
235 | crlfEnd = ++i; | |
236 | return true; | |
237 | } | |
238 | ||
239 | if (c == '\r') | |
240 | crOff = i; | |
241 | } else { // skipping CRs, looking for the first LF | |
242 | ||
243 | if (c == '\n') { | |
244 | crlfBeg = crOff; | |
245 | crlfEnd = ++i; | |
246 | return true; | |
247 | } | |
248 | ||
249 | if (c != '\r') | |
250 | crOff = -1; | |
251 | } | |
252 | } | |
253 | ||
254 | return false; | |
255 | } | |
256 | ||
83c51da9 | 257 | // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) |
5c550f5f | 258 | void ChunkedCodingParser::parseLastChunkExtension() |
83c51da9 | 259 | { |
5c550f5f AR |
260 | size_t crlfBeg = 0; |
261 | size_t crlfEnd = 0; | |
262 | ||
263 | if (!findCrlf(crlfBeg, crlfEnd)) { | |
264 | doNeedMoreData = true; | |
265 | return; | |
266 | } | |
267 | ||
268 | const char *const startExt = theIn->content(); | |
269 | const char *const endExt = theIn->content() + crlfBeg; | |
270 | ||
83c51da9 CT |
271 | // chunk-extension starts at startExt and ends with LF at endEx |
272 | for (const char *p = startExt; p < endExt;) { | |
273 | ||
274 | while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';' | |
7ddcfbab | 275 | |
83c51da9 | 276 | if (*p++ != ';') // each ext name=value pair is preceded with ';' |
5c550f5f | 277 | break; |
7ddcfbab | 278 | |
83c51da9 | 279 | while (*p == ' ' || *p == '\t') ++p; // skip spaces before name |
7ddcfbab | 280 | |
83c51da9 | 281 | if (p >= endExt) |
5c550f5f | 282 | break; // malformed extension: ';' without ext name=value pair |
83c51da9 CT |
283 | |
284 | const int extSize = endExt - p; | |
285 | // TODO: we need debugData() stream manipulator to dump data | |
286 | debugs(94,7, "Found chunk extension; size=" << extSize); | |
287 | ||
288 | // TODO: support implied *LWS around '=' | |
289 | if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) { | |
290 | (void)StringToInt64(p+18, useOriginBody, &p, 10); | |
291 | debugs(94, 3, HERE << "use-original-body=" << useOriginBody); | |
5c550f5f | 292 | break; // remove to support more than just use-original-body |
83c51da9 CT |
293 | } else { |
294 | debugs(94, 5, HERE << "skipping unknown chunk extension"); | |
295 | // TODO: support quoted-string chunk-ext-val | |
296 | while (p < endExt && *p != ';') ++p; // skip until the next ';' | |
297 | } | |
298 | } | |
5c550f5f AR |
299 | |
300 | theIn->consume(crlfEnd); | |
301 | theStep = theChunkSize ? psChunkBody : psTrailer; | |
83c51da9 | 302 | } |