]>
Commit | Line | Data |
---|---|---|
bbc27441 | 1 | /* |
bde978a6 | 2 | * Copyright (C) 1996-2015 The Squid Software Foundation and contributors |
bbc27441 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
582c2af2 | 9 | #include "squid.h" |
3d93a84d | 10 | #include "base/TextException.h" |
774c051c | 11 | #include "ChunkedCodingParser.h" |
602d9612 | 12 | #include "Debug.h" |
774c051c | 13 | #include "MemBuf.h" |
602d9612 | 14 | #include "Parsing.h" |
774c051c | 15 | |
5c550f5f AR |
16 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize; |
17 | ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension; | |
18 | ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension; | |
774c051c | 19 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody; |
20 | ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd; | |
21 | ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer; | |
22 | ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd; | |
23 | ||
24 | ChunkedCodingParser::ChunkedCodingParser() | |
25 | { | |
26 | reset(); | |
27 | } | |
28 | ||
29 | void ChunkedCodingParser::reset() | |
30 | { | |
5c550f5f | 31 | theStep = psChunkSize; |
774c051c | 32 | theChunkSize = theLeftBodySize = 0; |
33 | doNeedMoreData = false; | |
774c051c | 34 | theIn = theOut = NULL; |
83c51da9 | 35 | useOriginBody = -1; |
5c550f5f | 36 | inQuoted = inSlashed = false; |
774c051c | 37 | } |
38 | ||
39 | bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent) | |
40 | { | |
41 | Must(rawData && parsedContent); | |
42 | theIn = rawData; | |
43 | theOut = parsedContent; | |
44 | ||
45 | // we must reset this all the time so that mayContinue() lets us | |
46 | // output more content if we stopped due to needsMoreSpace() before | |
47 | doNeedMoreData = !theIn->hasContent(); | |
48 | ||
49 | while (mayContinue()) { | |
50 | (this->*theStep)(); | |
51 | } | |
52 | ||
53 | return theStep == psMessageEnd; | |
54 | } | |
55 | ||
56 | bool ChunkedCodingParser::needsMoreData() const | |
57 | { | |
58 | return doNeedMoreData; | |
59 | } | |
60 | ||
61 | bool ChunkedCodingParser::needsMoreSpace() const | |
62 | { | |
63 | assert(theOut); | |
64 | return theStep == psChunkBody && !theOut->hasPotentialSpace(); | |
65 | } | |
66 | ||
67 | bool ChunkedCodingParser::mayContinue() const | |
68 | { | |
69 | return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd; | |
70 | } | |
71 | ||
5c550f5f | 72 | void ChunkedCodingParser::parseChunkSize() |
774c051c | 73 | { |
74 | Must(theChunkSize <= 0); // Should(), really | |
75 | ||
5c550f5f AR |
76 | const char *p = theIn->content(); |
77 | while (p < theIn->space() && xisxdigit(*p)) ++p; | |
78 | if (p >= theIn->space()) { | |
79 | doNeedMoreData = true; | |
80 | return; | |
81 | } | |
83c51da9 | 82 | |
5c550f5f AR |
83 | int64_t size = -1; |
84 | if (StringToInt64(theIn->content(), size, &p, 16)) { | |
85 | if (size < 0) | |
86 | throw TexcHere("negative chunk size"); | |
87 | ||
88 | theChunkSize = theLeftBodySize = size; | |
89 | debugs(94,7, "found chunk: " << theChunkSize); | |
90 | // parse chunk extensions only in the last-chunk | |
91 | if (theChunkSize) | |
92 | theStep = psUnusedChunkExtension; | |
93 | else { | |
94 | theIn->consume(p - theIn->content()); | |
95 | theStep = psLastChunkExtension; | |
774c051c | 96 | } |
5c550f5f | 97 | } else |
774c051c | 98 | throw TexcHere("corrupted chunk size"); |
5c550f5f | 99 | } |
774c051c | 100 | |
5c550f5f AR |
101 | void ChunkedCodingParser::parseUnusedChunkExtension() |
102 | { | |
103 | size_t crlfBeg = 0; | |
104 | size_t crlfEnd = 0; | |
105 | if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) { | |
106 | inQuoted = inSlashed = false; | |
107 | theIn->consume(crlfEnd); | |
108 | theStep = theChunkSize ? psChunkBody : psTrailer; | |
109 | } else { | |
110 | theIn->consume(theIn->contentSize()); | |
111 | doNeedMoreData = true; | |
112 | } | |
774c051c | 113 | } |
114 | ||
115 | void ChunkedCodingParser::parseChunkBody() | |
116 | { | |
117 | Must(theLeftBodySize > 0); // Should, really | |
118 | ||
d85c3078 AJ |
119 | const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize()); |
120 | const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize()); | |
774c051c | 121 | |
122 | doNeedMoreData = availSize < theLeftBodySize; | |
123 | // and we may also need more space | |
124 | ||
125 | theOut->append(theIn->content(), safeSize); | |
126 | theIn->consume(safeSize); | |
127 | theLeftBodySize -= safeSize; | |
128 | ||
129 | if (theLeftBodySize == 0) | |
130 | theStep = psChunkEnd; | |
131 | else | |
132 | Must(needsMoreData() || needsMoreSpace()); | |
133 | } | |
134 | ||
135 | void ChunkedCodingParser::parseChunkEnd() | |
136 | { | |
137 | Must(theLeftBodySize == 0); // Should(), really | |
138 | ||
139 | size_t crlfBeg = 0; | |
140 | size_t crlfEnd = 0; | |
141 | ||
142 | if (findCrlf(crlfBeg, crlfEnd)) { | |
143 | if (crlfBeg != 0) { | |
5e956603 | 144 | throw TexcHere("found data between chunk end and CRLF"); |
774c051c | 145 | return; |
146 | } | |
147 | ||
148 | theIn->consume(crlfEnd); | |
149 | theChunkSize = 0; // done with the current chunk | |
5c550f5f | 150 | theStep = psChunkSize; |
774c051c | 151 | return; |
152 | } | |
153 | ||
154 | doNeedMoreData = true; | |
155 | } | |
156 | ||
157 | void ChunkedCodingParser::parseTrailer() | |
158 | { | |
159 | Must(theChunkSize == 0); // Should(), really | |
160 | ||
161 | while (mayContinue()) | |
162 | parseTrailerHeader(); | |
163 | } | |
164 | ||
165 | void ChunkedCodingParser::parseTrailerHeader() | |
166 | { | |
167 | size_t crlfBeg = 0; | |
168 | size_t crlfEnd = 0; | |
169 | ||
170 | if (findCrlf(crlfBeg, crlfEnd)) { | |
774c051c | 171 | |
d7e8bdf7 AR |
172 | #if TRAILERS_ARE_SUPPORTED |
173 | if (crlfBeg > 0) | |
174 | theTrailer.append(theIn->content(), crlfEnd); | |
d8b258a9 | 175 | #endif |
774c051c | 176 | |
177 | theIn->consume(crlfEnd); | |
178 | ||
179 | if (crlfBeg == 0) | |
180 | theStep = psMessageEnd; | |
181 | ||
182 | return; | |
183 | } | |
184 | ||
185 | doNeedMoreData = true; | |
186 | } | |
187 | ||
188 | void ChunkedCodingParser::parseMessageEnd() | |
189 | { | |
190 | // termination step, should not be called | |
191 | Must(false); // Should(), really | |
192 | } | |
193 | ||
5c550f5f | 194 | /// Finds next CRLF. Does not store parsing state. |
774c051c | 195 | bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) |
5c550f5f AR |
196 | { |
197 | bool quoted = false; | |
198 | bool slashed = false; | |
199 | return findCrlf(crlfBeg, crlfEnd, quoted, slashed); | |
200 | } | |
201 | ||
202 | /// Finds next CRLF. Parsing state stored in quoted and slashed | |
203 | /// parameters. Incremental: can resume when more data is available. | |
204 | bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed) | |
774c051c | 205 | { |
206 | // XXX: This code was copied, with permission, from another software. | |
207 | // There is a similar and probably better code inside httpHeaderParse | |
208 | // but it seems difficult to isolate due to parsing-unrelated bloat. | |
209 | // Such isolation should probably be done before this class is used | |
210 | // for handling of traffic "more external" than ICAP. | |
211 | ||
212 | const char *buf = theIn->content(); | |
213 | size_t size = theIn->contentSize(); | |
214 | ||
215 | ssize_t crOff = -1; | |
774c051c | 216 | |
217 | for (size_t i = 0; i < size; ++i) { | |
218 | if (slashed) { | |
219 | slashed = false; | |
220 | continue; | |
221 | } | |
222 | ||
223 | const char c = buf[i]; | |
224 | ||
225 | // handle quoted strings | |
226 | if (quoted) { | |
227 | if (c == '\\') | |
228 | slashed = true; | |
e1381638 AJ |
229 | else if (c == '"') |
230 | quoted = false; | |
774c051c | 231 | |
232 | continue; | |
e1381638 AJ |
233 | } else if (c == '"') { |
234 | quoted = true; | |
235 | crOff = -1; | |
236 | continue; | |
237 | } | |
774c051c | 238 | |
239 | if (crOff < 0) { // looking for the first CR or LF | |
240 | ||
241 | if (c == '\n') { | |
242 | crlfBeg = i; | |
243 | crlfEnd = ++i; | |
244 | return true; | |
245 | } | |
246 | ||
247 | if (c == '\r') | |
248 | crOff = i; | |
249 | } else { // skipping CRs, looking for the first LF | |
250 | ||
251 | if (c == '\n') { | |
252 | crlfBeg = crOff; | |
253 | crlfEnd = ++i; | |
254 | return true; | |
255 | } | |
256 | ||
257 | if (c != '\r') | |
258 | crOff = -1; | |
259 | } | |
260 | } | |
261 | ||
262 | return false; | |
263 | } | |
264 | ||
83c51da9 | 265 | // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) |
5c550f5f | 266 | void ChunkedCodingParser::parseLastChunkExtension() |
83c51da9 | 267 | { |
5c550f5f AR |
268 | size_t crlfBeg = 0; |
269 | size_t crlfEnd = 0; | |
270 | ||
271 | if (!findCrlf(crlfBeg, crlfEnd)) { | |
272 | doNeedMoreData = true; | |
273 | return; | |
274 | } | |
275 | ||
276 | const char *const startExt = theIn->content(); | |
277 | const char *const endExt = theIn->content() + crlfBeg; | |
278 | ||
83c51da9 CT |
279 | // chunk-extension starts at startExt and ends with LF at endEx |
280 | for (const char *p = startExt; p < endExt;) { | |
281 | ||
282 | while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';' | |
7ddcfbab | 283 | |
83c51da9 | 284 | if (*p++ != ';') // each ext name=value pair is preceded with ';' |
5c550f5f | 285 | break; |
7ddcfbab | 286 | |
83c51da9 | 287 | while (*p == ' ' || *p == '\t') ++p; // skip spaces before name |
7ddcfbab | 288 | |
83c51da9 | 289 | if (p >= endExt) |
5c550f5f | 290 | break; // malformed extension: ';' without ext name=value pair |
83c51da9 CT |
291 | |
292 | const int extSize = endExt - p; | |
293 | // TODO: we need debugData() stream manipulator to dump data | |
294 | debugs(94,7, "Found chunk extension; size=" << extSize); | |
295 | ||
296 | // TODO: support implied *LWS around '=' | |
297 | if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) { | |
298 | (void)StringToInt64(p+18, useOriginBody, &p, 10); | |
299 | debugs(94, 3, HERE << "use-original-body=" << useOriginBody); | |
5c550f5f | 300 | break; // remove to support more than just use-original-body |
83c51da9 CT |
301 | } else { |
302 | debugs(94, 5, HERE << "skipping unknown chunk extension"); | |
303 | // TODO: support quoted-string chunk-ext-val | |
304 | while (p < endExt && *p != ';') ++p; // skip until the next ';' | |
305 | } | |
306 | } | |
5c550f5f AR |
307 | |
308 | theIn->consume(crlfEnd); | |
309 | theStep = theChunkSize ? psChunkBody : psTrailer; | |
83c51da9 | 310 | } |
f53969cc | 311 |