]> git.ipfire.org Git - thirdparty/squid.git/blob - src/ChunkedCodingParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / ChunkedCodingParser.cc
1 #include "squid.h"
2 #include "base/TextException.h"
3 #include "ChunkedCodingParser.h"
4 #include "Debug.h"
5 #include "MemBuf.h"
6 #include "Parsing.h"
7
8 ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
9 ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
10 ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
11 ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
12 ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
13 ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
14 ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
15
16 ChunkedCodingParser::ChunkedCodingParser()
17 {
18 reset();
19 }
20
21 void ChunkedCodingParser::reset()
22 {
23 theStep = psChunkSize;
24 theChunkSize = theLeftBodySize = 0;
25 doNeedMoreData = false;
26 theIn = theOut = NULL;
27 useOriginBody = -1;
28 inQuoted = inSlashed = false;
29 }
30
31 bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
32 {
33 Must(rawData && parsedContent);
34 theIn = rawData;
35 theOut = parsedContent;
36
37 // we must reset this all the time so that mayContinue() lets us
38 // output more content if we stopped due to needsMoreSpace() before
39 doNeedMoreData = !theIn->hasContent();
40
41 while (mayContinue()) {
42 (this->*theStep)();
43 }
44
45 return theStep == psMessageEnd;
46 }
47
48 bool ChunkedCodingParser::needsMoreData() const
49 {
50 return doNeedMoreData;
51 }
52
53 bool ChunkedCodingParser::needsMoreSpace() const
54 {
55 assert(theOut);
56 return theStep == psChunkBody && !theOut->hasPotentialSpace();
57 }
58
59 bool ChunkedCodingParser::mayContinue() const
60 {
61 return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
62 }
63
64 void ChunkedCodingParser::parseChunkSize()
65 {
66 Must(theChunkSize <= 0); // Should(), really
67
68 const char *p = theIn->content();
69 while (p < theIn->space() && xisxdigit(*p)) ++p;
70 if (p >= theIn->space()) {
71 doNeedMoreData = true;
72 return;
73 }
74
75 int64_t size = -1;
76 if (StringToInt64(theIn->content(), size, &p, 16)) {
77 if (size < 0)
78 throw TexcHere("negative chunk size");
79
80 theChunkSize = theLeftBodySize = size;
81 debugs(94,7, "found chunk: " << theChunkSize);
82 // parse chunk extensions only in the last-chunk
83 if (theChunkSize)
84 theStep = psUnusedChunkExtension;
85 else {
86 theIn->consume(p - theIn->content());
87 theStep = psLastChunkExtension;
88 }
89 } else
90 throw TexcHere("corrupted chunk size");
91 }
92
93 void ChunkedCodingParser::parseUnusedChunkExtension()
94 {
95 size_t crlfBeg = 0;
96 size_t crlfEnd = 0;
97 if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
98 inQuoted = inSlashed = false;
99 theIn->consume(crlfEnd);
100 theStep = theChunkSize ? psChunkBody : psTrailer;
101 } else {
102 theIn->consume(theIn->contentSize());
103 doNeedMoreData = true;
104 }
105 }
106
107 void ChunkedCodingParser::parseChunkBody()
108 {
109 Must(theLeftBodySize > 0); // Should, really
110
111 const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
112 const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
113
114 doNeedMoreData = availSize < theLeftBodySize;
115 // and we may also need more space
116
117 theOut->append(theIn->content(), safeSize);
118 theIn->consume(safeSize);
119 theLeftBodySize -= safeSize;
120
121 if (theLeftBodySize == 0)
122 theStep = psChunkEnd;
123 else
124 Must(needsMoreData() || needsMoreSpace());
125 }
126
127 void ChunkedCodingParser::parseChunkEnd()
128 {
129 Must(theLeftBodySize == 0); // Should(), really
130
131 size_t crlfBeg = 0;
132 size_t crlfEnd = 0;
133
134 if (findCrlf(crlfBeg, crlfEnd)) {
135 if (crlfBeg != 0) {
136 throw TexcHere("found data between chunk end and CRLF");
137 return;
138 }
139
140 theIn->consume(crlfEnd);
141 theChunkSize = 0; // done with the current chunk
142 theStep = psChunkSize;
143 return;
144 }
145
146 doNeedMoreData = true;
147 }
148
149 void ChunkedCodingParser::parseTrailer()
150 {
151 Must(theChunkSize == 0); // Should(), really
152
153 while (mayContinue())
154 parseTrailerHeader();
155 }
156
157 void ChunkedCodingParser::parseTrailerHeader()
158 {
159 size_t crlfBeg = 0;
160 size_t crlfEnd = 0;
161
162 if (findCrlf(crlfBeg, crlfEnd)) {
163
164 #if TRAILERS_ARE_SUPPORTED
165 if (crlfBeg > 0)
166 theTrailer.append(theIn->content(), crlfEnd);
167 #endif
168
169 theIn->consume(crlfEnd);
170
171 if (crlfBeg == 0)
172 theStep = psMessageEnd;
173
174 return;
175 }
176
177 doNeedMoreData = true;
178 }
179
180 void ChunkedCodingParser::parseMessageEnd()
181 {
182 // termination step, should not be called
183 Must(false); // Should(), really
184 }
185
186 /// Finds next CRLF. Does not store parsing state.
187 bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
188 {
189 bool quoted = false;
190 bool slashed = false;
191 return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
192 }
193
194 /// Finds next CRLF. Parsing state stored in quoted and slashed
195 /// parameters. Incremental: can resume when more data is available.
196 bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
197 {
198 // XXX: This code was copied, with permission, from another software.
199 // There is a similar and probably better code inside httpHeaderParse
200 // but it seems difficult to isolate due to parsing-unrelated bloat.
201 // Such isolation should probably be done before this class is used
202 // for handling of traffic "more external" than ICAP.
203
204 const char *buf = theIn->content();
205 size_t size = theIn->contentSize();
206
207 ssize_t crOff = -1;
208
209 for (size_t i = 0; i < size; ++i) {
210 if (slashed) {
211 slashed = false;
212 continue;
213 }
214
215 const char c = buf[i];
216
217 // handle quoted strings
218 if (quoted) {
219 if (c == '\\')
220 slashed = true;
221 else if (c == '"')
222 quoted = false;
223
224 continue;
225 } else if (c == '"') {
226 quoted = true;
227 crOff = -1;
228 continue;
229 }
230
231 if (crOff < 0) { // looking for the first CR or LF
232
233 if (c == '\n') {
234 crlfBeg = i;
235 crlfEnd = ++i;
236 return true;
237 }
238
239 if (c == '\r')
240 crOff = i;
241 } else { // skipping CRs, looking for the first LF
242
243 if (c == '\n') {
244 crlfBeg = crOff;
245 crlfEnd = ++i;
246 return true;
247 }
248
249 if (c != '\r')
250 crOff = -1;
251 }
252 }
253
254 return false;
255 }
256
257 // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
258 void ChunkedCodingParser::parseLastChunkExtension()
259 {
260 size_t crlfBeg = 0;
261 size_t crlfEnd = 0;
262
263 if (!findCrlf(crlfBeg, crlfEnd)) {
264 doNeedMoreData = true;
265 return;
266 }
267
268 const char *const startExt = theIn->content();
269 const char *const endExt = theIn->content() + crlfBeg;
270
271 // chunk-extension starts at startExt and ends with LF at endEx
272 for (const char *p = startExt; p < endExt;) {
273
274 while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'
275
276 if (*p++ != ';') // each ext name=value pair is preceded with ';'
277 break;
278
279 while (*p == ' ' || *p == '\t') ++p; // skip spaces before name
280
281 if (p >= endExt)
282 break; // malformed extension: ';' without ext name=value pair
283
284 const int extSize = endExt - p;
285 // TODO: we need debugData() stream manipulator to dump data
286 debugs(94,7, "Found chunk extension; size=" << extSize);
287
288 // TODO: support implied *LWS around '='
289 if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
290 (void)StringToInt64(p+18, useOriginBody, &p, 10);
291 debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
292 break; // remove to support more than just use-original-body
293 } else {
294 debugs(94, 5, HERE << "skipping unknown chunk extension");
295 // TODO: support quoted-string chunk-ext-val
296 while (p < endExt && *p != ';') ++p; // skip until the next ';'
297 }
298 }
299
300 theIn->consume(crlfEnd);
301 theStep = theChunkSize ? psChunkBody : psTrailer;
302 }