2 #include "base/TextException.h"
3 #include "ChunkedCodingParser.h"
// Definitions of the parser step constants. Each ps* member has type
// ChunkedCodingParser::Step and is initialized with the address of the
// parse* member function of the same suffix. theStep is assigned from and
// compared against these constants elsewhere in this file.
// Step constant bound to parseChunkSize().
8 ChunkedCodingParser::Step
ChunkedCodingParser::psChunkSize
= &ChunkedCodingParser::parseChunkSize
;
// Step constant bound to parseUnusedChunkExtension().
9 ChunkedCodingParser::Step
ChunkedCodingParser::psUnusedChunkExtension
= &ChunkedCodingParser::parseUnusedChunkExtension
;
// Step constant bound to parseLastChunkExtension().
10 ChunkedCodingParser::Step
ChunkedCodingParser::psLastChunkExtension
= &ChunkedCodingParser::parseLastChunkExtension
;
// Step constant bound to parseChunkBody().
11 ChunkedCodingParser::Step
ChunkedCodingParser::psChunkBody
= &ChunkedCodingParser::parseChunkBody
;
// Step constant bound to parseChunkEnd().
12 ChunkedCodingParser::Step
ChunkedCodingParser::psChunkEnd
= &ChunkedCodingParser::parseChunkEnd
;
// Step constant bound to parseTrailer().
13 ChunkedCodingParser::Step
ChunkedCodingParser::psTrailer
= &ChunkedCodingParser::parseTrailer
;
// Step constant bound to parseMessageEnd(); parse() reports completion by
// comparing theStep with this value.
14 ChunkedCodingParser::Step
ChunkedCodingParser::psMessageEnd
= &ChunkedCodingParser::parseMessageEnd
;
// Default constructor.
// NOTE(review): the constructor body is not visible in this listing, so
// what it initializes cannot be stated from here -- confirm against the
// full file.
16 ChunkedCodingParser::ChunkedCodingParser()
// Returns the parser to its starting state: the current step becomes
// psChunkSize, both size counters are zeroed, the need-more-data flag and
// the quoted/slashed scan flags are cleared, and both buffer pointers are
// set to NULL.
// NOTE(review): at least one original line between the theIn/theOut reset
// and the inQuoted/inSlashed reset is not visible in this listing.
21 void ChunkedCodingParser::reset()
// the next thing expected from the input is a chunk size
23 theStep
= psChunkSize
;
// zero both the chunk size and the count of chunk body bytes still expected
24 theChunkSize
= theLeftBodySize
= 0;
25 doNeedMoreData
= false;
// detach from any buffers used by an earlier parse
26 theIn
= theOut
= NULL
;
// forget any partial quoted-string state kept for the stateful findCrlf()
28 inQuoted
= inSlashed
= false;
// Parser entry point. Both rawData and parsedContent must be non-null
// (enforced with Must()). parsedContent becomes theOut, the buffer that
// parseChunkBody() appends decoded bytes to. Parsing then proceeds in a
// loop for as long as mayContinue() holds.
// Returns true if the current step is psMessageEnd when the loop stops,
// false otherwise.
// NOTE(review): no assignment of rawData to theIn and no loop body are
// visible in this listing; presumably both sit on elided lines -- confirm
// against the full file.
31 bool ChunkedCodingParser::parse(MemBuf
*rawData
, MemBuf
*parsedContent
)
33 Must(rawData
&& parsedContent
);
35 theOut
= parsedContent
;
37 // we must reset this all the time so that mayContinue() lets us
38 // output more content if we stopped due to needsMoreSpace() before
39 doNeedMoreData
= !theIn
->hasContent();
41 while (mayContinue()) {
// done only once the step sequence has reached psMessageEnd
45 return theStep
== psMessageEnd
;
// Returns the doNeedMoreData flag. parse() recomputes it from whether the
// input has content, and individual step handlers set it to true when they
// cannot finish with the bytes currently buffered.
48 bool ChunkedCodingParser::needsMoreData() const
50 return doNeedMoreData
;
// Returns true only when the current step is psChunkBody and theOut reports
// no potential space; in every other step output space is not a constraint.
// Dereferences theOut.
// NOTE(review): at least one original line before the return is not
// visible in this listing.
53 bool ChunkedCodingParser::needsMoreSpace() const
56 return theStep
== psChunkBody
&& !theOut
->hasPotentialSpace();
// Loop condition for parse() and parseTrailer(): true when neither more
// input nor more output space is needed and the step is not psMessageEnd.
59 bool ChunkedCodingParser::mayContinue() const
61 return !needsMoreData() && !needsMoreSpace() && theStep
!= psMessageEnd
;
// Step handler for the chunk-size field.
// Skips over hexadecimal digits at the start of the input. If the scan
// reaches theIn->space() (presumably the end of buffered content -- confirm
// against MemBuf), more data is requested. Otherwise the digits are
// converted with StringToInt64() in base 16, the value is stored in both
// theChunkSize and theLeftBodySize, and the next step is selected
// (psUnusedChunkExtension or psLastChunkExtension).
// Throws TexcHere("negative chunk size") or TexcHere("corrupted chunk size").
// NOTE(review): the declaration of `size`, the conditions guarding the two
// throws, and the condition choosing between the two next steps are not
// visible in this listing.
64 void ChunkedCodingParser::parseChunkSize()
66 Must(theChunkSize
<= 0); // Should(), really
68 const char *p
= theIn
->content();
// advance p past the leading run of hex digits
69 while (p
< theIn
->space() && xisxdigit(*p
)) ++p
;
// digits ran up to theIn->space(): the size field may be incomplete
70 if (p
>= theIn
->space()) {
71 doNeedMoreData
= true;
// base-16 conversion; &p is passed so the helper can report where it stopped
// (presumably -- StringToInt64 is defined elsewhere)
76 if (StringToInt64(theIn
->content(), size
, &p
, 16)) {
78 throw TexcHere("negative chunk size");
// the whole chunk body is still ahead of us
80 theChunkSize
= theLeftBodySize
= size
;
81 debugs(94,7, "found chunk: " << theChunkSize
);
82 // parse chunk extensions only in the last-chunk
84 theStep
= psUnusedChunkExtension
;
// drop the bytes between the start of content and p before handing the
// rest of the line to parseLastChunkExtension()
86 theIn
->consume(p
- theIn
->content());
87 theStep
= psLastChunkExtension
;
90 throw TexcHere("corrupted chunk size");
// Step handler that skips the remainder of a chunk-size line without
// interpreting any extension on it.
// Uses the stateful findCrlf() overload, passing inQuoted and inSlashed so
// the quoted-string scan state survives between calls. When the line
// terminator is found, that state is cleared, input is consumed up to
// crlfEnd, and the next step is psChunkBody if theChunkSize is non-zero,
// otherwise psTrailer. The other visible path consumes everything buffered
// and requests more data.
// NOTE(review): the crlfBeg/crlfEnd declarations and the else/brace
// structure separating the two paths are not visible in this listing.
93 void ChunkedCodingParser::parseUnusedChunkExtension()
97 if (findCrlf(crlfBeg
, crlfEnd
, inQuoted
, inSlashed
)) {
// line complete: the scan state is no longer needed
98 inQuoted
= inSlashed
= false;
99 theIn
->consume(crlfEnd
);
100 theStep
= theChunkSize
? psChunkBody
: psTrailer
;
// discard what was scanned; inQuoted/inSlashed remember where the scan was
102 theIn
->consume(theIn
->contentSize());
103 doNeedMoreData
= true;
// Step handler that moves chunk payload bytes from theIn to theOut.
// Requires theLeftBodySize > 0. The number of bytes copied is the smallest
// of: bytes left in the chunk, bytes buffered in theIn, and the potential
// space reported by theOut. The copied bytes are consumed from theIn and
// subtracted from theLeftBodySize. When nothing is left of the chunk the
// next step becomes psChunkEnd.
// NOTE(review): the final Must() is presumably the else branch of the
// theLeftBodySize == 0 test; the else keyword is not visible in this
// listing -- confirm.
107 void ChunkedCodingParser::parseChunkBody()
109 Must(theLeftBodySize
> 0); // Should, really
// bytes of this chunk that are already buffered in theIn
111 const size_t availSize
= min(theLeftBodySize
, (uint64_t)theIn
->contentSize());
// further limited by what theOut reports it can still accept
112 const size_t safeSize
= min(availSize
, (size_t)theOut
->potentialSpaceSize());
// the input does not yet hold the rest of this chunk
114 doNeedMoreData
= availSize
< theLeftBodySize
;
115 // and we may also need more space
117 theOut
->append(theIn
->content(), safeSize
);
118 theIn
->consume(safeSize
);
119 theLeftBodySize
-= safeSize
;
121 if (theLeftBodySize
== 0)
122 theStep
= psChunkEnd
;
// sanity check: stopping with chunk bytes left must be explained by a lack
// of input or of output space
124 Must(needsMoreData() || needsMoreSpace());
// Step handler for the line terminator that follows a chunk's data.
// Requires theLeftBodySize == 0. Searches for the terminator with the
// stateless findCrlf() overload. On the visible success path it consumes
// input up to crlfEnd, zeroes theChunkSize and returns to psChunkSize for
// the next chunk. The last visible statement requests more data.
// Throws TexcHere("found data between chunk end and CRLF").
// NOTE(review): the crlfBeg/crlfEnd declarations, the condition guarding
// the throw, and the control flow separating the success path from the
// need-more-data path are not visible in this listing.
127 void ChunkedCodingParser::parseChunkEnd()
129 Must(theLeftBodySize
== 0); // Should(), really
134 if (findCrlf(crlfBeg
, crlfEnd
)) {
136 throw TexcHere("found data between chunk end and CRLF");
140 theIn
->consume(crlfEnd
);
141 theChunkSize
= 0; // done with the current chunk
142 theStep
= psChunkSize
;
146 doNeedMoreData
= true;
// Step handler for the trailer section. Requires theChunkSize == 0, then
// calls parseTrailerHeader() repeatedly for as long as mayContinue() holds,
// i.e. until more data is needed or the step becomes psMessageEnd.
149 void ChunkedCodingParser::parseTrailer()
151 Must(theChunkSize
== 0); // Should(), really
153 while (mayContinue())
154 parseTrailerHeader();
// Handles one line of the trailer section. Searches for a line terminator
// with the stateless findCrlf() overload. On the visible success path the
// line is appended to theTrailer (only when TRAILERS_ARE_SUPPORTED is set)
// and consumed from theIn; the step may then become psMessageEnd. The last
// visible statement requests more data.
// NOTE(review): the crlfBeg/crlfEnd declarations, the conditions guarding
// the append and the psMessageEnd assignment, the matching #endif, and the
// control flow before the need-more-data statement are not visible in this
// listing.
157 void ChunkedCodingParser::parseTrailerHeader()
162 if (findCrlf(crlfBeg
, crlfEnd
)) {
164 #if TRAILERS_ARE_SUPPORTED
// keep the line, terminator included (crlfEnd bytes from the start of content)
166 theTrailer
.append(theIn
->content(), crlfEnd
);
169 theIn
->consume(crlfEnd
);
172 theStep
= psMessageEnd
;
177 doNeedMoreData
= true;
// Handler bound to the terminal step psMessageEnd. mayContinue() is false
// once theStep equals psMessageEnd, so reaching this function indicates a
// logic error; it unconditionally fails a Must() check.
180 void ChunkedCodingParser::parseMessageEnd()
182 // termination step, should not be called
183 Must(false); // Should(), really
186 /// Finds next CRLF. Does not store parsing state.
/// Convenience overload: forwards crlfBeg and crlfEnd to the four-argument
/// findCrlf() together with local state flags, so no quoted/slashed state
/// survives the call. Returns whatever the four-argument overload returns.
/// NOTE(review): the declaration of the local `quoted` flag is not visible
/// in this listing; presumably it mirrors `slashed` -- confirm.
187 bool ChunkedCodingParser::findCrlf(size_t &crlfBeg
, size_t &crlfEnd
)
190 bool slashed
= false;
191 return findCrlf(crlfBeg
, crlfEnd
, quoted
, slashed
);
194 /// Finds next CRLF. Parsing state stored in quoted and slashed
195 /// parameters. Incremental: can resume when more data is available.
/// Scans the content of theIn from offset 0, one byte at a time.
/// crlfBeg and crlfEnd are output parameters: callers in this file use
/// crlfBeg as the offset where the line terminator starts and crlfEnd as
/// the number of bytes to consume to get past it. quoted and slashed carry
/// the quoted-string scan state in and out of the call, which is what lets
/// a caller resume after more data arrives. Callers treat a true return as
/// "terminator found".
/// NOTE(review): most of the loop body, the declaration of crOff and the
/// return statements are not visible in this listing.
196 bool ChunkedCodingParser::findCrlf(size_t &crlfBeg
, size_t &crlfEnd
, bool &quoted
, bool &slashed
)
198 // XXX: This code was copied, with permission, from another software.
199 // There is a similar and probably better code inside httpHeaderParse
200 // but it seems difficult to isolate due to parsing-unrelated bloat.
201 // Such isolation should probably be done before this class is used
202 // for handling of traffic "more external" than ICAP.
204 const char *buf
= theIn
->content();
205 size_t size
= theIn
->contentSize();
209 for (size_t i
= 0; i
< size
; ++i
) {
215 const char c
= buf
[i
];
217 // handle quoted strings
225 } else if (c
== '"') {
231 if (crOff
< 0) { // looking for the first CR or LF
241 } else { // skipping CRs, looking for the first LF
257 // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
// Step handler for the extension area of the last chunk.
// Waits until a complete line is buffered (stateless findCrlf()), then walks
// the bytes from the start of content up to offset crlfBeg as a sequence of
// ';'-prefixed extensions. The only extension recognized is
// use-original-body=, whose value is parsed as a base-10 integer into
// useOriginBody. The visible handling of any other extension logs a message
// and skips ahead to the next ';'.
// Finally consumes input up to crlfEnd and moves to psChunkBody when
// theChunkSize is non-zero, otherwise psTrailer.
// NOTE(review): the crlfBeg/crlfEnd declarations, the statement taken when
// ';' is missing, the condition guarding the "malformed extension" break,
// and the else keyword before the unknown-extension path are not visible
// in this listing.
258 void ChunkedCodingParser::parseLastChunkExtension()
// the whole line must be buffered before any of it is interpreted
263 if (!findCrlf(crlfBeg
, crlfEnd
)) {
264 doNeedMoreData
= true;
268 const char *const startExt
= theIn
->content();
269 const char *const endExt
= theIn
->content() + crlfBeg
;
271 // chunk-extension starts at startExt and ends with LF at endExt
272 for (const char *p
= startExt
; p
< endExt
;) {
// NOTE(review): this skip has no p < endExt test; presumably it relies on
// the line terminator found at endExt to stop the scan -- confirm
274 while (*p
== ' ' || *p
== '\t') ++p
; // skip spaces before ';'
276 if (*p
++ != ';') // each ext name=value pair is preceded with ';'
279 while (*p
== ' ' || *p
== '\t') ++p
; // skip spaces before name
282 break; // malformed extension: ';' without ext name=value pair
// bytes from the extension name to the start of the line terminator
284 const int extSize
= endExt
- p
;
285 // TODO: we need debugData() stream manipulator to dump data
286 debugs(94,7, "Found chunk extension; size=" << extSize
);
288 // TODO: support implied *LWS around '='
// 18 is the length of the name-plus-'=' literal compared below; requiring
// extSize > 18 leaves at least one byte for the value
289 if (extSize
> 18 && strncmp(p
, "use-original-body=", 18) == 0) {
// conversion result is deliberately ignored (explicit void cast)
290 (void)StringToInt64(p
+18, useOriginBody
, &p
, 10);
291 debugs(94, 3, HERE
<< "use-original-body=" << useOriginBody
);
292 break; // remove to support more than just use-original-body
294 debugs(94, 5, HERE
<< "skipping unknown chunk extension");
295 // TODO: support quoted-string chunk-ext-val
296 while (p
< endExt
&& *p
!= ';') ++p
; // skip until the next ';'
// drop the whole extension line, terminator included
300 theIn
->consume(crlfEnd
);
301 theStep
= theChunkSize
? psChunkBody
: psTrailer
;