/*
 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
10 #include "base/TextException.h"
12 #include "http/one/TeChunkedParser.h"
13 #include "http/one/Tokenizer.h"
14 #include "http/ProtocolVersion.h"
16 #include "parser/Tokenizer.h"
18 #include "sbuf/Stream.h"
19 #include "SquidConfig.h"
21 Http::One::TeChunkedParser::TeChunkedParser():
22 customExtensionValueParser(nullptr)
24 // chunked encoding only exists in HTTP/1.1
25 Http1::Parser::msgProtocol_
= Http::ProtocolVersion(1,1);
31 Http::One::TeChunkedParser::clear()
33 parsingStage_
= Http1::HTTP_PARSE_NONE
;
35 theChunkSize
= theLeftBodySize
= 0;
37 // XXX: We do not reset customExtensionValueParser here. Based on the
38 // clear() API description, we must, but it makes little sense and could
39 // break method callers if they appear because some of them may forget to
40 // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
41 // parent class and this unnecessary method with it.
45 Http::One::TeChunkedParser::parse(const SBuf
&aBuf
)
47 buf_
= aBuf
; // sync buffers first so calls to remaining() work properly if nothing done.
49 if (buf_
.isEmpty()) // nothing to do (yet)
52 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
54 Must(!buf_
.isEmpty() && theOut
);
56 if (parsingStage_
== Http1::HTTP_PARSE_NONE
)
57 parsingStage_
= Http1::HTTP_PARSE_CHUNK_SZ
;
61 // loop for as many chunks as we can
62 // use do-while instead of while so that we can incrementally
63 // restart in the middle of a chunk/frame
66 if (parsingStage_
== Http1::HTTP_PARSE_CHUNK_EXT
&& !parseChunkMetadataSuffix(tok
))
69 if (parsingStage_
== Http1::HTTP_PARSE_CHUNK
&& !parseChunkBody(tok
))
72 if (parsingStage_
== Http1::HTTP_PARSE_MIME
&& !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
75 // loop for as many chunks as we can
76 } while (parsingStage_
== Http1::HTTP_PARSE_CHUNK_SZ
&& parseChunkSize(tok
));
78 return !needsMoreData() && !needsMoreSpace();
82 Http::One::TeChunkedParser::needsMoreSpace() const
85 return parsingStage_
== Http1::HTTP_PARSE_CHUNK
&& !theOut
->hasPotentialSpace();
88 /// RFC 7230 section 4.1 chunk-size
90 Http::One::TeChunkedParser::parseChunkSize(Tokenizer
&tok
)
92 Must(theChunkSize
<= 0); // Should(), really
95 if (tok
.int64(size
, 16, false) && !tok
.atEnd()) {
97 throw TexcHere("negative chunk size");
99 theChunkSize
= theLeftBodySize
= size
;
100 debugs(94,7, "found chunk: " << theChunkSize
);
101 buf_
= tok
.remaining(); // parse checkpoint
102 parsingStage_
= Http1::HTTP_PARSE_CHUNK_EXT
;
105 } else if (tok
.atEnd()) {
106 return false; // need more data
110 throw TexcHere("corrupted chunk size");
111 return false; // should not be reachable
114 /// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
115 /// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
116 /// last-chunk = 1*"0" [ chunk-ext ] CRLF
118 Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer
&tok
)
120 // Code becomes much simpler when incremental parsing functions throw on
121 // bad or insufficient input, like in the code below. TODO: Expand up.
123 parseChunkExtensions(tok
); // a possibly empty chunk-ext list
124 skipLineTerminator(tok
);
125 buf_
= tok
.remaining();
126 parsingStage_
= theChunkSize
? Http1::HTTP_PARSE_CHUNK
: Http1::HTTP_PARSE_MIME
;
128 } catch (const InsufficientInput
&) {
129 tok
.reset(buf_
); // backtrack to the last commit point
132 // other exceptions bubble up to kill message parsing
135 /// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
136 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
138 Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer
&tok
)
141 ParseBws(tok
); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
144 return; // reached the end of extensions (if any)
146 parseOneChunkExtension(tok
);
147 buf_
= tok
.remaining(); // got one extension
152 Http::One::ChunkExtensionValueParser::Ignore(Tokenizer
&tok
, const SBuf
&extName
)
154 const auto ignoredValue
= tokenOrQuotedString(tok
);
155 debugs(94, 5, extName
<< " with value " << ignoredValue
);
158 /// Parses a single chunk-ext list element:
159 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
161 Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer
&tok
)
163 ParseBws(tok
); // Bug 4492: ICAP servers send SP before chunk-ext-name
165 const auto extName
= tok
.prefix("chunk-ext-name", CharacterSet::TCHAR
);
170 return; // parsed a valueless chunk-ext
174 // optimization: the only currently supported extension needs last-chunk
175 if (!theChunkSize
&& customExtensionValueParser
)
176 customExtensionValueParser
->parse(tok
, extName
);
178 ChunkExtensionValueParser::Ignore(tok
, extName
);
182 Http::One::TeChunkedParser::parseChunkBody(Tokenizer
&tok
)
184 if (theLeftBodySize
> 0) {
185 buf_
= tok
.remaining(); // sync buffers before buf_ use
187 // TODO fix type mismatches and casting for these
188 const size_t availSize
= min(theLeftBodySize
, (uint64_t)buf_
.length());
189 const size_t safeSize
= min(availSize
, (size_t)theOut
->potentialSpaceSize());
191 theOut
->append(buf_
.rawContent(), safeSize
);
192 buf_
.consume(safeSize
);
193 theLeftBodySize
-= safeSize
;
195 tok
.reset(buf_
); // sync buffers after consume()
198 if (theLeftBodySize
== 0)
199 return parseChunkEnd(tok
);
201 Must(needsMoreData() || needsMoreSpace());
207 Http::One::TeChunkedParser::parseChunkEnd(Tokenizer
&tok
)
209 Must(theLeftBodySize
== 0); // Should(), really
212 skipLineTerminator(tok
);
213 buf_
= tok
.remaining(); // parse checkpoint
214 theChunkSize
= 0; // done with the current chunk
215 parsingStage_
= Http1::HTTP_PARSE_CHUNK_SZ
;
218 catch (const InsufficientInput
&) {
221 // other exceptions bubble up to kill message parsing