]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/TeChunkedParser.cc
Source Format Enforcement (#532)
[thirdparty/squid.git] / src / http / one / TeChunkedParser.cc
1 /*
2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "base/TextException.h"
11 #include "Debug.h"
12 #include "http/one/TeChunkedParser.h"
13 #include "http/one/Tokenizer.h"
14 #include "http/ProtocolVersion.h"
15 #include "MemBuf.h"
16 #include "parser/Tokenizer.h"
17 #include "Parsing.h"
18 #include "sbuf/Stream.h"
19 #include "SquidConfig.h"
20
21 Http::One::TeChunkedParser::TeChunkedParser():
22 customExtensionValueParser(nullptr)
23 {
24 // chunked encoding only exists in HTTP/1.1
25 Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
26
27 clear();
28 }
29
30 void
31 Http::One::TeChunkedParser::clear()
32 {
33 parsingStage_ = Http1::HTTP_PARSE_NONE;
34 buf_.clear();
35 theChunkSize = theLeftBodySize = 0;
36 theOut = NULL;
37 // XXX: We do not reset customExtensionValueParser here. Based on the
38 // clear() API description, we must, but it makes little sense and could
39 // break method callers if they appear because some of them may forget to
40 // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
41 // parent class and this unnecessary method with it.
42 }
43
44 bool
45 Http::One::TeChunkedParser::parse(const SBuf &aBuf)
46 {
47 buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
48
49 if (buf_.isEmpty()) // nothing to do (yet)
50 return false;
51
52 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
53
54 Must(!buf_.isEmpty() && theOut);
55
56 if (parsingStage_ == Http1::HTTP_PARSE_NONE)
57 parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
58
59 Tokenizer tok(buf_);
60
61 // loop for as many chunks as we can
62 // use do-while instead of while so that we can incrementally
63 // restart in the middle of a chunk/frame
64 do {
65
66 if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
67 return false;
68
69 if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
70 return false;
71
72 if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
73 return false;
74
75 // loop for as many chunks as we can
76 } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
77
78 return !needsMoreData() && !needsMoreSpace();
79 }
80
81 bool
82 Http::One::TeChunkedParser::needsMoreSpace() const
83 {
84 assert(theOut);
85 return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
86 }
87
88 /// RFC 7230 section 4.1 chunk-size
89 bool
90 Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
91 {
92 Must(theChunkSize <= 0); // Should(), really
93
94 int64_t size = -1;
95 if (tok.int64(size, 16, false) && !tok.atEnd()) {
96 if (size < 0)
97 throw TexcHere("negative chunk size");
98
99 theChunkSize = theLeftBodySize = size;
100 debugs(94,7, "found chunk: " << theChunkSize);
101 buf_ = tok.remaining(); // parse checkpoint
102 parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
103 return true;
104
105 } else if (tok.atEnd()) {
106 return false; // need more data
107 }
108
109 // else error
110 throw TexcHere("corrupted chunk size");
111 return false; // should not be reachable
112 }
113
114 /// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
115 /// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
116 /// last-chunk = 1*"0" [ chunk-ext ] CRLF
117 bool
118 Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
119 {
120 // Code becomes much simpler when incremental parsing functions throw on
121 // bad or insufficient input, like in the code below. TODO: Expand up.
122 try {
123 parseChunkExtensions(tok); // a possibly empty chunk-ext list
124 skipLineTerminator(tok);
125 buf_ = tok.remaining();
126 parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
127 return true;
128 } catch (const InsufficientInput &) {
129 tok.reset(buf_); // backtrack to the last commit point
130 return false;
131 }
132 // other exceptions bubble up to kill message parsing
133 }
134
135 /// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
136 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
137 void
138 Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
139 {
140 do {
141 ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
142
143 if (!tok.skip(';'))
144 return; // reached the end of extensions (if any)
145
146 parseOneChunkExtension(tok);
147 buf_ = tok.remaining(); // got one extension
148 } while (true);
149 }
150
151 void
152 Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
153 {
154 const auto ignoredValue = tokenOrQuotedString(tok);
155 debugs(94, 5, extName << " with value " << ignoredValue);
156 }
157
158 /// Parses a single chunk-ext list element:
159 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
160 void
161 Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
162 {
163 ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
164
165 const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
166
167 ParseBws(tok);
168
169 if (!tok.skip('='))
170 return; // parsed a valueless chunk-ext
171
172 ParseBws(tok);
173
174 // optimization: the only currently supported extension needs last-chunk
175 if (!theChunkSize && customExtensionValueParser)
176 customExtensionValueParser->parse(tok, extName);
177 else
178 ChunkExtensionValueParser::Ignore(tok, extName);
179 }
180
181 bool
182 Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
183 {
184 if (theLeftBodySize > 0) {
185 buf_ = tok.remaining(); // sync buffers before buf_ use
186
187 // TODO fix type mismatches and casting for these
188 const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
189 const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
190
191 theOut->append(buf_.rawContent(), safeSize);
192 buf_.consume(safeSize);
193 theLeftBodySize -= safeSize;
194
195 tok.reset(buf_); // sync buffers after consume()
196 }
197
198 if (theLeftBodySize == 0)
199 return parseChunkEnd(tok);
200 else
201 Must(needsMoreData() || needsMoreSpace());
202
203 return true;
204 }
205
206 bool
207 Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
208 {
209 Must(theLeftBodySize == 0); // Should(), really
210
211 try {
212 skipLineTerminator(tok);
213 buf_ = tok.remaining(); // parse checkpoint
214 theChunkSize = 0; // done with the current chunk
215 parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
216 return true;
217 }
218 catch (const InsufficientInput &) {
219 return false;
220 }
221 // other exceptions bubble up to kill message parsing
222 }
223