]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Update RFC 7230 compliance
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "http/one/RequestParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "mime_header.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
8
9 void
10 Http::One::RequestParser::clear()
11 {
12 Http1::Parser::clear();
13
14 request_parse_status = Http::scNone;
15 req.start = req.end = -1;
16 req.m_start = req.m_end = -1;
17 req.u_start = req.u_end = -1;
18 req.v_start = req.v_end = -1;
19 method_ = HttpRequestMethod();
20 }
21
22 /**
23 * Attempt to parse the first line of a new request message.
24 *
25 * Governed by RFC 7230 section 3.5
26 * "
27 * In the interest of robustness, a server that is expecting to receive
28 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
29 * received prior to the request-line.
30 * "
31 *
32 * Parsing state is stored between calls to avoid repeating buffer scans.
33 * If garbage is found the parsing offset is incremented.
34 */
35 void
36 Http::One::RequestParser::skipGarbageLines()
37 {
38 if (Config.onoff.relaxed_header_parser) {
39 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
40 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
41 "CRLF bytes received ahead of request-line. " <<
42 "Ignored due to relaxed_header_parser.");
43 // Be tolerant of prefix empty lines
44 // ie any series of either \n or \r\n with no other characters and no repeated \r
45 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
46 buf_.consume(1);
47 }
48 }
49
50 /* XXX: this is a Squid-specific tolerance
51 * it appears never to have been relevant outside out unit-tests
52 * because the ConnStateData parser loop starts with consumeWhitespace()
53 * which absorbs any SP HTAB VTAB CR LF characters.
54 * But unit-tests called the HttpParser method directly without that pruning.
55 */
56 #if USE_HTTP_VIOLATIONS
57 if (Config.onoff.relaxed_header_parser) {
58 if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
59 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
60 "Whitespace bytes received ahead of method. " <<
61 "Ignored due to relaxed_header_parser.");
62 // Be tolerant of prefix spaces (other bytes are valid method values)
63 while (!buf_.isEmpty() && buf_[0] == ' ') {
64 buf_.consume(1);
65 }
66 }
67 #endif
68 }
69
70 /**
71 * Attempt to parse the first line of a new request message.
72 *
73 * Governed by:
74 * RFC 1945 section 5.1
75 * RFC 7230 section 3.1 and 3.5
76 *
77 * Parsing state is stored between calls. However the current implementation
78 * begins parsing from scratch on every call.
79 * The return value tells you whether the parsing state fields are valid or not.
80 *
81 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
82 * \retval 1 successful parse. member fields contain the request-line items
83 * \retval 0 more data is needed to complete the parse
84 */
85 int
86 Http::One::RequestParser::parseRequestFirstLine()
87 {
88 int second_word = -1; // track the suspected URI start
89 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
90 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
91
92 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
93 debugs(74, DBG_DATA, buf_);
94
95 // Single-pass parse: (provided we have the whole line anyways)
96
97 req.start = 0;
98 req.end = -1;
99 for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
100 // track first and last whitespace (SP only)
101 if (buf_[i] == ' ') {
102 last_whitespace = i;
103 if (first_whitespace < req.start)
104 first_whitespace = i;
105 }
106
107 // track next non-SP/non-HT byte after first_whitespace
108 if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
109 second_word = i;
110 }
111
112 // locate line terminator
113 if (buf_[i] == '\n') {
114 req.end = i;
115 line_end = i - 1;
116 break;
117 }
118 if (i < buf_.length() - 1 && buf_[i] == '\r') {
119 if (Config.onoff.relaxed_header_parser) {
120 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
121 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
122 "Series of carriage-return bytes received prior to line terminator. " <<
123 "Ignored due to relaxed_header_parser.");
124
125 // Be tolerant of invalid multiple \r prior to terminal \n
126 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
127 line_end = i - 1;
128 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
129 ++i;
130
131 if (buf_[i + 1] == '\n') {
132 req.end = i + 1;
133 break;
134 }
135 } else {
136 if (buf_[i + 1] == '\n') {
137 req.end = i + 1;
138 line_end = i - 1;
139 break;
140 }
141 }
142
143 // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
144 // However it does explicitly state an exact syntax which omits un-encoded CR
145 // and defines 400 (Bad Request) as the required action when
146 // handed an invalid request-line.
147 request_parse_status = Http::scBadRequest;
148 return -1;
149 }
150 }
151
152 if (req.end == -1) {
153 // DoS protection against long first-line
154 if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
155 debugs(33, 5, "Too large request-line");
156 // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
157 request_parse_status = Http::scUriTooLong;
158 return -1;
159 }
160
161 debugs(74, 5, "Parser: retval 0: from " << req.start <<
162 "->" << req.end << ": needs more data to complete first line.");
163 return 0;
164 }
165
166 // NP: we have now seen EOL, more-data (0) cannot occur.
167 // From here on any failure is -1, success is 1
168
169 // Input Validation:
170
171 // DoS protection against long first-line
172 if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
173 debugs(33, 5, "Too large request-line");
174 request_parse_status = Http::scUriTooLong;
175 return -1;
176 }
177
178 // Process what we now know about the line structure into field offsets
179 // generating HTTP status for any aborts as we go.
180
181 // First non-whitespace = beginning of method
182 if (req.start > line_end) {
183 request_parse_status = Http::scBadRequest;
184 return -1;
185 }
186 req.m_start = req.start;
187
188 // First whitespace = end of method
189 if (first_whitespace > line_end || first_whitespace < req.start) {
190 request_parse_status = Http::scBadRequest; // no method
191 return -1;
192 }
193 req.m_end = first_whitespace - 1;
194 if (req.m_end < req.m_start) {
195 request_parse_status = Http::scBadRequest; // missing URI?
196 return -1;
197 }
198
199 /* Set method_ */
200 const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
201 method_ = HttpRequestMethod(tmp);
202
203 // First non-whitespace after first SP = beginning of URL+Version
204 if (second_word > line_end || second_word < req.start) {
205 request_parse_status = Http::scBadRequest; // missing URI
206 return -1;
207 }
208 req.u_start = second_word;
209
210 // RFC 1945: SP and version following URI are optional, marking version 0.9
211 // we identify this by the last whitespace being earlier than URI start
212 if (last_whitespace < second_word && last_whitespace >= req.start) {
213 msgProtocol_ = Http::ProtocolVersion(0,9);
214 req.u_end = line_end;
215 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
216 request_parse_status = Http::scOkay; // HTTP/0.9
217 return 1;
218 } else {
219 // otherwise last whitespace is somewhere after end of URI.
220 req.u_end = last_whitespace;
221 // crop any trailing whitespace in the area we think of as URI
222 for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
223 }
224 if (req.u_end < req.u_start) {
225 request_parse_status = Http::scBadRequest; // missing URI
226 return -1;
227 }
228 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
229
230 // Last whitespace SP = before start of protocol/version
231 if (last_whitespace >= line_end) {
232 request_parse_status = Http::scBadRequest; // missing version
233 return -1;
234 }
235 req.v_start = last_whitespace + 1;
236 req.v_end = line_end;
237
238 /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
239 if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
240 // non-HTTP/1 protocols not supported / implemented.
241 request_parse_status = Http::scHttpVersionNotSupported;
242 return -1;
243 }
244 // NP: magic octets include the protocol name and major version DIGIT.
245 msgProtocol_.protocol = AnyP::PROTO_HTTP;
246 msgProtocol_.major = 1;
247
248 int i = req.v_start + Http1magic.length() -1;
249
250 // catch missing minor part
251 if (++i > line_end) {
252 request_parse_status = Http::scHttpVersionNotSupported;
253 return -1;
254 }
255 /* next should be one or more digits */
256 if (!isdigit(buf_[i])) {
257 request_parse_status = Http::scHttpVersionNotSupported;
258 return -1;
259 }
260 int min = 0;
261 for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
262 min = min * 10;
263 min = min + (buf_[i]) - '0';
264 }
265 // catch too-big values or trailing garbage
266 if (min >= 65536 || i < line_end) {
267 request_parse_status = Http::scHttpVersionNotSupported;
268 return -1;
269 }
270 msgProtocol_.minor = min;
271
272 /*
273 * Rightio - we have all the schtuff. Return true; we've got enough.
274 */
275 request_parse_status = Http::scOkay;
276 return 1;
277 }
278
279 bool
280 Http::One::RequestParser::parse(const SBuf &aBuf)
281 {
282 buf_ = aBuf;
283 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
284
285 // stage 1: locate the request-line
286 if (parsingStage_ == HTTP_PARSE_NONE) {
287 skipGarbageLines();
288
289 // if we hit something before EOS treat it as a message
290 if (!buf_.isEmpty())
291 parsingStage_ = HTTP_PARSE_FIRST;
292 else
293 return false;
294 }
295
296 // stage 2: parse the request-line
297 if (parsingStage_ == HTTP_PARSE_FIRST) {
298 PROF_start(HttpParserParseReqLine);
299 const int retcode = parseRequestFirstLine();
300
301 // first-line (or a look-alike) found successfully.
302 if (retcode > 0) {
303 buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
304 parsingStage_ = HTTP_PARSE_MIME;
305 }
306
307 debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
308 " line={" << aBuf.length() << ", data='" << aBuf << "'}");
309 debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
310 debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
311 debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
312 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
313 PROF_stop(HttpParserParseReqLine);
314
315 // syntax errors already
316 if (retcode < 0) {
317 parsingStage_ = HTTP_PARSE_DONE;
318 return false;
319 }
320 }
321
322 // stage 3: locate the mime header block
323 if (parsingStage_ == HTTP_PARSE_MIME) {
324 // HTTP/1.x request-line is valid and parsing completed.
325 if (msgProtocol_.major == 1) {
326 /* NOTE: HTTP/0.9 requests do not have a mime header block.
327 * So the rest of the code will need to deal with '0'-byte headers
328 * (ie, none, so don't try parsing em)
329 */
330 int64_t mimeHeaderBytes = 0;
331 // XXX: c_str() reallocates. performance regression.
332 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
333 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
334 debugs(33, 5, "Too large request");
335 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
336 parsingStage_ = HTTP_PARSE_DONE;
337 } else
338 debugs(33, 5, "Incomplete request, waiting for end of headers");
339 return false;
340 }
341 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
342 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
343
344 } else
345 debugs(33, 3, "Missing HTTP/1.x identifier");
346
347 // NP: we do not do any further stages here yet so go straight to DONE
348 parsingStage_ = HTTP_PARSE_DONE;
349
350 // Squid could handle these headers, but admin does not want to
351 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
352 debugs(33, 5, "Too large request");
353 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
354 return false;
355 }
356 }
357
358 return !needsMoreData();
359 }