]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Merged from trunk
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "profiler/Profiler.h"
16 #include "SquidConfig.h"
17
18 Http::One::RequestParser::RequestParser() :
19 Parser(),
20 request_parse_status(Http::scNone),
21 firstLineGarbage_(0)
22 {}
23
24 Http1::Parser::size_type
25 Http::One::RequestParser::firstLineSize() const
26 {
27 // RFC 7230 section 2.6
28 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
29 return method_.image().length() + uri_.length() + 12;
30 }
31
32 /**
33 * Attempt to parse the first line of a new request message.
34 *
35 * Governed by RFC 7230 section 3.5
36 * "
37 * In the interest of robustness, a server that is expecting to receive
38 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
39 * received prior to the request-line.
40 * "
41 *
42 * Parsing state is stored between calls to avoid repeating buffer scans.
43 * If garbage is found the parsing offset is incremented.
44 */
45 void
46 Http::One::RequestParser::skipGarbageLines()
47 {
48 if (Config.onoff.relaxed_header_parser) {
49 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
50 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
51 "CRLF bytes received ahead of request-line. " <<
52 "Ignored due to relaxed_header_parser.");
53 // Be tolerant of prefix empty lines
54 // ie any series of either \n or \r\n with no other characters and no repeated \r
55 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
56 buf_.consume(1);
57 }
58 }
59 }
60
61 /**
62 * Attempt to parse the method field out of an HTTP message request-line.
63 *
64 * Governed by:
65 * RFC 1945 section 5.1
66 * RFC 7230 section 2.6, 3.1 and 3.5
67 *
68 * Parsing state is stored between calls. The current implementation uses
69 * checkpoints after each successful request-line field.
70 * The return value tells you whether the parsing is completed or not.
71 *
72 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
73 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
74 * \retval 0 more data is needed to complete the parse
75 */
76 int
77 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
78 {
79 // scan for up to 16 valid method characters.
80 static const size_t maxMethodLength = 16; // TODO: make this configurable?
81
82 // method field is a sequence of TCHAR.
83 SBuf methodFound;
84 if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
85
86 method_ = HttpRequestMethod(methodFound);
87 buf_ = tok.remaining(); // incremental parse checkpoint
88 return 1;
89
90 } else if (tok.atEnd()) {
91 debugs(74, 5, "Parser needs more data to find method");
92 return 0;
93
94 } // else error(s)
95
96 // non-delimiter found after accepted method bytes means ...
97 if (methodFound.length() == maxMethodLength) {
98 // method longer than acceptible.
99 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
100 request_parse_status = Http::scNotImplemented;
101 debugs(33, 5, "invalid request-line. method too long");
102 } else {
103 // invalid character in the URL
104 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
105 request_parse_status = Http::scBadRequest;
106 debugs(33, 5, "invalid request-line. missing method delimiter");
107 }
108 return -1;
109 }
110
111 static CharacterSet
112 uriValidCharacters()
113 {
114 CharacterSet UriChars("URI-Chars","");
115
116 /* RFC 3986 section 2:
117 * "
118 * A URI is composed from a limited set of characters consisting of
119 * digits, letters, and a few graphic symbols.
120 * "
121 */
122 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
123 UriChars.add('%');
124 UriChars += CharacterSet::HEXDIG;
125 // RFC 3986 section 2.2 - reserved characters
126 UriChars += CharacterSet("gen-delims", ":/?#[]@");
127 UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
128 // RFC 3986 section 2.3 - unreserved characters
129 UriChars += CharacterSet::ALPHA;
130 UriChars += CharacterSet::DIGIT;
131 UriChars += CharacterSet("unreserved", "-._~");
132
133 return UriChars;
134 }
135
136 int
137 Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok)
138 {
139 // URI field is a sequence of ... what? segments all have different valid charset
140 // go with non-whitespace non-binary characters for now
141 static CharacterSet UriChars = uriValidCharacters();
142
143 /* Arbitrary 64KB URI upper length limit.
144 *
145 * Not quite as arbitrary as it seems though. Old SquidString objects
146 * cannot store strings larger than 64KB, so we must limit until they
147 * have all been replaced with SBuf.
148 *
149 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
150 * at least 8000 octets for the whole line, including method and version.
151 */
152 const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
153 static_cast<size_t>((64*1024)-1));
154
155 SBuf uriFound;
156
157 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
158 if (tok.prefix(uriFound, UriChars, maxUriLength) && tok.skipOne(CharacterSet::SP)) {
159 uri_ = uriFound;
160 buf_ = tok.remaining(); // incremental parse checkpoint
161 return 1;
162
163 // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
164 } else if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
165 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
166 msgProtocol_ = Http::ProtocolVersion(0,9);
167 uri_ = uriFound; // found by successful prefix() call earlier.
168 request_parse_status = Http::scOkay;
169 buf_ = tok.remaining(); // incremental parse checkpoint
170 return 1;
171
172 } else if (tok.atEnd()) {
173 debugs(74, 5, "Parser needs more data to find URI");
174 return 0;
175 }
176
177 // else errors...
178
179 if (uriFound.length() == maxUriLength) {
180 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
181 request_parse_status = Http::scUriTooLong;
182 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
183 } else {
184 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
185 request_parse_status = Http::scBadRequest;
186 debugs(33, 5, "invalid request-line. missing URI delimiter");
187 }
188 return -1;
189 }
190
191 int
192 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer &tok)
193 {
194 // partial match of HTTP/1 magic prefix
195 if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
196 debugs(74, 5, "Parser needs more data to find version");
197 return 0;
198 }
199
200 if (!tok.skip(Http1magic)) {
201 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
202 request_parse_status = Http::scHttpVersionNotSupported;
203 return -1;
204 }
205
206 if (tok.atEnd()) {
207 debugs(74, 5, "Parser needs more data to find version");
208 return 0;
209 }
210
211 // get the version minor DIGIT
212 SBuf digit;
213 if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
214
215 // found version fully AND terminator
216 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
217 request_parse_status = Http::scOkay;
218 buf_ = tok.remaining(); // incremental parse checkpoint
219 return 1;
220
221 } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
222 debugs(74, 5, "Parser needs more data to find version");
223 return 0;
224
225 } // else error ...
226
227 // non-DIGIT. invalid version number.
228 request_parse_status = Http::scHttpVersionNotSupported;
229 debugs(33, 5, "invalid request-line. garbage before line terminator");
230 return -1;
231 }
232
233 /**
234 * Attempt to parse the first line of a new request message.
235 *
236 * Governed by:
237 * RFC 1945 section 5.1
238 * RFC 7230 section 2.6, 3.1 and 3.5
239 *
240 * Parsing state is stored between calls. The current implementation uses
241 * checkpoints after each successful request-line field.
242 * The return value tells you whether the parsing is completed or not.
243 *
244 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
245 * \retval 1 successful parse. member fields contain the request-line items
246 * \retval 0 more data is needed to complete the parse
247 */
248 int
249 Http::One::RequestParser::parseRequestFirstLine()
250 {
251 ::Parser::Tokenizer tok(buf_);
252
253 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
254 debugs(74, DBG_DATA, buf_);
255
256 // NP: would be static, except it need to change with reconfigure
257 CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
258
259 if (Config.onoff.relaxed_header_parser) {
260 // RFC 7230 section 3.5
261 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
262 // as whitespace between request-line fields
263 WspDelim += CharacterSet::HTAB
264 + CharacterSet("VT,FF","\x0B\x0C")
265 + CharacterSet::CR;
266 }
267
268 // only search for method if we have not yet found one
269 if (method_ == Http::METHOD_NONE) {
270 const int res = parseMethodField(tok, WspDelim);
271 if (res < 1)
272 return res;
273 // else keep going...
274 }
275
276 // tolerant parser allows multiple whitespace characters between request-line fields
277 if (Config.onoff.relaxed_header_parser) {
278 const size_t garbage = tok.skipAll(WspDelim);
279 if (garbage > 0) {
280 firstLineGarbage_ += garbage;
281 buf_ = tok.remaining(); // re-checkpoint after garbage
282 }
283 }
284 if (tok.atEnd()) {
285 debugs(74, 5, "Parser needs more data");
286 return 0;
287 }
288
289 // from here on, we have two possible parse paths: whitespace tolerant, and strict
290 if (Config.onoff.relaxed_header_parser) {
291 // whitespace tolerant
292
293 // NOTES:
294 // * this would be static, except WspDelim changes with reconfigure
295 // * HTTP-version charset is included by uriValidCharacters()
296 // * terminal CR is included by WspDelim here in relaxed parsing
297 CharacterSet LfDelim = uriValidCharacters() + WspDelim;
298
299 // seek the LF character, then tokenize the line in reverse
300 SBuf line;
301 if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
302 ::Parser::Tokenizer rTok(line);
303 SBuf nil;
304 (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
305 SBuf digit;
306 if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
307 uri_ = rTok.remaining();
308 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
309 if (uri_.isEmpty()) {
310 debugs(33, 5, "invalid request-line. missing URL");
311 request_parse_status = Http::scBadRequest;
312 return -1;
313 }
314
315 request_parse_status = Http::scOkay;
316 buf_ = tok.remaining(); // incremental parse checkpoint
317 return 1;
318
319 } else if (method_ == Http::METHOD_GET) {
320 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
321 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
322 msgProtocol_ = Http::ProtocolVersion(0,9);
323 static const SBuf cr("\r",1);
324 uri_ = line.trim(cr,false,true);
325 request_parse_status = Http::scOkay;
326 buf_ = tok.remaining(); // incremental parse checkpoint
327 return 1;
328 }
329
330 debugs(33, 5, "invalid request-line. not HTTP");
331 request_parse_status = Http::scBadRequest;
332 return -1;
333 }
334
335 debugs(74, 5, "Parser needs more data");
336 return 0;
337 }
338 // else strict non-whitespace tolerant parse
339
340 // only search for request-target (URL) if we have not yet found one
341 if (uri_.isEmpty()) {
342 const int res = parseUriField(tok);
343 if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
344 return res;
345 // else keep going...
346 }
347
348 if (tok.atEnd()) {
349 debugs(74, 5, "Parser needs more data");
350 return 0;
351 }
352
353 // HTTP/1 version suffix (protocol magic) followed by CR*LF
354 if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
355 return parseHttpVersionField(tok);
356 }
357
358 // If we got here this method has been called too many times
359 request_parse_status = Http::scInternalServerError;
360 debugs(33, 5, "ERROR: Parser already processed request-line");
361 return -1;
362 }
363
364 bool
365 Http::One::RequestParser::parse(const SBuf &aBuf)
366 {
367 buf_ = aBuf;
368 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
369
370 // stage 1: locate the request-line
371 if (parsingStage_ == HTTP_PARSE_NONE) {
372 skipGarbageLines();
373
374 // if we hit something before EOS treat it as a message
375 if (!buf_.isEmpty())
376 parsingStage_ = HTTP_PARSE_FIRST;
377 else
378 return false;
379 }
380
381 // stage 2: parse the request-line
382 if (parsingStage_ == HTTP_PARSE_FIRST) {
383 PROF_start(HttpParserParseReqLine);
384 const int retcode = parseRequestFirstLine();
385
386 // first-line (or a look-alike) found successfully.
387 if (retcode > 0) {
388 parsingStage_ = HTTP_PARSE_MIME;
389 }
390
391 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
392 debugs(74, 5, "request-line: method: " << method_);
393 debugs(74, 5, "request-line: url: " << uri_);
394 debugs(74, 5, "request-line: proto: " << msgProtocol_);
395 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
396 PROF_stop(HttpParserParseReqLine);
397
398 // syntax errors already
399 if (retcode < 0) {
400 parsingStage_ = HTTP_PARSE_DONE;
401 return false;
402 }
403 }
404
405 // stage 3: locate the mime header block
406 if (parsingStage_ == HTTP_PARSE_MIME) {
407 // HTTP/1.x request-line is valid and parsing completed.
408 if (msgProtocol_.major == 1) {
409 /* NOTE: HTTP/0.9 requests do not have a mime header block.
410 * So the rest of the code will need to deal with '0'-byte headers
411 * (ie, none, so don't try parsing em)
412 */
413 int64_t mimeHeaderBytes = 0;
414 // XXX: c_str() reallocates. performance regression.
415 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
416 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
417 debugs(33, 5, "Too large request");
418 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
419 parsingStage_ = HTTP_PARSE_DONE;
420 } else
421 debugs(33, 5, "Incomplete request, waiting for end of headers");
422 return false;
423 }
424 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
425 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
426
427 } else
428 debugs(33, 3, "Missing HTTP/1.x identifier");
429
430 // NP: we do not do any further stages here yet so go straight to DONE
431 parsingStage_ = HTTP_PARSE_DONE;
432
433 // Squid could handle these headers, but admin does not want to
434 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
435 debugs(33, 5, "Too large request");
436 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
437 return false;
438 }
439 }
440
441 return !needsMoreData();
442 }
443