]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Merge from trunk rev.13907
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "profiler/Profiler.h"
16 #include "SquidConfig.h"
17
18 Http::One::RequestParser::RequestParser() :
19 Parser(),
20 request_parse_status(Http::scNone),
21 firstLineGarbage_(0)
22 {}
23
24 Http1::Parser::size_type
25 Http::One::RequestParser::firstLineSize() const
26 {
27 // RFC 7230 section 2.6
28 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
29 return method_.image().length() + uri_.length() + 12;
30 }
31
32 /**
33 * Attempt to parse the first line of a new request message.
34 *
35 * Governed by RFC 7230 section 3.5
36 * "
37 * In the interest of robustness, a server that is expecting to receive
38 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
39 * received prior to the request-line.
40 * "
41 *
42 * Parsing state is stored between calls to avoid repeating buffer scans.
43 * If garbage is found the parsing offset is incremented.
44 */
45 void
46 Http::One::RequestParser::skipGarbageLines()
47 {
48 if (Config.onoff.relaxed_header_parser) {
49 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
50 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
51 "CRLF bytes received ahead of request-line. " <<
52 "Ignored due to relaxed_header_parser.");
53 // Be tolerant of prefix empty lines
54 // ie any series of either \n or \r\n with no other characters and no repeated \r
55 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
56 buf_.consume(1);
57 }
58 }
59 }
60
61 /**
62 * Attempt to parse the method field out of an HTTP message request-line.
63 *
64 * Governed by:
65 * RFC 1945 section 5.1
66 * RFC 7230 section 2.6, 3.1 and 3.5
67 *
68 * Parsing state is stored between calls. The current implementation uses
69 * checkpoints after each successful request-line field.
70 * The return value tells you whether the parsing is completed or not.
71 *
72 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
73 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
74 * \retval 0 more data is needed to complete the parse
75 */
76 int
77 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
78 {
79 // scan for up to 16 valid method characters.
80 static const size_t maxMethodLength = 16; // TODO: make this configurable?
81
82 // method field is a sequence of TCHAR.
83 SBuf methodFound;
84 if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
85
86 method_ = HttpRequestMethod(methodFound);
87 buf_ = tok.remaining(); // incremental parse checkpoint
88 return 1;
89
90 } else if (tok.atEnd()) {
91 debugs(74, 5, "Parser needs more data to find method");
92 return 0;
93
94 } // else error(s)
95
96 // non-delimiter found after accepted method bytes means ...
97 if (methodFound.length() == maxMethodLength) {
98 // method longer than acceptible.
99 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
100 request_parse_status = Http::scNotImplemented;
101 debugs(33, 5, "invalid request-line. method too long");
102 } else {
103 // invalid character in the URL
104 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
105 request_parse_status = Http::scBadRequest;
106 debugs(33, 5, "invalid request-line. missing method delimiter");
107 }
108 return -1;
109 }
110
111 int
112 Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
113 {
114 // URI field is a sequence of ... what? segments all have different valid charset
115 // go with non-whitespace non-binary characters for now
116 static CharacterSet UriChars("URI-Chars","");
117 if (!UriChars['a']) { // if it needs initializing...
118 /* RFC 3986 section 2:
119 * "
120 * A URI is composed from a limited set of characters consisting of
121 * digits, letters, and a few graphic symbols.
122 * "
123 */
124 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
125 UriChars.add('%');
126 UriChars += CharacterSet::HEXDIG;
127 // RFC 3986 section 2.2 - reserved characters
128 UriChars += CharacterSet("gen-delims", ":/?#[]@");
129 UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
130 // RFC 3986 section 2.3 - unreserved characters
131 UriChars += CharacterSet::ALPHA;
132 UriChars += CharacterSet::DIGIT;
133 UriChars += CharacterSet("unreserved", "-._~");
134 }
135
136 /* Arbitrary 64KB URI upper length limit.
137 *
138 * Not quite as arbitrary as it seems though. Old SquidString objects
139 * cannot store strings larger than 64KB, so we must limit until they
140 * have all been replaced with SBuf.
141 *
142 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
143 * at least 8000 octets for the whole line, including method and version.
144 */
145 const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
146 static_cast<size_t>((64*1024)-1));
147
148 SBuf uriFound;
149 if (!tok.prefix(uriFound, UriChars, maxUriLength)) {
150 // NP: prefix() returns true if it finds ANY valid chars
151 debugs(33, 5, "invalid request-line. missing URL");
152 request_parse_status = Http::scBadRequest;
153 return -1;
154 }
155
156 /* NOTE: we do have to check for token/state in this order.
157 * Because RFC 7230 tolerant parse accepts CR as a whitespace
158 * delimiter in HTTP/1.1 and we may not yet have the LF final
159 * terminator character on HTTP/0.9 simple-request lines.
160 */
161
162 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
163 if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
164 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
165 msgProtocol_ = Http::ProtocolVersion(0,9);
166 uri_ = uriFound;
167 request_parse_status = Http::scOkay;
168 buf_ = tok.remaining(); // incremental parse checkpoint
169 return 1;
170 } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
171 debugs(74, 5, "Parser needs more data to find URI");
172 return 0;
173 }
174
175 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
176 if (tok.skipOne(WspDelim)) {
177 uri_ = uriFound;
178 buf_ = tok.remaining(); // incremental parse checkpoint
179 return 1;
180
181 } else if (tok.atEnd()) {
182 debugs(74, 5, "Parser needs more data to find URI");
183 return 0;
184 }
185
186 // else errors...
187
188 if (uriFound.length() == maxUriLength) {
189 // URL longer than acceptible.
190 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
191 request_parse_status = Http::scUriTooLong;
192 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
193 } else {
194 // invalid non-delimiter character ended the URL
195 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
196 request_parse_status = Http::scBadRequest;
197 debugs(33, 5, "invalid request-line. missing URI delimiter");
198 }
199 return -1;
200 }
201
202 int
203 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer &tok)
204 {
205 // partial match of HTTP/1 magic prefix
206 if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
207 debugs(74, 5, "Parser needs more data to find version");
208 return 0;
209 }
210
211 if (!tok.skip(Http1magic)) {
212 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
213 request_parse_status = Http::scHttpVersionNotSupported;
214 return -1;
215 }
216
217 if (tok.atEnd()) {
218 debugs(74, 5, "Parser needs more data to find version");
219 return 0;
220 }
221
222 // get the version minor DIGIT
223 SBuf digit;
224 if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
225
226 // found version fully AND terminator
227 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
228 request_parse_status = Http::scOkay;
229 buf_ = tok.remaining(); // incremental parse checkpoint
230 return 1;
231
232 } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
233 debugs(74, 5, "Parser needs more data to find version");
234 return 0;
235
236 } // else error ...
237
238 // non-DIGIT. invalid version number.
239 request_parse_status = Http::scHttpVersionNotSupported;
240 debugs(33, 5, "invalid request-line. garabge before line terminator");
241 return -1;
242 }
243
244 /**
245 * Attempt to parse the first line of a new request message.
246 *
247 * Governed by:
248 * RFC 1945 section 5.1
249 * RFC 7230 section 2.6, 3.1 and 3.5
250 *
251 * Parsing state is stored between calls. The current implementation uses
252 * checkpoints after each successful request-line field.
253 * The return value tells you whether the parsing is completed or not.
254 *
255 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
256 * \retval 1 successful parse. member fields contain the request-line items
257 * \retval 0 more data is needed to complete the parse
258 */
259 int
260 Http::One::RequestParser::parseRequestFirstLine()
261 {
262 ::Parser::Tokenizer tok(buf_);
263
264 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
265 debugs(74, DBG_DATA, buf_);
266
267 CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
268
269 if (Config.onoff.relaxed_header_parser) {
270 // RFC 7230 section 3.5
271 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
272 // as whitespace between request-line fields
273 WspDelim += CharacterSet::HTAB
274 + CharacterSet("VT,FF","\x0B\x0C")
275 + CharacterSet::CR;
276 }
277
278 // only search for method if we have not yet found one
279 if (method_ == Http::METHOD_NONE) {
280 const int res = parseMethodField(tok, WspDelim);
281 if (res < 1)
282 return res;
283 // else keep going...
284 }
285
286 // tolerant parser allows multiple whitespace characters between request-line fields
287 if (Config.onoff.relaxed_header_parser) {
288 const size_t garbage = tok.skipAll(WspDelim);
289 if (garbage > 0) {
290 firstLineGarbage_ += garbage;
291 buf_ = tok.remaining(); // re-checkpoint after garbage
292 }
293 }
294 if (tok.atEnd()) {
295 debugs(74, 5, "Parser needs more data");
296 return 0;
297 }
298
299 // only search for request-target (URL) if we have not yet found one
300 if (uri_.isEmpty()) {
301 const int res = parseUriField(tok, WspDelim);
302 if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
303 return res;
304 // else keep going...
305 }
306
307 // tolerant parser allows multiple whitespace characters between request-line fields
308 if (Config.onoff.relaxed_header_parser) {
309 const size_t garbage = tok.skipAll(WspDelim);
310 if (garbage > 0) {
311 firstLineGarbage_ += garbage;
312 buf_ = tok.remaining(); // re-checkpoint after garbage
313 }
314 }
315 if (tok.atEnd()) {
316 debugs(74, 5, "Parser needs more data");
317 return 0;
318 }
319
320 // HTTP/1 version suffix (protocol magic) followed by CR*LF
321 if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
322 return parseHttpVersionField(tok);
323 }
324
325 // If we got here this method has been called too many times
326 request_parse_status = Http::scInternalServerError;
327 debugs(33, 5, "ERROR: Parser already processed request-line");
328 return -1;
329 }
330
331 bool
332 Http::One::RequestParser::parse(const SBuf &aBuf)
333 {
334 buf_ = aBuf;
335 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
336
337 // stage 1: locate the request-line
338 if (parsingStage_ == HTTP_PARSE_NONE) {
339 skipGarbageLines();
340
341 // if we hit something before EOS treat it as a message
342 if (!buf_.isEmpty())
343 parsingStage_ = HTTP_PARSE_FIRST;
344 else
345 return false;
346 }
347
348 // stage 2: parse the request-line
349 if (parsingStage_ == HTTP_PARSE_FIRST) {
350 PROF_start(HttpParserParseReqLine);
351 const int retcode = parseRequestFirstLine();
352
353 // first-line (or a look-alike) found successfully.
354 if (retcode > 0) {
355 parsingStage_ = HTTP_PARSE_MIME;
356 }
357
358 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
359 debugs(74, 5, "request-line: method: " << method_);
360 debugs(74, 5, "request-line: url: " << uri_);
361 debugs(74, 5, "request-line: proto: " << msgProtocol_);
362 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
363 PROF_stop(HttpParserParseReqLine);
364
365 // syntax errors already
366 if (retcode < 0) {
367 parsingStage_ = HTTP_PARSE_DONE;
368 return false;
369 }
370 }
371
372 // stage 3: locate the mime header block
373 if (parsingStage_ == HTTP_PARSE_MIME) {
374 // HTTP/1.x request-line is valid and parsing completed.
375 if (msgProtocol_.major == 1) {
376 /* NOTE: HTTP/0.9 requests do not have a mime header block.
377 * So the rest of the code will need to deal with '0'-byte headers
378 * (ie, none, so don't try parsing em)
379 */
380 int64_t mimeHeaderBytes = 0;
381 // XXX: c_str() reallocates. performance regression.
382 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
383 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
384 debugs(33, 5, "Too large request");
385 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
386 parsingStage_ = HTTP_PARSE_DONE;
387 } else
388 debugs(33, 5, "Incomplete request, waiting for end of headers");
389 return false;
390 }
391 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
392 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
393
394 } else
395 debugs(33, 3, "Missing HTTP/1.x identifier");
396
397 // NP: we do not do any further stages here yet so go straight to DONE
398 parsingStage_ = HTTP_PARSE_DONE;
399
400 // Squid could handle these headers, but admin does not want to
401 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
402 debugs(33, 5, "Too large request");
403 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
404 return false;
405 }
406 }
407
408 return !needsMoreData();
409 }
410