2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "profiler/Profiler.h"
16 #include "SquidConfig.h"
// Constructor. The visible initializer starts request_parse_status at Http::scNone.
// NOTE(review): the initializer list and body look truncated in this listing
// (embedded numbering jumps 18 -> 20 and nothing follows the comma) — confirm
// the remaining member initializers against the real file.
18 Http::One::RequestParser::RequestParser() :
20 request_parse_status(Http::scNone
),
// Returns the request-line length implied by the fields parsed so far:
// method image length + URI length + 12 fixed bytes, where
// 12 = SP (1) + SP (1) + "HTTP/" (5) + DIGIT "." DIGIT (3) + CRLF (2).
24 Http1::Parser::size_type
25 Http::One::RequestParser::firstLineSize() const
27 // RFC 7230 section 2.6
28 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
// NOTE(review): the constant assumes exactly the layout above; extra tolerated
// whitespace or an HTTP/0.9 line without a version suffix is not reflected —
// confirm callers only need an estimate.
29 return method_
.image().length() + uri_
.length() + 12;
33 * Skip empty lines received ahead of the first line of a new request message.
35 * Governed by RFC 7230 section 3.5
37 * In the interest of robustness, a server that is expecting to receive
38 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
39 * received prior to the request-line.
42 * Parsing state is stored between calls to avoid repeating buffer scans.
43 * If garbage is found the parsing offset is incremented.
// Handles empty lines that precede the request-line. Everything visible here
// is gated on Config.onoff.relaxed_header_parser being non-zero.
46 Http::One::RequestParser::skipGarbageLines()
48 if (Config
.onoff
.relaxed_header_parser
) {
// A negative setting additionally logs a warning when the buffer starts with CR or LF.
// NOTE(review): buf_[0] is read here without the isEmpty() guard that the loop
// below uses — confirm this is never reached with an empty buffer.
49 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
50 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
51 "CRLF bytes received ahead of request-line. " <<
52 "Ignored due to relaxed_header_parser.");
53 // Be tolerant of prefix empty lines
54 // ie any series of either \n or \r\n with no other characters and no repeated \r
// NOTE(review): when buf_[0] is '\r', buf_[1] is read even if that CR is the
// only byte buffered so far — confirm SBuf::operator[] is safe one past the
// end, or guard on length() first.
55 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
62 * Attempt to parse the method field out of an HTTP message request-line.
65 * RFC 1945 section 5.1
66 * RFC 7230 section 2.6, 3.1 and 3.5
68 * Parsing state is stored between calls. The current implementation uses
69 * checkpoints after each successful request-line field.
70 * The return value tells you whether the parsing is completed or not.
72 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
73 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
74 * \retval 0 more data is needed to complete the parse
// Parses the method token at the front of tok.
// \param tok       tokenizer positioned at the start of the request-line
// \param WspDelim  characters accepted as the single delimiter after the method
// On success method_ is built from the matched token and buf_ is checkpointed
// to tok.remaining(). On error request_parse_status is set to
// Http::scNotImplemented (token reached maxMethodLength) or Http::scBadRequest.
77 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
79 // scan for up to 16 valid method characters.
80 static const size_t maxMethodLength
= 16; // TODO: make this configurable?
82 // method field is a sequence of TCHAR.
// Success needs both a non-empty TCHAR run and exactly one delimiter after it.
84 if (tok
.prefix(methodFound
, CharacterSet::TCHAR
, maxMethodLength
) && tok
.skipOne(WspDelim
)) {
86 method_
= HttpRequestMethod(methodFound
);
87 buf_
= tok
.remaining(); // incremental parse checkpoint
90 } else if (tok
.atEnd()) {
91 debugs(74, 5, "Parser needs more data to find method");
96 // non-delimiter found after accepted method bytes means ...
// A token that filled the whole length limit is treated as "too long" rather
// than as a delimiter problem.
97 if (methodFound
.length() == maxMethodLength
) {
98 // method longer than acceptable.
99 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
100 request_parse_status
= Http::scNotImplemented
;
101 debugs(33, 5, "invalid request-line. method too long");
103 // invalid character in the method, or missing delimiter after it
104 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
105 request_parse_status
= Http::scBadRequest
;
106 debugs(33, 5, "invalid request-line. missing method delimiter");
// Builds the character set accepted inside a request URI, following RFC 3986
// section 2: HEXDIG, gen-delims, sub-delims, ALPHA, DIGIT and "-._~".
// NOTE(review): the function header is not visible in this listing; the name
// is presumed to be uriValidCharacters() from its call sites — confirm.
// NOTE(review): the section 2.1 comment below mentions "%", but no visible
// line adds '%' to the set — confirm it is added in the real file.
114 CharacterSet
UriChars("URI-Chars","");
116 /* RFC 3986 section 2:
118 * A URI is composed from a limited set of characters consisting of
119 * digits, letters, and a few graphic symbols.
122 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
124 UriChars
+= CharacterSet::HEXDIG
;
125 // RFC 3986 section 2.2 - reserved characters
126 UriChars
+= CharacterSet("gen-delims", ":/?#[]@");
127 UriChars
+= CharacterSet("sub-delims", "!$&'()*+,;=");
128 // RFC 3986 section 2.3 - unreserved characters
129 UriChars
+= CharacterSet::ALPHA
;
130 UriChars
+= CharacterSet::DIGIT
;
131 UriChars
+= CharacterSet("unreserved", "-._~");
// Parses the request-target (URI) field at the front of tok.
// Outcomes visible here:
//  - URI followed by one SP: buf_ is checkpointed to tok.remaining();
//  - method is GET and the URI is followed by a line terminator: treated as
//    HTTP/0.9 (msgProtocol_ = 0.9, uri_ set, status Http::scOkay);
//  - tokenizer exhausted: more data is needed;
//  - otherwise status is Http::scUriTooLong when the URI reached maxUriLength,
//    else Http::scBadRequest.
137 Http::One::RequestParser::parseUriField(::Parser::Tokenizer
&tok
)
139 // URI field is a sequence of ... what? segments all have different valid charset
140 // go with non-whitespace non-binary characters for now
// Function-local static: the set is built from uriValidCharacters() once.
141 static CharacterSet UriChars
= uriValidCharacters();
143 /* Arbitrary 64KB URI upper length limit.
145 * Not quite as arbitrary as it seems though. Old SquidString objects
146 * cannot store strings larger than 64KB, so we must limit until they
147 * have all been replaced with SBuf.
149 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
150 * at least 8000 octets for the whole line, including method and version.
// NOTE(review): the subtraction is done in size_t, so it would wrap to a huge
// value if firstLineSize() ever exceeded Config.maxRequestHeaderSize, and
// min() would then silently pick the 64KB-1 cap — confirm that cannot happen
// with very small configured limits, or clamp before subtracting.
152 const size_t maxUriLength
= min(static_cast<size_t>(Config
.maxRequestHeaderSize
) - firstLineSize(),
153 static_cast<size_t>((64*1024)-1));
157 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
158 if (tok
.prefix(uriFound
, UriChars
, maxUriLength
) && tok
.skipOne(CharacterSet::SP
)) {
160 buf_
= tok
.remaining(); // incremental parse checkpoint
163 // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
164 } else if (method_
== Http::METHOD_GET
&& skipLineTerminator(tok
)) {
165 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
166 msgProtocol_
= Http::ProtocolVersion(0,9);
167 uri_
= uriFound
; // found by successful prefix() call earlier.
168 request_parse_status
= Http::scOkay
;
169 buf_
= tok
.remaining(); // incremental parse checkpoint
172 } else if (tok
.atEnd()) {
173 debugs(74, 5, "Parser needs more data to find URI");
// A URI that filled the whole length limit is reported as too long rather
// than as a missing delimiter.
179 if (uriFound
.length() == maxUriLength
) {
180 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
181 request_parse_status
= Http::scUriTooLong
;
182 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength
<< " bytes");
184 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
185 request_parse_status
= Http::scBadRequest
;
186 debugs(33, 5, "invalid request-line. missing URI delimiter");
// Parses the protocol-version suffix of the request-line: the Http1magic
// prefix, one minor-version DIGIT and the line terminator.
// Outcomes visible here:
//  - remaining bytes are a proper prefix of Http1magic: more data is needed;
//  - Http1magic does not match: Http::scHttpVersionNotSupported;
//  - DIGIT + terminator found: msgProtocol_ = 1.<digit>, status Http::scOkay,
//    buf_ checkpointed to tok.remaining();
//  - anything else before the terminator: Http::scHttpVersionNotSupported.
192 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer
&tok
)
194 // partial match of HTTP/1 magic prefix
195 if (tok
.remaining().length() < Http1magic
.length() && Http1magic
.startsWith(tok
.remaining())) {
196 debugs(74, 5, "Parser needs more data to find version");
200 if (!tok
.skip(Http1magic
)) {
201 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
202 request_parse_status
= Http::scHttpVersionNotSupported
;
207 debugs(74, 5, "Parser needs more data to find version");
211 // get the version minor DIGIT
// prefix() is limited to 1 character, so *digit.rawContent() below is the
// whole minor version.
213 if (tok
.prefix(digit
, CharacterSet::DIGIT
, 1) && skipLineTerminator(tok
)) {
215 // found version fully AND terminator
216 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
217 request_parse_status
= Http::scOkay
;
218 buf_
= tok
.remaining(); // incremental parse checkpoint
// Input ended right after the digit, or after a lone CR: wait for more bytes.
221 } else if (tok
.atEnd() || (tok
.skip('\r') && tok
.atEnd())) {
222 debugs(74, 5, "Parser needs more data to find version");
227 // non-DIGIT. invalid version number.
228 request_parse_status
= Http::scHttpVersionNotSupported
;
229 debugs(33, 5, "invalid request-line. garbage before line terminator");
234 * Attempt to parse the first line of a new request message.
237 * RFC 1945 section 5.1
238 * RFC 7230 section 2.6, 3.1 and 3.5
240 * Parsing state is stored between calls. The current implementation uses
241 * checkpoints after each successful request-line field.
242 * The return value tells you whether the parsing is completed or not.
244 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
245 * \retval 1 successful parse. member fields contain the request-line items
246 * \retval 0 more data is needed to complete the parse
// Parses the request-line held in buf_ in stages: method, then (relaxed mode
// only) extra whitespace, then either the whitespace-tolerant reverse parse
// of the rest of the line or the strict URI + HTTP-version parse. The path is
// chosen by Config.onoff.relaxed_header_parser. Results are stored in
// method_, uri_, msgProtocol_ and request_parse_status.
249 Http::One::RequestParser::parseRequestFirstLine()
251 ::Parser::Tokenizer
tok(buf_
);
253 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
254 debugs(74, DBG_DATA
, buf_
);
256 // NP: would be static, except it needs to change with reconfigure
257 CharacterSet WspDelim
= CharacterSet::SP
; // strict parse only accepts SP
259 if (Config
.onoff
.relaxed_header_parser
) {
260 // RFC 7230 section 3.5
261 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
262 // as whitespace between request-line fields
263 WspDelim
+= CharacterSet::HTAB
264 + CharacterSet("VT,FF","\x0B\x0C")
268 // only search for method if we have not yet found one
269 if (method_
== Http::METHOD_NONE
) {
270 const int res
= parseMethodField(tok
, WspDelim
);
273 // else keep going...
276 // tolerant parser allows multiple whitespace characters between request-line fields
277 if (Config
.onoff
.relaxed_header_parser
) {
// Skipped delimiter bytes are tallied in firstLineGarbage_.
278 const size_t garbage
= tok
.skipAll(WspDelim
);
280 firstLineGarbage_
+= garbage
;
281 buf_
= tok
.remaining(); // re-checkpoint after garbage
285 debugs(74, 5, "Parser needs more data");
289 // from here on, we have two possible parse paths: whitespace tolerant, and strict
290 if (Config
.onoff
.relaxed_header_parser
) {
291 // whitespace tolerant
294 // * this would be static, except WspDelim changes with reconfigure
295 // * HTTP-version charset is included by uriValidCharacters()
296 // * terminal CR is included by WspDelim here in relaxed parsing
297 CharacterSet LfDelim
= uriValidCharacters() + WspDelim
;
299 // seek the LF character, then tokenize the line in reverse
301 if (tok
.prefix(line
, LfDelim
) && tok
.skip('\n')) {
302 ::Parser::Tokenizer
rTok(line
);
304 (void)rTok
.suffix(nil
,CharacterSet::CR
); // optional CR in terminator
// NOTE(review): unlike parseHttpVersionField(), which limits the DIGIT match
// to 1 character, suffix() gets no length limit here while only the first
// byte of digit is used as the minor version — confirm a multi-digit suffix
// (e.g. a line ending in "HTTP/1.10") cannot be accepted as 1.1.
306 if (rTok
.suffix(digit
,CharacterSet::DIGIT
) && rTok
.skipSuffix(Http1magic
) && rTok
.suffix(nil
,WspDelim
)) {
// Whatever is left after peeling version and whitespace off the end is the URI.
307 uri_
= rTok
.remaining();
308 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
309 if (uri_
.isEmpty()) {
310 debugs(33, 5, "invalid request-line. missing URL");
311 request_parse_status
= Http::scBadRequest
;
315 request_parse_status
= Http::scOkay
;
316 buf_
= tok
.remaining(); // incremental parse checkpoint
319 } else if (method_
== Http::METHOD_GET
) {
320 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
321 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
322 msgProtocol_
= Http::ProtocolVersion(0,9);
323 static const SBuf
cr("\r",1);
324 uri_
= line
.trim(cr
,false,true);
325 request_parse_status
= Http::scOkay
;
326 buf_
= tok
.remaining(); // incremental parse checkpoint
330 debugs(33, 5, "invalid request-line. not HTTP");
331 request_parse_status
= Http::scBadRequest
;
335 debugs(74, 5, "Parser needs more data");
338 // else strict non-whitespace tolerant parse
340 // only search for request-target (URL) if we have not yet found one
341 if (uri_
.isEmpty()) {
342 const int res
= parseUriField(tok
);
// msgProtocol_ already being HTTP here means parseUriField() took its HTTP/0.9 branch.
343 if (res
< 1 || msgProtocol_
.protocol
== AnyP::PROTO_HTTP
)
345 // else keep going...
349 debugs(74, 5, "Parser needs more data");
353 // HTTP/1 version suffix (protocol magic) followed by CR*LF
354 if (msgProtocol_
.protocol
== AnyP::PROTO_NONE
) {
355 return parseHttpVersionField(tok
);
358 // If we got here this method has been called too many times
359 request_parse_status
= Http::scInternalServerError
;
360 debugs(33, 5, "ERROR: Parser already processed request-line");
// Drives the staged parse of a request message. parsingStage_ moves through
// HTTP_PARSE_NONE -> HTTP_PARSE_FIRST (request-line) -> HTTP_PARSE_MIME
// (header block) -> HTTP_PARSE_DONE.
// \param aBuf  the input bytes; used here for logging and for the
//              "bytes processed" calculation against buf_
// \return !needsMoreData()
365 Http::One::RequestParser::parse(const SBuf
&aBuf
)
368 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
370 // stage 1: locate the request-line
371 if (parsingStage_
== HTTP_PARSE_NONE
) {
374 // if we hit something before EOS treat it as a message
376 parsingStage_
= HTTP_PARSE_FIRST
;
381 // stage 2: parse the request-line
382 if (parsingStage_
== HTTP_PARSE_FIRST
) {
383 PROF_start(HttpParserParseReqLine
);
384 const int retcode
= parseRequestFirstLine();
386 // first-line (or a look-alike) found successfully.
388 parsingStage_
= HTTP_PARSE_MIME
;
391 debugs(74, 5, "request-line: retval " << retcode
<< ": line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
392 debugs(74, 5, "request-line: method: " << method_
);
393 debugs(74, 5, "request-line: url: " << uri_
);
394 debugs(74, 5, "request-line: proto: " << msgProtocol_
);
395 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
396 PROF_stop(HttpParserParseReqLine
);
398 // syntax errors already
400 parsingStage_
= HTTP_PARSE_DONE
;
405 // stage 3: locate the mime header block
406 if (parsingStage_
== HTTP_PARSE_MIME
) {
407 // HTTP/1.x request-line is valid and parsing completed.
408 if (msgProtocol_
.major
== 1) {
409 /* NOTE: HTTP/0.9 requests do not have a mime header block.
410 * So the rest of the code will need to deal with '0'-byte headers
411 * (ie, none, so don't try parsing em)
413 int64_t mimeHeaderBytes
= 0;
414 // XXX: c_str() reallocates. performance regression.
// A zero result from headersEnd() is handled as "end of headers not found yet"
// (see the debug messages below).
415 if ((mimeHeaderBytes
= headersEnd(buf_
.c_str(), buf_
.length())) == 0) {
// Give up instead of waiting once buffered bytes plus the request-line size
// reach the configured header limit.
416 if (buf_
.length()+firstLineSize() >= Config
.maxRequestHeaderSize
) {
417 debugs(33, 5, "Too large request");
418 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
419 parsingStage_
= HTTP_PARSE_DONE
;
421 debugs(33, 5, "Incomplete request, waiting for end of headers");
// Move the header block out of buf_ into mimeHeaderBlock_.
424 mimeHeaderBlock_
= buf_
.consume(mimeHeaderBytes
);
425 debugs(74, 5, "mime header (0-" << mimeHeaderBytes
<< ") {" << mimeHeaderBlock_
<< "}");
428 debugs(33, 3, "Missing HTTP/1.x identifier");
430 // NP: we do not do any further stages here yet so go straight to DONE
431 parsingStage_
= HTTP_PARSE_DONE
;
433 // Squid could handle these headers, but admin does not want to
434 if (messageHeaderSize() >= Config
.maxRequestHeaderSize
) {
435 debugs(33, 5, "Too large request");
436 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
441 return !needsMoreData();