2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "profiler/Profiler.h"
16 #include "SquidConfig.h"
// Constructor. The only member initializer visible in this excerpt sets
// request_parse_status to Http::scNone, i.e. no parse result recorded yet.
18 Http::One::RequestParser::RequestParser() :
20 request_parse_status(Http::scNone
),
// Returns the request-line size computed from the already-parsed fields:
// length of the method image + length of the URI + 12 fixed bytes.
24 Http1::Parser::size_type
25 Http::One::RequestParser::firstLineSize() const
27 // RFC 7230 section 2.6
28 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
// The constant 12 is the fixed part of the grammar above:
// 2 SP delimiters + 5 bytes of HTTP-slash prefix + 3 bytes DIGIT.DIGIT + 2 bytes CRLF.
29 return method_
.image().length() + uri_
.length() + 12;
33 * Attempt to parse the first line of a new request message.
35 * Governed by RFC 7230 section 3.5
37 * In the interest of robustness, a server that is expecting to receive
38 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
39 * received prior to the request-line.
42 * Parsing state is stored between calls to avoid repeating buffer scans.
43 * If garbage is found the parsing offset is incremented.
// Handles empty lines (bare LF or CRLF) found ahead of the request-line.
// Everything visible here is gated on Config.onoff.relaxed_header_parser
// being non-zero. The loop body that consumes the bytes is not visible in
// this excerpt.
46 Http::One::RequestParser::skipGarbageLines()
48 if (Config
.onoff
.relaxed_header_parser
) {
// A negative setting additionally logs an important-level warning when the
// buffer begins with CR or LF.
// NOTE(review): buf_[0] is read here before any emptiness check visible in
// this excerpt -- confirm that indexing an empty SBuf is safe.
49 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
50 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
51 "CRLF bytes received ahead of request-line. " <<
52 "Ignored due to relaxed_header_parser.");
53 // Be tolerant of prefix empty lines
54 // ie any series of either \n or \r\n with no other characters and no repeated \r
// NOTE(review): buf_[1] is read whenever buf_[0] is CR, even when the buffer
// may hold a single byte -- confirm SBuf indexing one past the end is safe.
55 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
61 /// Detect and skip a CRLF or LF line terminator at the current tokenizer position.
62 /// Consumes from the tokenizer and returns true only if a terminator was found.
64 Http::One::RequestParser::skipLineTerminator(::Parser::Tokenizer
&tok
) const
// crlf holds the strict two-byte terminator; the statement that matches it
// against tok is not visible in this excerpt.
66 static const SBuf
crlf("\r\n");
// A bare LF is only accepted as a terminator when relaxed_header_parser is enabled.
70 if (Config
.onoff
.relaxed_header_parser
&& tok
.skipOne(CharacterSet::LF
))
77 * Attempt to parse the method field out of an HTTP message request-line.
80 * RFC 1945 section 5.1
81 * RFC 7230 section 2.6, 3.1 and 3.5
83 * Parsing state is stored between calls. The current implementation uses
84 * checkpoints after each successful request-line field.
85 * The return value tells you whether the parsing is completed or not.
87 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
88 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
89 * \retval 0 more data is needed to complete the parse
// Parses the method field at the start of the request-line.
// tok      - tokenizer positioned at the first byte of the candidate method
// WspDelim - set of characters accepted as the delimiter after the method
// Effects visible in this excerpt (the return statements are not visible):
//  - request_parse_status = scBadRequest when no TCHAR is found, or when a
//    non-delimiter byte follows a method shorter than maxMethodLength
//  - request_parse_status = scNotImplemented when maxMethodLength bytes were
//    accepted and the next byte is not a delimiter
//  - on success method_ is built from the matched bytes and buf_ is moved to
//    the unparsed remainder
92 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
94 // scan for up to 16 valid method characters.
95 static const size_t maxMethodLength
= 16;
99 // method field is a sequence of TCHAR.
100 // NP: prefix-with-limit returns true if it finds ANY valid chars
101 if (!tok
.prefix(methodFound
, CharacterSet::TCHAR
, maxMethodLength
)) {
102 // missing/invalid 'method'.
103 request_parse_status
= Http::scBadRequest
;
104 debugs(33, 5, "invalid request-line. missing method");
108 // we may be at the end if we found exactly maxMethodLength bytes
110 debugs(74, 5, "Parser needs more data to find method");
114 // ... followed by at least one whitespace character.
115 if (!tok
.skipOne(WspDelim
)) {
116 // non-delimiter found after accepted method bytes means ...
117 if (methodFound
.length() == maxMethodLength
) {
118 // method longer than acceptable.
119 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
120 request_parse_status
= Http::scNotImplemented
;
121 debugs(33, 5, "invalid request-line. method too long");
123 // invalid (non-TCHAR, non-delimiter) character ended the method
124 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
125 request_parse_status
= Http::scBadRequest
;
126 debugs(33, 5, "invalid request-line. missing method delimiter");
130 method_
= HttpRequestMethod(methodFound
);
131 buf_
= tok
.remaining(); // incremental parse checkpoint
// Parses the request-target (URI) field of the request-line.
// tok      - tokenizer positioned at the first byte of the candidate URI
// WspDelim - set of characters accepted as the delimiter after the URI
// Effects visible in this excerpt (the return statements are not visible):
//  - request_parse_status = scBadRequest when no URI character is found, or
//    when a non-delimiter byte follows a URI shorter than maxUriLength
//  - request_parse_status = scUriTooLong when maxUriLength bytes were accepted
//    and the next byte is not a delimiter
//  - for a GET whose URI is directly followed by a line terminator,
//    msgProtocol_ becomes version 0.9 and request_parse_status = scOkay
//  - buf_ is moved to the unparsed remainder at each checkpoint
136 Http::One::RequestParser::parseUriField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
138 // URI field is a sequence of ... what? segments all have different valid charset
139 // go with non-whitespace non-binary characters for now
// UriChars is a function-local static that starts empty and is filled on the
// first call; membership of the letter a is used as the already-built marker.
140 static CharacterSet
UriChars("URI-Chars","");
141 if (!UriChars
['a']) { // if it needs initializing...
142 /* RFC 3986 section 2:
144 * A URI is composed from a limited set of characters consisting of
145 * digits, letters, and a few graphic symbols.
148 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
150 UriChars
+= CharacterSet::HEXDIG
;
151 // RFC 3986 section 2.2 - reserved characters
152 UriChars
+= CharacterSet("gen-delims", ":/?#[]@");
153 UriChars
+= CharacterSet("sub-delims", "!$&'()*+,;=");
154 // RFC 3986 section 2.3 - unreserved characters
155 UriChars
+= CharacterSet::ALPHA
;
156 UriChars
+= CharacterSet::DIGIT
;
157 UriChars
+= CharacterSet("unreserved", "-._~");
160 /* Arbitrary 64KB URI upper length limit.
162 * Not quite as arbitrary as it seems though. Old SquidString objects
163 * cannot store strings larger than 64KB, so we must limit until they
164 * have all been replaced with SBuf.
166 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
167 * at least 8000 octets for the whole line, including method and version.
// The limit is the smaller of (configured header size - current first-line
// size) and 65535.
// NOTE(review): the subtraction is done on size_t; if firstLineSize() can
// exceed Config.maxRequestHeaderSize it would wrap to a huge value -- confirm.
169 const size_t maxUriLength
= min(static_cast<size_t>(Config
.maxRequestHeaderSize
) - firstLineSize(),
170 static_cast<size_t>((64*1024)-1));
173 // NP: prefix-with-limit returns true if it finds ANY valid chars
174 if (!tok
.prefix(uriFound
, UriChars
, maxUriLength
)) {
175 // else did not find any valid URI character
176 debugs(33, 5, "invalid request-line. missing URL");
177 request_parse_status
= Http::scBadRequest
;
181 // we may be at the end if we found exactly maxUriLength bytes
183 debugs(74, 5, "Parser needs more data to find URI");
187 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
188 if (method_
== Http::METHOD_GET
&& skipLineTerminator(tok
)) {
189 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
190 msgProtocol_
= Http::ProtocolVersion(0,9);
192 request_parse_status
= Http::scOkay
;
193 buf_
= tok
.remaining(); // incremental parse checkpoint
197 // ... followed by at least one whitespace character.
198 if (!tok
.skipOne(WspDelim
)) {
199 // non-delimiter found after accepted URL bytes means ...
200 if (uriFound
.length() == maxUriLength
) {
201 // URL longer than acceptable.
202 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
203 request_parse_status
= Http::scUriTooLong
;
204 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength
<< " bytes");
207 // invalid non-delimiter character ended the URL
208 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
209 request_parse_status
= Http::scBadRequest
;
210 debugs(33, 5, "invalid request-line. missing URI delimiter");
215 buf_
= tok
.remaining(); // incremental parse checkpoint
// Parses the HTTP-version field and the line terminator ending the request-line.
// tok - tokenizer positioned at the first byte of the candidate version
// Effects visible in this excerpt (the return statements are not visible):
//  - request_parse_status = scHttpVersionNotSupported when the Http1magic
//    prefix does not match, when the minor version is not a digit, or when
//    something other than a line terminator follows the version
//  - on success msgProtocol_ becomes version 1.<minor>, request_parse_status
//    becomes scOkay and buf_ is moved to the unparsed remainder
220 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer
&tok
)
222 // partial match of HTTP/1 magic prefix
// Fewer bytes than the magic are available and they all match its start, so
// this is treated as incomplete input rather than an error.
223 if (tok
.remaining().length() < Http1magic
.length() && Http1magic
.startsWith(tok
.remaining())) {
224 debugs(74, 5, "Parser needs more data to find version");
228 if (!tok
.skip(Http1magic
)) {
229 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
230 request_parse_status
= Http::scHttpVersionNotSupported
;
235 debugs(74, 5, "Parser needs more data to find version");
239 // get the version minor DIGIT
// The limit of 1 means only a single-digit minor version is accepted.
241 if (!tok
.prefix(digit
, CharacterSet::DIGIT
, 1)) {
242 // non-DIGIT. invalid version number.
243 request_parse_status
= Http::scHttpVersionNotSupported
;
244 debugs(33, 5, "invalid request-line. non-numeric or too-large HTTP minor version");
249 debugs(74, 5, "Parser needs more data to find version");
253 // version is always followed by the terminator
254 if (!skipLineTerminator(tok
)) {
// A lone CR as the last available byte may be the first half of CRLF, so it
// is reported as needing more data instead of as an error.
255 if (tok
.skipOne(CharacterSet::CR
) && tok
.atEnd()) {
256 debugs(74, 5, "Parser needs more data to find version");
259 request_parse_status
= Http::scHttpVersionNotSupported
;
260 debugs(33, 5, "invalid request-line. garabge before line terminator");
264 // found version fully AND terminator
// The minor version is the numeric value of the single ASCII digit captured above.
265 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
266 request_parse_status
= Http::scOkay
;
267 buf_
= tok
.remaining(); // incremental parse checkpoint
272 * Attempt to parse the first line of a new request message.
275 * RFC 1945 section 5.1
276 * RFC 7230 section 2.6, 3.1 and 3.5
278 * Parsing state is stored between calls. The current implementation uses
279 * checkpoints after each successful request-line field.
280 * The return value tells you whether the parsing is completed or not.
282 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
283 * \retval 1 successful parse. member fields contain the request-line items
284 * \retval 0 more data is needed to complete the parse
// Parses the request-line field by field: method, request-target, HTTP-version.
// Each field is attempted only while its member is still unset
// (method_ == METHOD_NONE, uri_ empty, msgProtocol_.protocol == PROTO_NONE),
// so a repeated call resumes from the checkpoint kept in buf_.
// If all three are already set, request_parse_status becomes
// scInternalServerError.
287 Http::One::RequestParser::parseRequestFirstLine()
289 ::Parser::Tokenizer
tok(buf_
);
291 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
292 debugs(74, DBG_DATA
, buf_
);
294 CharacterSet WspDelim
= CharacterSet::SP
; // strict parse only accepts SP
296 if (Config
.onoff
.relaxed_header_parser
) {
297 // RFC 7230 section 3.5
298 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
299 // as whitespace between request-line fields
300 WspDelim
+= CharacterSet::HTAB
301 + CharacterSet("VT,FF","\x0B\x0C")
305 // only search for method if we have not yet found one
306 if (method_
== Http::METHOD_NONE
) {
307 const int res
= parseMethodField(tok
, WspDelim
);
310 // else keep going...
313 // tolerant parser allows multiple whitespace characters between fields
// The number of extra delimiter bytes skipped is accumulated in firstLineGarbage_.
314 if (Config
.onoff
.relaxed_header_parser
) {
315 const size_t garbage
= tok
.skipAll(WspDelim
);
317 firstLineGarbage_
+= garbage
;
318 buf_
= tok
.remaining(); // re-checkpoint after garbage
322 debugs(74, 5, "Parser needs more data");
326 // only search for request-target (URL) if we have not yet found one
327 if (uri_
.isEmpty()) {
328 const int res
= parseUriField(tok
, WspDelim
);
// Stop on error or incomplete input (res < 1), or when the protocol is
// already HTTP after the URI. Presumably that is the HTTP/0.9 request-line
// case set by parseUriField, which has no version field -- confirm.
329 if (res
< 1 || msgProtocol_
.protocol
== AnyP::PROTO_HTTP
)
331 // else keep going...
334 // tolerant parser allows multiple whitespace characters between fields
335 if (Config
.onoff
.relaxed_header_parser
) {
336 const size_t garbage
= tok
.skipAll(WspDelim
);
338 firstLineGarbage_
+= garbage
;
339 buf_
= tok
.remaining(); // re-checkpoint after garbage
343 debugs(74, 5, "Parser needs more data");
347 // HTTP/1 version suffix (protocol magic) followed by CR*LF
348 if (msgProtocol_
.protocol
== AnyP::PROTO_NONE
) {
349 return parseHttpVersionField(tok
);
352 // If we got here this method has been called too many times
353 request_parse_status
= Http::scInternalServerError
;
354 debugs(33, 5, "ERROR: Parser already processed request-line");
// Top-level entry point: advances parsingStage_ through
// HTTP_PARSE_NONE -> HTTP_PARSE_FIRST -> HTTP_PARSE_MIME -> HTTP_PARSE_DONE
// and returns true once no more data is needed (the negation of needsMoreData()).
// aBuf - input bytes; in the lines visible here it is only used for logging.
359 Http::One::RequestParser::parse(const SBuf
&aBuf
)
362 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
364 // stage 1: locate the request-line
365 if (parsingStage_
== HTTP_PARSE_NONE
) {
368 // if we hit something before EOS treat it as a message
370 parsingStage_
= HTTP_PARSE_FIRST
;
375 // stage 2: parse the request-line
376 if (parsingStage_
== HTTP_PARSE_FIRST
) {
377 PROF_start(HttpParserParseReqLine
);
378 const int retcode
= parseRequestFirstLine();
380 // first-line (or a look-alike) found successfully.
382 parsingStage_
= HTTP_PARSE_MIME
;
385 debugs(74, 5, "request-line: retval " << retcode
<< ": line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
386 debugs(74, 5, "request-line: method: " << method_
);
387 debugs(74, 5, "request-line: url: " << uri_
);
388 debugs(74, 5, "request-line: proto: " << msgProtocol_
);
// Bytes processed so far = input length minus what is still left in buf_.
389 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
390 PROF_stop(HttpParserParseReqLine
);
392 // syntax errors already
394 parsingStage_
= HTTP_PARSE_DONE
;
399 // stage 3: locate the mime header block
400 if (parsingStage_
== HTTP_PARSE_MIME
) {
401 // HTTP/1.x request-line is valid and parsing completed.
402 if (msgProtocol_
.major
== 1) {
403 /* NOTE: HTTP/0.9 requests do not have a mime header block.
404 * So the rest of the code will need to deal with '0'-byte headers
405 * (ie, none, so don't try parsing em)
407 int64_t mimeHeaderBytes
= 0;
408 // XXX: c_str() reallocates. performance regression.
// A zero result from headersEnd() is handled as: end of headers not found yet.
409 if ((mimeHeaderBytes
= headersEnd(buf_
.c_str(), buf_
.length())) == 0) {
// Give up once the buffered bytes plus the request-line size reach the
// configured limit; otherwise wait for more input.
410 if (buf_
.length()+firstLineSize() >= Config
.maxRequestHeaderSize
) {
411 debugs(33, 5, "Too large request");
412 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
413 parsingStage_
= HTTP_PARSE_DONE
;
415 debugs(33, 5, "Incomplete request, waiting for end of headers");
// Move the complete header block out of buf_ into mimeHeaderBlock_.
418 mimeHeaderBlock_
= buf_
.consume(mimeHeaderBytes
);
419 debugs(74, 5, "mime header (0-" << mimeHeaderBytes
<< ") {" << mimeHeaderBlock_
<< "}");
422 debugs(33, 3, "Missing HTTP/1.x identifier");
424 // NP: we do not do any further stages here yet so go straight to DONE
425 parsingStage_
= HTTP_PARSE_DONE
;
427 // Squid could handle these headers, but admin does not want to
428 if (messageHeaderSize() >= Config
.maxRequestHeaderSize
) {
429 debugs(33, 5, "Too large request");
430 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
435 return !needsMoreData();