2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "profiler/Profiler.h"
16 #include "SquidConfig.h"
// Constructor: starts the parser with request_parse_status set to
// Http::scNone. The remaining initializers and the constructor body are on
// lines that are not visible in this listing.
18 Http::One::RequestParser::RequestParser() :
20 request_parse_status(Http::scNone
),
// Returns the size of the request-line as rebuilt from the parsed fields:
// length of the method image + length of the stored URI + 12 fixed bytes.
24 Http1::Parser::size_type
25 Http::One::RequestParser::firstLineSize() const
27 // RFC 7230 section 2.6
28 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
// The constant 12 is the sum of the fixed parts of the grammar above:
// two SP (2) + "HTTP/" (5) + DIGIT "." DIGIT (3) + CRLF (2).
29 return method_
.image().length() + uri_
.length() + 12;
33 * Attempt to skip empty lines received ahead of the first line of a new request message.
35 * Governed by RFC 7230 section 3.5
37 * In the interest of robustness, a server that is expecting to receive
38 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
39 * received prior to the request-line.
42 * Parsing state is stored between calls to avoid repeating buffer scans.
43 * If garbage is found the parsing offset is incremented.
// Skips empty lines that precede the request-line.
// Does nothing unless Config.onoff.relaxed_header_parser is non-zero.
// The body of the skipping loop is on lines not visible in this listing.
46 Http::One::RequestParser::skipGarbageLines()
48 if (Config
.onoff
.relaxed_header_parser
) {
// A negative relaxed_header_parser value additionally reports leading
// CR or LF bytes at DBG_IMPORTANT level; the bytes are still tolerated.
// NOTE(review): buf_[0] is read here with no emptiness check visible in
// this function -- confirm callers never reach this with an empty buf_.
49 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
50 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
51 "CRLF bytes received ahead of request-line. " <<
52 "Ignored due to relaxed_header_parser.");
53 // Be tolerant of prefix empty lines
54 // ie any series of either \n or \r\n with no other characters and no repeated \r
// NOTE(review): buf_[1] is read after only an isEmpty() check, so a buffer
// whose last byte is a lone '\r' is indexed one past its content -- confirm
// that SBuf indexing is safe for that case or guard on buf_.length() > 1.
55 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
62 * Attempt to parse the method field out of an HTTP message request-line.
65 * RFC 1945 section 5.1
66 * RFC 7230 section 2.6, 3.1 and 3.5
68 * Parsing state is stored between calls. The current implementation uses
69 * checkpoints after each successful request-line field.
70 * The return value tells you whether the parsing is completed or not.
72 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
73 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
74 * \retval 0 more data is needed to complete the parse
// Parses the method token at the start of the request-line.
//
// tok      - tokenizer positioned at the first byte of the method
// WspDelim - set of characters accepted as the delimiter after the method
//
// On success method_ is set and buf_ is advanced past the method and one
// delimiter. On failure request_parse_status is set to 501 (method too long)
// or 400 (no delimiter after the method). The return statements are on lines
// not visible in this listing.
77 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
79 // scan for up to 16 valid method characters.
80 static const size_t maxMethodLength
= 16; // TODO: make this configurable?
82 // method field is a sequence of TCHAR.
// methodFound (the output of prefix()) is declared on a line not visible in
// this listing. Success needs a run of TCHAR followed by exactly one
// character from WspDelim.
84 if (tok
.prefix(methodFound
, CharacterSet::TCHAR
, maxMethodLength
) && tok
.skipOne(WspDelim
)) {
86 method_
= HttpRequestMethod(methodFound
);
87 buf_
= tok
.remaining(); // incremental parse checkpoint
// Tokenizer exhausted: the rest of the method or its delimiter may simply
// not have arrived yet.
90 } else if (tok
.atEnd()) {
91 debugs(74, 5, "Parser needs more data to find method");
96 // non-delimiter found after accepted method bytes means ...
// prefix() stops at maxMethodLength, so hitting that length without a
// delimiter means the method continues beyond the limit.
97 if (methodFound
.length() == maxMethodLength
) {
98 // method longer than acceptable.
99 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
100 request_parse_status
= Http::scNotImplemented
;
101 debugs(33, 5, "invalid request-line. method too long");
103 // invalid character in the method (a byte that is neither TCHAR nor a delimiter)
104 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
105 request_parse_status
= Http::scBadRequest
;
106 debugs(33, 5, "invalid request-line. missing method delimiter");
// Parses the request-target (URI) field of the request-line.
//
// tok      - tokenizer positioned at the first byte of the URI
// WspDelim - set of characters accepted as the delimiter after the URI
//
// A GET request whose URI is directly followed by a line terminator is
// treated as an HTTP/0.9 simple-request: msgProtocol_ becomes version 0.9 and
// request_parse_status becomes Http::scOkay. Otherwise a delimiter from
// WspDelim must follow the URI. Failures set request_parse_status to 400
// (missing URL or bad delimiter) or 414 (URI too long). The statements that
// store the URI and the return statements are on lines not visible in this
// listing.
112 Http::One::RequestParser::parseUriField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
114 // URI field is a sequence of ... what? segments all have different valid charset
115 // go with non-whitespace non-binary characters for now
// UriChars is a function-local static built on first use. 'a' only becomes
// a member once ALPHA has been merged below, so its absence is used as the
// "not yet initialized" test.
116 static CharacterSet
UriChars("URI-Chars","");
117 if (!UriChars
['a']) { // if it needs initializing...
118 /* RFC 3986 section 2:
120 * A URI is composed from a limited set of characters consisting of
121 * digits, letters, and a few graphic symbols.
124 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
// NOTE(review): only HEXDIG is merged on the visible lines; the '%'
// character itself is presumably added on a line missing from this
// listing -- confirm.
126 UriChars
+= CharacterSet::HEXDIG
;
127 // RFC 3986 section 2.2 - reserved characters
128 UriChars
+= CharacterSet("gen-delims", ":/?#[]@");
129 UriChars
+= CharacterSet("sub-delims", "!$&'()*+,;=");
130 // RFC 3986 section 2.3 - unreserved characters
131 UriChars
+= CharacterSet::ALPHA
;
132 UriChars
+= CharacterSet::DIGIT
;
133 UriChars
+= CharacterSet("unreserved", "-._~");
136 /* Arbitrary 64KB URI upper length limit.
138 * Not quite as arbitrary as it seems though. Old SquidString objects
139 * cannot store strings larger than 64KB, so we must limit until they
140 * have all been replaced with SBuf.
142 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
143 * at least 8000 octets for the whole line, including method and version.
// The limit is the smaller of (configured header size minus the bytes
// already attributed to the first line) and 64KB-1.
// NOTE(review): the subtraction is done in size_t; if firstLineSize() can
// exceed Config.maxRequestHeaderSize the result wraps to a huge value and
// min() then yields 64KB-1 -- confirm that configuration cannot occur.
145 const size_t maxUriLength
= min(static_cast<size_t>(Config
.maxRequestHeaderSize
) - firstLineSize(),
146 static_cast<size_t>((64*1024)-1));
// uriFound (the output of prefix()) is declared on a line not visible in
// this listing.
149 if (!tok
.prefix(uriFound
, UriChars
, maxUriLength
)) {
150 // NP: prefix() returns true if it finds ANY valid chars
151 debugs(33, 5, "invalid request-line. missing URL");
152 request_parse_status
= Http::scBadRequest
;
156 /* NOTE: we do have to check for token/state in this order.
157 * Because RFC 7230 tolerant parse accepts CR as a whitespace
158 * delimiter in HTTP/1.1 and we may not yet have the LF final
159 * terminator character on HTTP/0.9 simple-request lines.
162 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
163 if (method_
== Http::METHOD_GET
&& skipLineTerminator(tok
)) {
164 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
165 msgProtocol_
= Http::ProtocolVersion(0,9);
167 request_parse_status
= Http::scOkay
;
168 buf_
= tok
.remaining(); // incremental parse checkpoint
// Either nothing follows the URI yet, or only a CR has arrived and the
// byte after it is still unknown.
170 } else if (tok
.atEnd() || (tok
.skip('\r') && tok
.atEnd())) {
171 debugs(74, 5, "Parser needs more data to find URI");
175 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
176 if (tok
.skipOne(WspDelim
)) {
178 buf_
= tok
.remaining(); // incremental parse checkpoint
181 } else if (tok
.atEnd()) {
182 debugs(74, 5, "Parser needs more data to find URI");
// prefix() stops at maxUriLength, so reaching that length without a
// delimiter means the URI continues beyond the limit.
188 if (uriFound
.length() == maxUriLength
) {
189 // URL longer than acceptable.
190 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
191 request_parse_status
= Http::scUriTooLong
;
192 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength
<< " bytes");
194 // invalid non-delimiter character ended the URL
195 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
196 request_parse_status
= Http::scBadRequest
;
197 debugs(33, 5, "invalid request-line. missing URI delimiter");
// Parses the HTTP-version field and the line terminator that ends the
// request-line.
//
// tok - tokenizer positioned at the first byte of the version field
//
// On success msgProtocol_ is set to version 1.<minor>, request_parse_status
// to Http::scOkay, and buf_ is advanced past the terminator. Anything other
// than the Http1magic prefix followed by a single DIGIT and a line terminator
// sets request_parse_status to Http::scHttpVersionNotSupported. The return
// statements and the declaration of 'digit' are on lines not visible in this
// listing. Http1magic is defined outside this function.
203 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer
&tok
)
205 // partial match of HTTP/1 magic prefix
// The remaining input is shorter than the magic and matches its start, so
// it cannot be judged yet.
206 if (tok
.remaining().length() < Http1magic
.length() && Http1magic
.startsWith(tok
.remaining())) {
207 debugs(74, 5, "Parser needs more data to find version");
211 if (!tok
.skip(Http1magic
)) {
212 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
213 request_parse_status
= Http::scHttpVersionNotSupported
;
// (the condition guarding the next message is on a line not visible here)
218 debugs(74, 5, "Parser needs more data to find version");
222 // get the version minor DIGIT
224 if (tok
.prefix(digit
, CharacterSet::DIGIT
, 1) && skipLineTerminator(tok
)) {
226 // found version fully AND terminator
// The single ASCII digit is converted to its numeric value by
// subtracting '0'.
227 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
228 request_parse_status
= Http::scOkay
;
229 buf_
= tok
.remaining(); // incremental parse checkpoint
// Either nothing follows yet, or only a CR has arrived and the byte after
// it is still unknown.
232 } else if (tok
.atEnd() || (tok
.skip('\r') && tok
.atEnd())) {
233 debugs(74, 5, "Parser needs more data to find version");
238 // non-DIGIT. invalid version number.
239 request_parse_status
= Http::scHttpVersionNotSupported
;
// NOTE(review): "garabge" in the message below is a typo for "garbage"; it
// is runtime log output, so it is left unchanged here.
240 debugs(33, 5, "invalid request-line. garabge before line terminator");
245 * Attempt to parse the first line of a new request message.
248 * RFC 1945 section 5.1
249 * RFC 7230 section 2.6, 3.1 and 3.5
251 * Parsing state is stored between calls. The current implementation uses
252 * checkpoints after each successful request-line field.
253 * The return value tells you whether the parsing is completed or not.
255 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
256 * \retval 1 successful parse. member fields contain the request-line items
257 * \retval 0 more data is needed to complete the parse
// Drives parsing of the request-line field by field: method, request-target,
// then HTTP version. Each field is attempted only if the corresponding member
// (method_, uri_, msgProtocol_) is still unset, so the function can be
// re-entered as more bytes arrive. Several return statements and conditions
// are on lines not visible in this listing.
260 Http::One::RequestParser::parseRequestFirstLine()
262 ::Parser::Tokenizer
tok(buf_
);
264 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
265 debugs(74, DBG_DATA
, buf_
);
267 CharacterSet WspDelim
= CharacterSet::SP
; // strict parse only accepts SP
269 if (Config
.onoff
.relaxed_header_parser
) {
270 // RFC 7230 section 3.5
271 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
272 // as whitespace between request-line fields
// NOTE(review): the visible part of the expression below adds HTAB, VT and
// FF; the term for bare CR and the terminating ';' are presumably on a line
// missing from this listing -- confirm.
273 WspDelim
+= CharacterSet::HTAB
274 + CharacterSet("VT,FF","\x0B\x0C")
278 // only search for method if we have not yet found one
279 if (method_
== Http::METHOD_NONE
) {
280 const int res
= parseMethodField(tok
, WspDelim
);
283 // else keep going...
286 // tolerant parser allows multiple whitespace characters between request-line fields
// The number of extra delimiter bytes skipped is accumulated in
// firstLineGarbage_, and buf_ is re-synchronised with the tokenizer.
287 if (Config
.onoff
.relaxed_header_parser
) {
288 const size_t garbage
= tok
.skipAll(WspDelim
);
290 firstLineGarbage_
+= garbage
;
291 buf_
= tok
.remaining(); // re-checkpoint after garbage
295 debugs(74, 5, "Parser needs more data");
299 // only search for request-target (URL) if we have not yet found one
300 if (uri_
.isEmpty()) {
301 const int res
= parseUriField(tok
, WspDelim
);
// Stop here when the URI is incomplete or invalid (res < 1), or when
// msgProtocol_ has already been set by parseUriField.
// NOTE(review): the latter is presumably the HTTP/0.9 case, where no
// version field follows -- confirm.
302 if (res
< 1 || msgProtocol_
.protocol
== AnyP::PROTO_HTTP
)
304 // else keep going...
307 // tolerant parser allows multiple whitespace characters between request-line fields
308 if (Config
.onoff
.relaxed_header_parser
) {
309 const size_t garbage
= tok
.skipAll(WspDelim
);
311 firstLineGarbage_
+= garbage
;
312 buf_
= tok
.remaining(); // re-checkpoint after garbage
316 debugs(74, 5, "Parser needs more data");
320 // HTTP/1 version suffix (protocol magic) followed by CR*LF
321 if (msgProtocol_
.protocol
== AnyP::PROTO_NONE
) {
322 return parseHttpVersionField(tok
);
325 // If we got here this method has been called too many times
326 request_parse_status
= Http::scInternalServerError
;
327 debugs(33, 5, "ERROR: Parser already processed request-line");
// Top-level entry point: advances the parser through its stages
// (HTTP_PARSE_NONE -> HTTP_PARSE_FIRST -> HTTP_PARSE_MIME -> HTTP_PARSE_DONE)
// using the bytes in aBuf.
//
// aBuf - the bytes received so far for this request
//
// Returns the negation of needsMoreData(). Several statements, including
// early returns and the conditions around stage transitions, are on lines
// not visible in this listing.
332 Http::One::RequestParser::parse(const SBuf
&aBuf
)
335 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
337 // stage 1: locate the request-line
338 if (parsingStage_
== HTTP_PARSE_NONE
) {
341 // if we hit something before EOS treat it as a message
343 parsingStage_
= HTTP_PARSE_FIRST
;
348 // stage 2: parse the request-line
349 if (parsingStage_
== HTTP_PARSE_FIRST
) {
350 PROF_start(HttpParserParseReqLine
);
351 const int retcode
= parseRequestFirstLine();
353 // first-line (or a look-alike) found successfully.
355 parsingStage_
= HTTP_PARSE_MIME
;
// Diagnostic dump of the parse result. "bytes processed" is the
// difference between the input length and what remains in buf_.
358 debugs(74, 5, "request-line: retval " << retcode
<< ": line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
359 debugs(74, 5, "request-line: method: " << method_
);
360 debugs(74, 5, "request-line: url: " << uri_
);
361 debugs(74, 5, "request-line: proto: " << msgProtocol_
);
362 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
363 PROF_stop(HttpParserParseReqLine
);
365 // syntax errors already
367 parsingStage_
= HTTP_PARSE_DONE
;
372 // stage 3: locate the mime header block
373 if (parsingStage_
== HTTP_PARSE_MIME
) {
374 // HTTP/1.x request-line is valid and parsing completed.
375 if (msgProtocol_
.major
== 1) {
376 /* NOTE: HTTP/0.9 requests do not have a mime header block.
377 * So the rest of the code will need to deal with '0'-byte headers
378 * (ie, none, so don't try parsing em)
380 int64_t mimeHeaderBytes
= 0;
381 // XXX: c_str() reallocates. performance regression.
// A zero result from headersEnd() is treated as "end of headers not
// found yet".
382 if ((mimeHeaderBytes
= headersEnd(buf_
.c_str(), buf_
.length())) == 0) {
// No end-of-headers yet: give up if what has been buffered plus the
// request-line already reaches the configured limit, otherwise wait.
383 if (buf_
.length()+firstLineSize() >= Config
.maxRequestHeaderSize
) {
384 debugs(33, 5, "Too large request");
385 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
386 parsingStage_
= HTTP_PARSE_DONE
;
388 debugs(33, 5, "Incomplete request, waiting for end of headers");
// Move the header block out of buf_ into mimeHeaderBlock_.
391 mimeHeaderBlock_
= buf_
.consume(mimeHeaderBytes
);
392 debugs(74, 5, "mime header (0-" << mimeHeaderBytes
<< ") {" << mimeHeaderBlock_
<< "}");
395 debugs(33, 3, "Missing HTTP/1.x identifier");
397 // NP: we do not do any further stages here yet so go straight to DONE
398 parsingStage_
= HTTP_PARSE_DONE
;
400 // Squid could handle these headers, but admin does not want to
401 if (messageHeaderSize() >= Config
.maxRequestHeaderSize
) {
402 debugs(33, 5, "Too large request");
403 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
408 return !needsMoreData();