/*
 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "parser/Tokenizer.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
17 Http::One::RequestParser::RequestParser() :
22 Http1::Parser::size_type
23 Http::One::RequestParser::firstLineSize() const
25 // RFC 7230 section 2.6
26 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
27 return method_
.image().length() + uri_
.length() + 12;
31 * Attempt to parse the first line of a new request message.
33 * Governed by RFC 7230 section 3.5
35 * In the interest of robustness, a server that is expecting to receive
36 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
37 * received prior to the request-line.
40 * Parsing state is stored between calls to avoid repeating buffer scans.
41 * If garbage is found the parsing offset is incremented.
44 Http::One::RequestParser::skipGarbageLines()
46 if (Config
.onoff
.relaxed_header_parser
) {
47 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
48 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
49 "CRLF bytes received ahead of request-line. " <<
50 "Ignored due to relaxed_header_parser.");
51 // Be tolerant of prefix empty lines
52 // ie any series of either \n or \r\n with no other characters and no repeated \r
53 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
60 * Attempt to parse the method field out of an HTTP message request-line.
63 * RFC 1945 section 5.1
64 * RFC 7230 section 2.6, 3.1 and 3.5
66 * Parsing state is stored between calls. The current implementation uses
67 * checkpoints after each successful request-line field.
68 * The return value tells you whether the parsing is completed or not.
70 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
71 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
72 * \retval 0 more data is needed to complete the parse
75 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer
&tok
, const CharacterSet
&WspDelim
)
77 // scan for up to 16 valid method characters.
78 static const size_t maxMethodLength
= 16; // TODO: make this configurable?
80 // method field is a sequence of TCHAR.
82 if (tok
.prefix(methodFound
, CharacterSet::TCHAR
, maxMethodLength
) && tok
.skipOne(WspDelim
)) {
84 method_
= HttpRequestMethod(methodFound
);
85 buf_
= tok
.remaining(); // incremental parse checkpoint
88 } else if (tok
.atEnd()) {
89 debugs(74, 5, "Parser needs more data to find method");
94 // non-delimiter found after accepted method bytes means ...
95 if (methodFound
.length() == maxMethodLength
) {
96 // method longer than acceptible.
97 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
98 parseStatusCode
= Http::scNotImplemented
;
99 debugs(33, 5, "invalid request-line. method too long");
101 // invalid character in the URL
102 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
103 parseStatusCode
= Http::scBadRequest
;
104 debugs(33, 5, "invalid request-line. missing method delimiter");
112 CharacterSet
UriChars("URI-Chars","");
114 /* RFC 3986 section 2:
116 * A URI is composed from a limited set of characters consisting of
117 * digits, letters, and a few graphic symbols.
120 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
122 UriChars
+= CharacterSet::HEXDIG
;
123 // RFC 3986 section 2.2 - reserved characters
124 UriChars
+= CharacterSet("gen-delims", ":/?#[]@");
125 UriChars
+= CharacterSet("sub-delims", "!$&'()*+,;=");
126 // RFC 3986 section 2.3 - unreserved characters
127 UriChars
+= CharacterSet::ALPHA
;
128 UriChars
+= CharacterSet::DIGIT
;
129 UriChars
+= CharacterSet("unreserved", "-._~");
135 Http::One::RequestParser::parseUriField(::Parser::Tokenizer
&tok
)
137 // URI field is a sequence of ... what? segments all have different valid charset
138 // go with non-whitespace non-binary characters for now
139 static CharacterSet UriChars
= uriValidCharacters();
141 /* Arbitrary 64KB URI upper length limit.
143 * Not quite as arbitrary as it seems though. Old SquidString objects
144 * cannot store strings larger than 64KB, so we must limit until they
145 * have all been replaced with SBuf.
147 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
148 * at least 8000 octets for the whole line, including method and version.
150 const size_t maxUriLength
= min(static_cast<size_t>(Config
.maxRequestHeaderSize
) - firstLineSize(),
151 static_cast<size_t>((64*1024)-1));
155 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
156 if (tok
.prefix(uriFound
, UriChars
, maxUriLength
) && tok
.skipOne(CharacterSet::SP
)) {
158 buf_
= tok
.remaining(); // incremental parse checkpoint
161 // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
162 } else if (method_
== Http::METHOD_GET
&& skipLineTerminator(tok
)) {
163 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
164 msgProtocol_
= Http::ProtocolVersion(0,9);
165 uri_
= uriFound
; // found by successful prefix() call earlier.
166 parseStatusCode
= Http::scOkay
;
167 buf_
= tok
.remaining(); // incremental parse checkpoint
170 } else if (tok
.atEnd()) {
171 debugs(74, 5, "Parser needs more data to find URI");
177 if (uriFound
.length() == maxUriLength
) {
178 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
179 parseStatusCode
= Http::scUriTooLong
;
180 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength
<< " bytes");
182 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
183 parseStatusCode
= Http::scBadRequest
;
184 debugs(33, 5, "invalid request-line. missing URI delimiter");
190 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer
&tok
)
192 // partial match of HTTP/1 magic prefix
193 if (tok
.remaining().length() < Http1magic
.length() && Http1magic
.startsWith(tok
.remaining())) {
194 debugs(74, 5, "Parser needs more data to find version");
198 if (!tok
.skip(Http1magic
)) {
199 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
200 parseStatusCode
= Http::scHttpVersionNotSupported
;
205 debugs(74, 5, "Parser needs more data to find version");
209 // get the version minor DIGIT
211 if (tok
.prefix(digit
, CharacterSet::DIGIT
, 1) && skipLineTerminator(tok
)) {
213 // found version fully AND terminator
214 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
215 parseStatusCode
= Http::scOkay
;
216 buf_
= tok
.remaining(); // incremental parse checkpoint
219 } else if (tok
.atEnd() || (tok
.skip('\r') && tok
.atEnd())) {
220 debugs(74, 5, "Parser needs more data to find version");
225 // non-DIGIT. invalid version number.
226 parseStatusCode
= Http::scHttpVersionNotSupported
;
227 debugs(33, 5, "invalid request-line. garbage before line terminator");
232 * Attempt to parse the first line of a new request message.
235 * RFC 1945 section 5.1
236 * RFC 7230 section 2.6, 3.1 and 3.5
238 * Parsing state is stored between calls. The current implementation uses
239 * checkpoints after each successful request-line field.
240 * The return value tells you whether the parsing is completed or not.
242 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
243 * \retval 1 successful parse. member fields contain the request-line items
244 * \retval 0 more data is needed to complete the parse
247 Http::One::RequestParser::parseRequestFirstLine()
249 ::Parser::Tokenizer
tok(buf_
);
251 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
252 debugs(74, DBG_DATA
, buf_
);
254 // NP: would be static, except it need to change with reconfigure
255 CharacterSet WspDelim
= CharacterSet::SP
; // strict parse only accepts SP
257 if (Config
.onoff
.relaxed_header_parser
) {
258 // RFC 7230 section 3.5
259 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
260 // as whitespace between request-line fields
261 WspDelim
+= CharacterSet::HTAB
262 + CharacterSet("VT,FF","\x0B\x0C")
266 // only search for method if we have not yet found one
267 if (method_
== Http::METHOD_NONE
) {
268 const int res
= parseMethodField(tok
, WspDelim
);
271 // else keep going...
274 // tolerant parser allows multiple whitespace characters between request-line fields
275 if (Config
.onoff
.relaxed_header_parser
) {
276 const size_t garbage
= tok
.skipAll(WspDelim
);
278 firstLineGarbage_
+= garbage
;
279 buf_
= tok
.remaining(); // re-checkpoint after garbage
283 debugs(74, 5, "Parser needs more data");
287 // from here on, we have two possible parse paths: whitespace tolerant, and strict
288 if (Config
.onoff
.relaxed_header_parser
) {
289 // whitespace tolerant
292 // * this would be static, except WspDelim changes with reconfigure
293 // * HTTP-version charset is included by uriValidCharacters()
294 // * terminal CR is included by WspDelim here in relaxed parsing
295 CharacterSet LfDelim
= uriValidCharacters() + WspDelim
;
297 // seek the LF character, then tokenize the line in reverse
299 if (tok
.prefix(line
, LfDelim
) && tok
.skip('\n')) {
300 ::Parser::Tokenizer
rTok(line
);
302 (void)rTok
.suffix(nil
,CharacterSet::CR
); // optional CR in terminator
304 if (rTok
.suffix(digit
,CharacterSet::DIGIT
) && rTok
.skipSuffix(Http1magic
) && rTok
.suffix(nil
,WspDelim
)) {
305 uri_
= rTok
.remaining();
306 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
307 if (uri_
.isEmpty()) {
308 debugs(33, 5, "invalid request-line. missing URL");
309 parseStatusCode
= Http::scBadRequest
;
313 parseStatusCode
= Http::scOkay
;
314 buf_
= tok
.remaining(); // incremental parse checkpoint
317 } else if (method_
== Http::METHOD_GET
) {
318 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
319 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
320 msgProtocol_
= Http::ProtocolVersion(0,9);
321 static const SBuf
cr("\r",1);
322 uri_
= line
.trim(cr
,false,true);
323 parseStatusCode
= Http::scOkay
;
324 buf_
= tok
.remaining(); // incremental parse checkpoint
328 debugs(33, 5, "invalid request-line. not HTTP");
329 parseStatusCode
= Http::scBadRequest
;
333 debugs(74, 5, "Parser needs more data");
336 // else strict non-whitespace tolerant parse
338 // only search for request-target (URL) if we have not yet found one
339 if (uri_
.isEmpty()) {
340 const int res
= parseUriField(tok
);
341 if (res
< 1 || msgProtocol_
.protocol
== AnyP::PROTO_HTTP
)
343 // else keep going...
347 debugs(74, 5, "Parser needs more data");
351 // HTTP/1 version suffix (protocol magic) followed by CR*LF
352 if (msgProtocol_
.protocol
== AnyP::PROTO_NONE
) {
353 return parseHttpVersionField(tok
);
356 // If we got here this method has been called too many times
357 parseStatusCode
= Http::scInternalServerError
;
358 debugs(33, 5, "ERROR: Parser already processed request-line");
363 Http::One::RequestParser::parse(const SBuf
&aBuf
)
366 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
368 // stage 1: locate the request-line
369 if (parsingStage_
== HTTP_PARSE_NONE
) {
372 // if we hit something before EOS treat it as a message
374 parsingStage_
= HTTP_PARSE_FIRST
;
379 // stage 2: parse the request-line
380 if (parsingStage_
== HTTP_PARSE_FIRST
) {
381 PROF_start(HttpParserParseReqLine
);
382 const int retcode
= parseRequestFirstLine();
384 // first-line (or a look-alike) found successfully.
386 parsingStage_
= HTTP_PARSE_MIME
;
389 debugs(74, 5, "request-line: retval " << retcode
<< ": line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
390 debugs(74, 5, "request-line: method: " << method_
);
391 debugs(74, 5, "request-line: url: " << uri_
);
392 debugs(74, 5, "request-line: proto: " << msgProtocol_
);
393 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
394 PROF_stop(HttpParserParseReqLine
);
396 // syntax errors already
398 parsingStage_
= HTTP_PARSE_DONE
;
403 // stage 3: locate the mime header block
404 if (parsingStage_
== HTTP_PARSE_MIME
) {
405 // HTTP/1.x request-line is valid and parsing completed.
406 if (!grabMimeBlock("Request", Config
.maxRequestHeaderSize
)) {
407 if (parseStatusCode
== Http::scHeaderTooLarge
)
408 parseStatusCode
= Http::scRequestHeaderFieldsTooLarge
;
413 return !needsMoreData();