2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
11 #include "http/one/RequestParser.h"
12 #include "http/one/Tokenizer.h"
13 #include "http/ProtocolVersion.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
17 // the right debugs() level for parsing errors
// Evaluates to DBG_IMPORTANT when Config.onoff.relaxed_header_parser is
// negative and to debug level 5 otherwise.
// NOTE(review): the signature line is not visible in this listing; the
// debugs() calls further down invoke this helper as ErrorLevel() -- confirm
// the exact declaration against the full file.
20 return Config
.onoff
.relaxed_header_parser
< 0 ? DBG_IMPORTANT
: 5;
// Default constructor of Http::One::RequestParser.
// NOTE(review): the member initializer list that follows the ':' and the
// constructor body are not visible in this listing.
23 Http::One::RequestParser::RequestParser() :
// Returns the request-line size computed from the parsed fields: the length
// of the method image, plus the length of the URI, plus a fixed 12.
// The 12 equals the fixed parts of the grammar quoted below:
// SP (1) + SP (1) + "HTTP/" (5) + DIGIT "." DIGIT (3) + CRLF (2).
// NOTE(review): because the overhead is a constant, extra delimiters or a
// bare LF accepted by the tolerant parser are presumably not reflected in
// the result -- confirm that callers do not need the exact byte count.
27 Http1::Parser::size_type
28 Http::One::RequestParser::firstLineSize() const
30 // RFC 7230 section 2.6
31 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
32 return method_
.image().length() + uri_
.length() + 12;
36 * Attempt to parse the first line of a new request message.
38 * Governed by RFC 7230 section 3.5
40 * In the interest of robustness, a server that is expecting to receive
41 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
42 * received prior to the request-line.
45 * Parsing state is stored between calls to avoid repeating buffer scans.
46 * If garbage is found the parsing offset is incremented.
// Tolerates empty lines received ahead of the request-line.
// Nothing visible here runs unless Config.onoff.relaxed_header_parser is
// non-zero.
// NOTE(review): the return type, the braces and the body of the while loop
// are not visible in this listing.
49 Http::One::RequestParser::skipGarbageLines()
51 if (Config
.onoff
.relaxed_header_parser
) {
// A negative setting additionally reports the leading CR or LF at
// DBG_IMPORTANT.
// NOTE(review): buf_[0] is read here before any isEmpty() test, so this
// relies on buf_ being non-empty on entry -- confirm with callers.
52 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
53 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
54 "CRLF bytes received ahead of request-line. " <<
55 "Ignored due to relaxed_header_parser.");
56 // Be tolerant of prefix empty lines
57 // ie any series of either \n or \r\n with no other characters and no repeated \r
// Loop while the buffer starts with a bare LF or with a CR LF pair.
// NOTE(review): buf_[1] is read whenever buf_[0] is CR without a visible
// check that buf_ holds at least two bytes -- confirm indexing past the end
// of a one-byte buffer is safe for this buffer type.
58 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
65 * Attempt to parse the method field out of an HTTP message request-line.
68 * RFC 1945 section 5.1
69 * RFC 7230 section 2.6, 3.1 and 3.5
// Parses the method token at the front of tok and the delimiter(s) that
// follow it.
// On a missing or malformed method, logs at ErrorLevel() and sets
// parseStatusCode to Http::scBadRequest. On success the token is stored in
// method_ as an HttpRequestMethod.
// NOTE(review): the return type, the declaration of methodFound and the
// return statements are not visible in this listing; callers test the result
// with '!', so a boolean success flag is presumed.
72 Http::One::RequestParser::parseMethodField(Http1::Tokenizer
&tok
)
74 // method field is a sequence of TCHAR.
75 // Limit to 32 characters to prevent overly long sequences of non-HTTP
76 // being sucked in before mismatch is detected. 32 is itself annoyingly
77 // big but there are methods registered by IANA that reach 17 bytes:
78 // http://www.iana.org/assignments/http-methods
79 static const size_t maxMethodLength
= 32; // TODO: make this configurable?
// Take at most maxMethodLength leading TCHAR characters as the method.
82 if (!tok
.prefix(methodFound
, CharacterSet::TCHAR
, maxMethodLength
)) {
83 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
84 parseStatusCode
= Http::scBadRequest
;
87 method_
= HttpRequestMethod(methodFound
);
// Consume every delimiter character after the method and let skipDelimiter()
// judge whether the number consumed is acceptable.
89 if (!skipDelimiter(tok
.skipAll(DelimiterCharacters()), "after method"))
95 /// the characters which truly are valid within URI
// Builds, once (function-local static), the union of the character sets
// listed below: gen-delims, sub-delims, ALPHA, DIGIT, the "-._~" unreserved
// marks, '%' and HEXDIG.
// NOTE(review): the function name and the return statement are not visible
// in this listing; the code further down calls it as UriValidCharacters().
96 static const CharacterSet
&
99 /* RFC 3986 section 2:
101 * A URI is composed from a limited set of characters consisting of
102 * digits, letters, and a few graphic symbols.
105 static const CharacterSet UriChars
=
106 CharacterSet("URI-Chars","") +
107 // RFC 3986 section 2.2 - reserved characters
108 CharacterSet("gen-delims", ":/?#[]@") +
109 CharacterSet("sub-delims", "!$&'()*+,;=") +
110 // RFC 3986 section 2.3 - unreserved characters
111 CharacterSet::ALPHA
+
112 CharacterSet::DIGIT
+
113 CharacterSet("unreserved", "-._~") +
114 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
115 CharacterSet("pct-encoded", "%") +
116 CharacterSet::HEXDIG
;
121 /// characters which Squid will accept in the HTTP request-target (URI)
// Selects one of three sets:
//  - relaxed parser with USE_HTTP_VIOLATIONS: UriValidCharacters() plus
//    DelimiterCharacters(), the RFC 2396 "unwise" characters and every byte
//    value from 128 to 255;
//  - relaxed parser otherwise: UriValidCharacters() plus
//    DelimiterCharacters();
//  - strict parser: UriValidCharacters() only.
// Each relaxed set is a function-local static, so it is built once.
// NOTE(review): the return type and the preprocessor lines that separate the
// two relaxed variants are not visible in this listing.
123 Http::One::RequestParser::RequestTargetCharacters()
125 if (Config
.onoff
.relaxed_header_parser
) {
126 #if USE_HTTP_VIOLATIONS
127 static const CharacterSet RelaxedExtended
=
128 UriValidCharacters() +
129 // accept whitespace (extended), it will be dealt with later
130 DelimiterCharacters() +
131 // RFC 2396 unwise character set which must never be transmitted
132 // in un-escaped form. But many web services do anyway.
133 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
134 // UTF-8 because we want to be future-proof
135 CharacterSet("UTF-8", 128, 255);
137 return RelaxedExtended
;
139 static const CharacterSet RelaxedCompliant
=
140 UriValidCharacters() +
141 // accept whitespace (extended), it will be dealt with later.
142 DelimiterCharacters();
144 return RelaxedCompliant
;
148 // strict parse only accepts what the RFC says we can
149 return UriValidCharacters();
// Parses the request-target out of tok.
// Visible failure modes:
//  - no leading RequestTargetCharacters(): parseStatusCode becomes
//    Http::scBadRequest;
//  - more than maxUriLength (64*1024 - 1 = 65535) bytes: parseStatusCode
//    becomes Http::scUriTooLong.
// Both are logged at ErrorLevel().
// NOTE(review): the return type, the declaration of uriFound, the return
// statements and the code that stores the accepted URI are not visible in
// this listing.
153 Http::One::RequestParser::parseUriField(Http1::Tokenizer
&tok
)
155 /* Arbitrary 64KB URI upper length limit.
157 * Not quite as arbitrary as it seems though. Old SquidString objects
158 * cannot store strings larger than 64KB, so we must limit until they
159 * have all been replaced with SBuf.
161 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
162 * at least 8000 octets for the whole line, including method and version.
164 const size_t maxUriLength
= static_cast<size_t>((64*1024)-1);
167 if (!tok
.prefix(uriFound
, RequestTargetCharacters())) {
168 parseStatusCode
= Http::scBadRequest
;
169 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
// The length check runs after extraction, so the whole candidate URI has
// already been consumed from tok when this rejects it.
173 if (uriFound
.length() > maxUriLength
) {
174 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
175 parseStatusCode
= Http::scUriTooLong
;
176 debugs(33, ErrorLevel(), "invalid request-line: " << uriFound
.length() <<
177 "-byte URI exceeds " << maxUriLength
<< "-byte limit");
// Determines the protocol version from the tail of tok.
//  - A trailing digit preceded by Http1magic yields version 1.<digit>.
//  - Otherwise a GET request is assumed to be HTTP/0.9 and tok is restored to
//    its state on entry.
//  - Anything else is rejected with Http::scBadRequest.
// NOTE(review): the return type, the declaration of digit, the definition of
// Http1magic and the return statements are not visible in this listing.
186 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer
&tok
)
// Keep a copy so that the suffix parsing below can be undone.
188 const auto savedTok
= tok
;
191 // Searching for Http1magic precludes detecting HTTP/2+ versions.
192 // Rewrite if we ever _need_ to return 505 (Version Not Supported) errors.
// The minor version is the numeric value of the first byte of digit.
193 if (tok
.suffix(digit
, CharacterSet::DIGIT
) && tok
.skipSuffix(Http1magic
)) {
194 msgProtocol_
= Http::ProtocolVersion(1, (*digit
.rawContent() - '0'));
198 // A GET request might use HTTP/0.9 syntax
199 if (method_
== Http::METHOD_GET
) {
200 // RFC 1945 - no HTTP version field at all
201 tok
= savedTok
; // in case the URI ends with a digit
202 // report this assumption as an error if configured to triage parsing
203 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
204 msgProtocol_
= Http::ProtocolVersion(0,9);
208 debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
209 parseStatusCode
= Http::scBadRequest
;
214 * Skip characters separating request-line fields.
215 * To handle bidirectional parsing, the caller does the actual skipping and
216 * we just check how many characters the caller has skipped.
// Validates the number of delimiter characters the caller has already
// skipped between two request-line fields.
//   count - how many delimiter characters the caller skipped
//   where - text appended to the log message to say which gap was checked
// Both visible failure paths log at ErrorLevel() and set parseStatusCode to
// Http::scBadRequest.
// NOTE(review): the return type, the return statements and the condition
// guarding the "missing delimiter" path are not visible in this listing;
// presumably that path is taken when count is zero -- confirm.
219 Http::One::RequestParser::skipDelimiter(const size_t count
, const char *where
)
222 debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where
);
223 parseStatusCode
= Http::scBadRequest
;
227 // tolerant parser allows multiple whitespace characters between request-line fields
// More than one delimiter is an error only when relaxed_header_parser is zero.
228 if (count
> 1 && !Config
.onoff
.relaxed_header_parser
) {
229 debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where
);
230 parseStatusCode
= Http::scBadRequest
;
237 /// Parse CRs at the end of request-line, just before the terminating LF.
// With relaxed_header_parser non-zero, any number of trailing CR characters
// (including none) is removed and the skip result is deliberately discarded.
// The strict path requires one trailing CR; if skipOneTrailing() fails, the
// problem is logged at ErrorLevel() and parseStatusCode is set to
// Http::scBadRequest.
// NOTE(review): the return type, the return statements and the line(s)
// separating the relaxed branch from the strict one (presumably an else) are
// not visible in this listing.
239 Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer
&tok
)
241 if (Config
.onoff
.relaxed_header_parser
) {
242 (void)tok
.skipAllTrailing(CharacterSet::CR
); // optional; multiple OK
244 if (!tok
.skipOneTrailing(CharacterSet::CR
)) {
245 debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
246 parseStatusCode
= Http::scBadRequest
;
254 * Attempt to parse the first line of a new request message.
257 * RFC 1945 section 5.1
258 * RFC 7230 section 2.6, 3.1 and 3.5
260 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
261 * \retval 1 successful parse. member fields contain the request-line items
262 * \retval 0 more data is needed to complete the parse
// Parses one request-line from the front of buf_.
// Order of work: isolate the line up to the first LF, parse the method from
// the front, then the trailing CR(s), the version and the delimiter before it
// from the back, and finally treat what is left as the URI.
// On success parseStatusCode is set to Http::scOkay and buf_ is advanced past
// the line.
// NOTE(review): the return type, the declaration of line, most return
// statements and the condition guarding the "garbage after URI" path are not
// visible in this listing.
265 Http::One::RequestParser::parseRequestFirstLine()
267 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
268 debugs(74, DBG_DATA
, buf_
);
272 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
273 // Now, the request line has to end at the first LF.
274 static const CharacterSet lineChars
= CharacterSet::LF
.complement("notLF");
// lineTok scans the raw buffer; the request-line is everything before the
// first LF, and that LF must be present.
275 ::Parser::Tokenizer
lineTok(buf_
);
276 if (!lineTok
.prefix(line
, lineChars
) || !lineTok
.skip('\n')) {
// No complete line yet. Once the buffered bytes reach the configured header
// size limit, stop waiting and pick an error to report.
277 if (buf_
.length() >= Config
.maxRequestHeaderSize
) {
278 /* who should we blame for our failure to parse this line? */
280 Http1::Tokenizer
methodTok(buf_
);
281 if (!parseMethodField(methodTok
))
282 return -1; // blame a bad method (or its delimiter)
284 // assume it is the URI
285 debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
286 Config
.maxRequestHeaderSize
<< "-byte limit");
287 parseStatusCode
= Http::scUriTooLong
;
290 debugs(74, 5, "Parser needs more data");
// From here on only the isolated line is examined, not the whole buffer.
294 Http1::Tokenizer
tok(line
);
296 if (!parseMethodField(tok
))
299 /* now parse backwards, to leave just the URI */
300 if (!skipTrailingCrs(tok
))
303 if (!parseHttpVersionField(tok
))
// The delimiter before the version is only checked when http0() is false.
306 if (!http0() && !skipDelimiter(tok
.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
309 /* parsed everything before and after the URI */
311 if (!parseUriField(tok
))
315 debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
316 parseStatusCode
= Http::scBadRequest
;
320 parseStatusCode
= Http::scOkay
;
321 buf_
= lineTok
.remaining(); // incremental parse checkpoint
// Entry point: drives request parsing through the parsingStage_ state
// machine and returns !needsMoreData().
// Visible stages:
//   HTTP_PARSE_NONE  -> HTTP_PARSE_FIRST : locate the request-line
//   HTTP_PARSE_FIRST -> HTTP_PARSE_MIME  : request-line parsed
//   HTTP_PARSE_FIRST -> HTTP_PARSE_DONE  : request-line syntax error
//   HTTP_PARSE_MIME                      : locate the mime header block
// NOTE(review): the return type, the statement that hands aBuf to the parser
// state, and the conditions guarding each stage transition are not visible
// in this listing.
326 Http::One::RequestParser::parse(const SBuf
&aBuf
)
329 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
331 // stage 1: locate the request-line
332 if (parsingStage_
== HTTP_PARSE_NONE
) {
335 // if we hit something before EOS treat it as a message
337 parsingStage_
= HTTP_PARSE_FIRST
;
342 // stage 2: parse the request-line
343 if (parsingStage_
== HTTP_PARSE_FIRST
) {
// The PROF_start / PROF_stop pair brackets the request-line parse and the
// debug output that follows it.
344 PROF_start(HttpParserParseReqLine
);
345 const int retcode
= parseRequestFirstLine();
347 // first-line (or a look-alike) found successfully.
349 parsingStage_
= HTTP_PARSE_MIME
;
352 debugs(74, 5, "request-line: retval " << retcode
<< ": line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
353 debugs(74, 5, "request-line: method: " << method_
);
354 debugs(74, 5, "request-line: url: " << uri_
);
355 debugs(74, 5, "request-line: proto: " << msgProtocol_
);
// Reported as the difference between the input length and what is still
// held in buf_.
356 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
357 PROF_stop(HttpParserParseReqLine
);
359 // syntax errors already
361 parsingStage_
= HTTP_PARSE_DONE
;
366 // stage 3: locate the mime header block
367 if (parsingStage_
== HTTP_PARSE_MIME
) {
368 // HTTP/1.x request-line is valid and parsing completed.
// When grabMimeBlock() fails with the generic Http::scHeaderTooLarge, the
// status is replaced by the request-specific
// Http::scRequestHeaderFieldsTooLarge.
369 if (!grabMimeBlock("Request", Config
.maxRequestHeaderSize
)) {
370 if (parseStatusCode
== Http::scHeaderTooLarge
)
371 parseStatusCode
= Http::scRequestHeaderFieldsTooLarge
;
376 return !needsMoreData();