]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/RequestParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / http / one / RequestParser.cc
CommitLineData
eac61ce1 1/*
4ac4a490 2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
eac61ce1
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
f7f3304a 9#include "squid.h"
4c14658e 10#include "Debug.h"
c99510dd 11#include "http/one/RequestParser.h"
f29718b0 12#include "http/one/Tokenizer.h"
c99510dd 13#include "http/ProtocolVersion.h"
582c2af2 14#include "profiler/Profiler.h"
4d5904f7 15#include "SquidConfig.h"
4c14658e 16
e02f963c
AR
17// the right debugs() level for parsing errors
18inline static int
19ErrorLevel() {
20 return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
21}
22
6b2b6cfe
CT
23Http::One::RequestParser::RequestParser(bool preserveParsed) :
24 Parser(),
25 preserveParsed_(preserveParsed)
947ca0c6
AJ
26{}
27
28Http1::Parser::size_type
29Http::One::RequestParser::firstLineSize() const
7322c9dd 30{
947ca0c6
AJ
31 // RFC 7230 section 2.6
32 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
33 return method_.image().length() + uri_.length() + 12;
4c14658e
AJ
34}
35
c11191e0
AJ
36/**
37 * Attempt to parse the first line of a new request message.
38 *
a4c74dd8 39 * Governed by RFC 7230 section 3.5
c11191e0 40 * "
a4c74dd8
AJ
41 * In the interest of robustness, a server that is expecting to receive
42 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
43 * received prior to the request-line.
c11191e0
AJ
44 * "
45 *
46 * Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df 47 * If garbage is found the parsing offset is incremented.
c11191e0 48 */
cbcd99df 49void
678451c0 50Http::One::RequestParser::skipGarbageLines()
c11191e0 51{
c11191e0 52 if (Config.onoff.relaxed_header_parser) {
b749de75 53 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
c11191e0
AJ
54 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
55 "CRLF bytes received ahead of request-line. " <<
56 "Ignored due to relaxed_header_parser.");
57 // Be tolerant of prefix empty lines
cbcd99df 58 // ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75
AJ
59 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
60 buf_.consume(1);
7a4fa6a0 61 }
c11191e0 62 }
c11191e0
AJ
63}
64
65/**
947ca0c6 66 * Attempt to parse the method field out of an HTTP message request-line.
c11191e0
AJ
67 *
68 * Governed by:
69 * RFC 1945 section 5.1
947ca0c6 70 * RFC 7230 section 2.6, 3.1 and 3.5
c11191e0 71 */
e02f963c
AR
72bool
73Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
4c14658e 74{
e03114f8 75 // method field is a sequence of TCHAR.
e02f963c
AR
76 // Limit to 32 characters to prevent overly long sequences of non-HTTP
77 // being sucked in before mismatch is detected. 32 is itself annoyingly
78 // big but there are methods registered by IANA that reach 17 bytes:
79 // http://www.iana.org/assignments/http-methods
80 static const size_t maxMethodLength = 32; // TODO: make this configurable?
4c14658e 81
e02f963c
AR
82 SBuf methodFound;
83 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
84 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
de158bf5 85 parseStatusCode = Http::scBadRequest;
e02f963c 86 return false;
947ca0c6 87 }
e02f963c 88 method_ = HttpRequestMethod(methodFound);
f8b58a68
EB
89
90 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
91 return false;
92
e02f963c 93 return true;
947ca0c6 94}
4c14658e 95
e02f963c
AR
96/// the characters which truly are valid within URI
97static const CharacterSet &
98UriValidCharacters()
78a63ed1 99{
78a63ed1
AJ
100 /* RFC 3986 section 2:
101 * "
102 * A URI is composed from a limited set of characters consisting of
103 * digits, letters, and a few graphic symbols.
104 * "
105 */
e02f963c
AR
106 static const CharacterSet UriChars =
107 CharacterSet("URI-Chars","") +
108 // RFC 3986 section 2.2 - reserved characters
109 CharacterSet("gen-delims", ":/?#[]@") +
110 CharacterSet("sub-delims", "!$&'()*+,;=") +
111 // RFC 3986 section 2.3 - unreserved characters
112 CharacterSet::ALPHA +
113 CharacterSet::DIGIT +
114 CharacterSet("unreserved", "-._~") +
115 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
116 CharacterSet("pct-encoded", "%") +
117 CharacterSet::HEXDIG;
78a63ed1
AJ
118
119 return UriChars;
120}
016a316b 121
e02f963c
AR
122/// characters which Squid will accept in the HTTP request-target (URI)
123const CharacterSet &
124Http::One::RequestParser::RequestTargetCharacters()
947ca0c6 125{
e02f963c
AR
126 if (Config.onoff.relaxed_header_parser) {
127#if USE_HTTP_VIOLATIONS
128 static const CharacterSet RelaxedExtended =
129 UriValidCharacters() +
130 // accept whitespace (extended), it will be dealt with later
131 DelimiterCharacters() +
132 // RFC 2396 unwise character set which must never be transmitted
133 // in un-escaped form. But many web services do anyway.
134 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
135 // UTF-8 because we want to be future-proof
136 CharacterSet("UTF-8", 128, 255);
137
138 return RelaxedExtended;
139#else
140 static const CharacterSet RelaxedCompliant =
141 UriValidCharacters() +
142 // accept whitespace (extended), it will be dealt with later.
143 DelimiterCharacters();
144
145 return RelaxedCompliant;
146#endif
147 }
148
149 // strict parse only accepts what the RFC say we can
150 return UriValidCharacters();
151}
947ca0c6 152
e02f963c
AR
153bool
154Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
155{
947ca0c6
AJ
156 /* Arbitrary 64KB URI upper length limit.
157 *
158 * Not quite as arbitrary as it seems though. Old SquidString objects
159 * cannot store strings larger than 64KB, so we must limit until they
160 * have all been replaced with SBuf.
161 *
162 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
163 * at least 8000 octets for the whole line, including method and version.
164 */
e02f963c 165 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
016a316b 166
947ca0c6 167 SBuf uriFound;
e02f963c
AR
168 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
169 parseStatusCode = Http::scBadRequest;
170 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
171 return false;
016a316b
AJ
172 }
173
e02f963c 174 if (uriFound.length() > maxUriLength) {
e03114f8 175 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
de158bf5 176 parseStatusCode = Http::scUriTooLong;
e02f963c
AR
177 debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
178 "-byte URI exceeds " << maxUriLength << "-byte limit");
179 return false;
4c14658e 180 }
e02f963c
AR
181
182 uri_ = uriFound;
183 return true;
947ca0c6 184}
4c14658e 185
e02f963c 186bool
f29718b0 187Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
947ca0c6 188{
294083a1
EB
189 static const SBuf http1p0("HTTP/1.0");
190 static const SBuf http1p1("HTTP/1.1");
e02f963c 191 const auto savedTok = tok;
4c14658e 192
294083a1
EB
193 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
194 // the vast majority of cases.
195 if (tok.skipSuffix(http1p1)) {
196 msgProtocol_ = Http::ProtocolVersion(1, 1);
e02f963c 197 return true;
294083a1
EB
198 } else if (tok.skipSuffix(http1p0)) {
199 msgProtocol_ = Http::ProtocolVersion(1, 0);
200 return true;
201 } else {
202 // RFC 7230 section 2.6:
203 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
204 static const CharacterSet period("Decimal point", ".");
205 static const SBuf proto("HTTP/");
206 SBuf majorDigit;
207 SBuf minorDigit;
208 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
209 tok.skipOneTrailing(period) &&
210 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
211 tok.skipSuffix(proto)) {
212 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
213 // use '0.0' for unsupported multiple digit version numbers
214 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
215 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
216 msgProtocol_ = Http::ProtocolVersion(major, minor);
217 return true;
218 }
4c14658e
AJ
219 }
220
e02f963c
AR
221 // A GET request might use HTTP/0.9 syntax
222 if (method_ == Http::METHOD_GET) {
223 // RFC 1945 - no HTTP version field at all
224 tok = savedTok; // in case the URI ends with a digit
225 // report this assumption as an error if configured to triage parsing
226 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
227 msgProtocol_ = Http::ProtocolVersion(0,9);
228 return true;
4c14658e 229 }
4c14658e 230
e02f963c
AR
231 debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
232 parseStatusCode = Http::scBadRequest;
233 return false;
234}
4c14658e 235
e02f963c
AR
236/**
237 * Skip characters separating request-line fields.
238 * To handle bidirectional parsing, the caller does the actual skipping and
239 * we just check how many character the caller has skipped.
240 */
241bool
f8b58a68 242Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
e02f963c
AR
243{
244 if (count <= 0) {
f8b58a68 245 debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
e02f963c
AR
246 parseStatusCode = Http::scBadRequest;
247 return false;
248 }
e03114f8 249
e02f963c
AR
250 // tolerant parser allows multiple whitespace characters between request-line fields
251 if (count > 1 && !Config.onoff.relaxed_header_parser) {
f8b58a68 252 debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
e02f963c
AR
253 parseStatusCode = Http::scBadRequest;
254 return false;
255 }
947ca0c6 256
e02f963c
AR
257 return true;
258}
4c14658e 259
e02f963c
AR
260/// Parse CRs at the end of request-line, just before the terminating LF.
261bool
262Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
263{
264 if (Config.onoff.relaxed_header_parser) {
265 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
266 } else {
267 if (!tok.skipOneTrailing(CharacterSet::CR)) {
268 debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
269 parseStatusCode = Http::scBadRequest;
270 return false;
271 }
272 }
273 return true;
947ca0c6 274}
274bd5ad 275
947ca0c6
AJ
276/**
277 * Attempt to parse the first line of a new request message.
278 *
279 * Governed by:
280 * RFC 1945 section 5.1
281 * RFC 7230 section 2.6, 3.1 and 3.5
282 *
de158bf5 283 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
947ca0c6
AJ
284 * \retval 1 successful parse. member fields contain the request-line items
285 * \retval 0 more data is needed to complete the parse
286 */
287int
288Http::One::RequestParser::parseRequestFirstLine()
289{
947ca0c6
AJ
290 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
291 debugs(74, DBG_DATA, buf_);
4c14658e 292
e02f963c 293 SBuf line;
947ca0c6 294
e02f963c
AR
295 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
296 // Now, the request line has to end at the first LF.
297 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
298 ::Parser::Tokenizer lineTok(buf_);
299 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
f8b58a68
EB
300 if (buf_.length() >= Config.maxRequestHeaderSize) {
301 /* who should we blame for our failure to parse this line? */
302
303 Http1::Tokenizer methodTok(buf_);
304 if (!parseMethodField(methodTok))
305 return -1; // blame a bad method (or its delimiter)
306
307 // assume it is the URI
308 debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
a95f4c73 309 Config.maxRequestHeaderSize << "-byte limit");
f8b58a68
EB
310 parseStatusCode = Http::scUriTooLong;
311 return -1;
312 }
947ca0c6
AJ
313 debugs(74, 5, "Parser needs more data");
314 return 0;
4c14658e
AJ
315 }
316
e02f963c 317 Http1::Tokenizer tok(line);
78a63ed1 318
e02f963c
AR
319 if (!parseMethodField(tok))
320 return -1;
e47e0802 321
e02f963c
AR
322 /* now parse backwards, to leave just the URI */
323 if (!skipTrailingCrs(tok))
324 return -1;
325
326 if (!parseHttpVersionField(tok))
327 return -1;
947ca0c6 328
f8b58a68 329 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
e02f963c
AR
330 return -1;
331
332 /* parsed everything before and after the URI */
333
334 if (!parseUriField(tok))
335 return -1;
336
337 if (!tok.atEnd()) {
338 debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
339 parseStatusCode = Http::scBadRequest;
340 return -1;
4c14658e 341 }
4c14658e 342
e02f963c
AR
343 parseStatusCode = Http::scOkay;
344 buf_ = lineTok.remaining(); // incremental parse checkpoint
345 return 1;
4c14658e 346}
7a4fa6a0 347
87abd755 348bool
36a9c964 349Http::One::RequestParser::parse(const SBuf &aBuf)
6b2b6cfe
CT
350{
351 const bool result = doParse(aBuf);
352 if (preserveParsed_) {
353 assert(aBuf.length() >= remaining().length());
354 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
355 }
356
357 return result;
358}
359
360// raw is not a reference because a reference might point back to our own buf_ or parsed_
361bool
362Http::One::RequestParser::doParse(const SBuf &aBuf)
4c14658e 363{
b749de75 364 buf_ = aBuf;
36a9c964
AJ
365 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
366
cbcd99df 367 // stage 1: locate the request-line
36a9c964 368 if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df 369 skipGarbageLines();
cbcd99df
AJ
370
371 // if we hit something before EOS treat it as a message
b749de75 372 if (!buf_.isEmpty())
cbcd99df
AJ
373 parsingStage_ = HTTP_PARSE_FIRST;
374 else
f9daf571 375 return false;
cbcd99df 376 }
c11191e0 377
cbcd99df
AJ
378 // stage 2: parse the request-line
379 if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526 380 PROF_start(HttpParserParseReqLine);
678451c0 381 const int retcode = parseRequestFirstLine();
e4cff825
AJ
382
383 // first-line (or a look-alike) found successfully.
384 if (retcode > 0) {
e4cff825
AJ
385 parsingStage_ = HTTP_PARSE_MIME;
386 }
387
947ca0c6
AJ
388 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
389 debugs(74, 5, "request-line: method: " << method_);
390 debugs(74, 5, "request-line: url: " << uri_);
391 debugs(74, 5, "request-line: proto: " << msgProtocol_);
b749de75 392 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526 393 PROF_stop(HttpParserParseReqLine);
cbcd99df
AJ
394
395 // syntax errors already
f4880526 396 if (retcode < 0) {
cbcd99df 397 parsingStage_ = HTTP_PARSE_DONE;
f4880526
AJ
398 return false;
399 }
400 }
401
402 // stage 3: locate the mime header block
cbcd99df 403 if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526 404 // HTTP/1.x request-line is valid and parsing completed.
f8cab755 405 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
f1d5359e
AJ
406 if (parseStatusCode == Http::scHeaderTooLarge)
407 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
016a316b
AJ
408 return false;
409 }
f4880526 410 }
87abd755 411
36a9c964 412 return !needsMoreData();
4c14658e 413}
f53969cc 414