]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/RequestParser.cc
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / http / one / RequestParser.cc
CommitLineData
eac61ce1 1/*
f70aedc4 2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
eac61ce1
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
f7f3304a 9#include "squid.h"
4c14658e 10#include "Debug.h"
c99510dd
AJ
11#include "http/one/RequestParser.h"
12#include "http/ProtocolVersion.h"
417da400 13#include "parser/Tokenizer.h"
582c2af2 14#include "profiler/Profiler.h"
4d5904f7 15#include "SquidConfig.h"
4c14658e 16
947ca0c6
AJ
17Http1::Parser::size_type
18Http::One::RequestParser::firstLineSize() const
7322c9dd 19{
947ca0c6
AJ
20 // RFC 7230 section 2.6
21 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
22 return method_.image().length() + uri_.length() + 12;
4c14658e
AJ
23}
24
c11191e0
AJ
25/**
26 * Attempt to parse the first line of a new request message.
27 *
a4c74dd8 28 * Governed by RFC 7230 section 3.5
c11191e0 29 * "
a4c74dd8
AJ
30 * In the interest of robustness, a server that is expecting to receive
31 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
32 * received prior to the request-line.
c11191e0
AJ
33 * "
34 *
35 * Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df 36 * If garbage is found the parsing offset is incremented.
c11191e0 37 */
cbcd99df 38void
678451c0 39Http::One::RequestParser::skipGarbageLines()
c11191e0 40{
c11191e0 41 if (Config.onoff.relaxed_header_parser) {
b749de75 42 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
c11191e0
AJ
43 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
44 "CRLF bytes received ahead of request-line. " <<
45 "Ignored due to relaxed_header_parser.");
46 // Be tolerant of prefix empty lines
cbcd99df 47 // ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75
AJ
48 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
49 buf_.consume(1);
7a4fa6a0 50 }
c11191e0 51 }
c11191e0
AJ
52}
53
54/**
947ca0c6 55 * Attempt to parse the method field out of an HTTP message request-line.
c11191e0
AJ
56 *
57 * Governed by:
58 * RFC 1945 section 5.1
947ca0c6 59 * RFC 7230 section 2.6, 3.1 and 3.5
c11191e0 60 */
e02f963c 61bool
417da400 62Http::One::RequestParser::parseMethodField(Tokenizer &tok)
4c14658e 63{
e03114f8 64 // method field is a sequence of TCHAR.
e02f963c
AR
65 // Limit to 32 characters to prevent overly long sequences of non-HTTP
66 // being sucked in before mismatch is detected. 32 is itself annoyingly
67 // big but there are methods registered by IANA that reach 17 bytes:
68 // http://www.iana.org/assignments/http-methods
69 static const size_t maxMethodLength = 32; // TODO: make this configurable?
4c14658e 70
e02f963c
AR
71 SBuf methodFound;
72 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
73 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
de158bf5 74 parseStatusCode = Http::scBadRequest;
e02f963c 75 return false;
947ca0c6 76 }
e02f963c 77 method_ = HttpRequestMethod(methodFound);
f8b58a68
EB
78
79 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
80 return false;
81
e02f963c 82 return true;
947ca0c6 83}
4c14658e 84
e02f963c
AR
85/// the characters which truly are valid within URI
86static const CharacterSet &
87UriValidCharacters()
78a63ed1 88{
78a63ed1
AJ
89 /* RFC 3986 section 2:
90 * "
91 * A URI is composed from a limited set of characters consisting of
92 * digits, letters, and a few graphic symbols.
93 * "
94 */
e02f963c
AR
95 static const CharacterSet UriChars =
96 CharacterSet("URI-Chars","") +
97 // RFC 3986 section 2.2 - reserved characters
98 CharacterSet("gen-delims", ":/?#[]@") +
99 CharacterSet("sub-delims", "!$&'()*+,;=") +
100 // RFC 3986 section 2.3 - unreserved characters
101 CharacterSet::ALPHA +
102 CharacterSet::DIGIT +
103 CharacterSet("unreserved", "-._~") +
104 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
105 CharacterSet("pct-encoded", "%") +
106 CharacterSet::HEXDIG;
78a63ed1
AJ
107
108 return UriChars;
109}
016a316b 110
e02f963c
AR
111/// characters which Squid will accept in the HTTP request-target (URI)
112const CharacterSet &
113Http::One::RequestParser::RequestTargetCharacters()
947ca0c6 114{
e02f963c
AR
115 if (Config.onoff.relaxed_header_parser) {
116#if USE_HTTP_VIOLATIONS
117 static const CharacterSet RelaxedExtended =
118 UriValidCharacters() +
119 // accept whitespace (extended), it will be dealt with later
120 DelimiterCharacters() +
121 // RFC 2396 unwise character set which must never be transmitted
122 // in un-escaped form. But many web services do anyway.
123 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
124 // UTF-8 because we want to be future-proof
125 CharacterSet("UTF-8", 128, 255);
126
127 return RelaxedExtended;
128#else
129 static const CharacterSet RelaxedCompliant =
130 UriValidCharacters() +
131 // accept whitespace (extended), it will be dealt with later.
132 DelimiterCharacters();
133
134 return RelaxedCompliant;
135#endif
136 }
137
138 // strict parse only accepts what the RFC say we can
139 return UriValidCharacters();
140}
947ca0c6 141
e02f963c 142bool
417da400 143Http::One::RequestParser::parseUriField(Tokenizer &tok)
e02f963c 144{
947ca0c6
AJ
145 /* Arbitrary 64KB URI upper length limit.
146 *
147 * Not quite as arbitrary as it seems though. Old SquidString objects
148 * cannot store strings larger than 64KB, so we must limit until they
149 * have all been replaced with SBuf.
150 *
151 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
152 * at least 8000 octets for the whole line, including method and version.
153 */
e02f963c 154 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
016a316b 155
947ca0c6 156 SBuf uriFound;
e02f963c
AR
157 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
158 parseStatusCode = Http::scBadRequest;
159 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
160 return false;
016a316b
AJ
161 }
162
e02f963c 163 if (uriFound.length() > maxUriLength) {
e03114f8 164 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
de158bf5 165 parseStatusCode = Http::scUriTooLong;
e02f963c
AR
166 debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
167 "-byte URI exceeds " << maxUriLength << "-byte limit");
168 return false;
4c14658e 169 }
e02f963c
AR
170
171 uri_ = uriFound;
172 return true;
947ca0c6 173}
4c14658e 174
e02f963c 175bool
417da400 176Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
947ca0c6 177{
294083a1
EB
178 static const SBuf http1p0("HTTP/1.0");
179 static const SBuf http1p1("HTTP/1.1");
e02f963c 180 const auto savedTok = tok;
4c14658e 181
294083a1
EB
182 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
183 // the vast majority of cases.
184 if (tok.skipSuffix(http1p1)) {
185 msgProtocol_ = Http::ProtocolVersion(1, 1);
e02f963c 186 return true;
294083a1
EB
187 } else if (tok.skipSuffix(http1p0)) {
188 msgProtocol_ = Http::ProtocolVersion(1, 0);
189 return true;
190 } else {
191 // RFC 7230 section 2.6:
192 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
193 static const CharacterSet period("Decimal point", ".");
194 static const SBuf proto("HTTP/");
195 SBuf majorDigit;
196 SBuf minorDigit;
197 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
198 tok.skipOneTrailing(period) &&
199 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
200 tok.skipSuffix(proto)) {
201 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
202 // use '0.0' for unsupported multiple digit version numbers
203 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
204 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
205 msgProtocol_ = Http::ProtocolVersion(major, minor);
206 return true;
207 }
4c14658e
AJ
208 }
209
e02f963c
AR
210 // A GET request might use HTTP/0.9 syntax
211 if (method_ == Http::METHOD_GET) {
212 // RFC 1945 - no HTTP version field at all
213 tok = savedTok; // in case the URI ends with a digit
214 // report this assumption as an error if configured to triage parsing
215 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
216 msgProtocol_ = Http::ProtocolVersion(0,9);
217 return true;
4c14658e 218 }
4c14658e 219
e02f963c
AR
220 debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
221 parseStatusCode = Http::scBadRequest;
222 return false;
223}
4c14658e 224
e02f963c
AR
225/**
226 * Skip characters separating request-line fields.
227 * To handle bidirectional parsing, the caller does the actual skipping and
228 * we just check how many character the caller has skipped.
229 */
230bool
f8b58a68 231Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
e02f963c
AR
232{
233 if (count <= 0) {
f8b58a68 234 debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
e02f963c
AR
235 parseStatusCode = Http::scBadRequest;
236 return false;
237 }
e03114f8 238
e02f963c
AR
239 // tolerant parser allows multiple whitespace characters between request-line fields
240 if (count > 1 && !Config.onoff.relaxed_header_parser) {
f8b58a68 241 debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
e02f963c
AR
242 parseStatusCode = Http::scBadRequest;
243 return false;
244 }
947ca0c6 245
e02f963c
AR
246 return true;
247}
4c14658e 248
e02f963c
AR
249/// Parse CRs at the end of request-line, just before the terminating LF.
250bool
417da400 251Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
e02f963c
AR
252{
253 if (Config.onoff.relaxed_header_parser) {
254 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
255 } else {
256 if (!tok.skipOneTrailing(CharacterSet::CR)) {
257 debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
258 parseStatusCode = Http::scBadRequest;
259 return false;
260 }
261 }
262 return true;
947ca0c6 263}
274bd5ad 264
947ca0c6
AJ
265/**
266 * Attempt to parse the first line of a new request message.
267 *
268 * Governed by:
269 * RFC 1945 section 5.1
270 * RFC 7230 section 2.6, 3.1 and 3.5
271 *
de158bf5 272 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
947ca0c6
AJ
273 * \retval 1 successful parse. member fields contain the request-line items
274 * \retval 0 more data is needed to complete the parse
275 */
276int
277Http::One::RequestParser::parseRequestFirstLine()
278{
947ca0c6
AJ
279 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
280 debugs(74, DBG_DATA, buf_);
4c14658e 281
e02f963c 282 SBuf line;
947ca0c6 283
e02f963c
AR
284 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
285 // Now, the request line has to end at the first LF.
286 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
417da400 287 Tokenizer lineTok(buf_);
e02f963c 288 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
f8b58a68
EB
289 if (buf_.length() >= Config.maxRequestHeaderSize) {
290 /* who should we blame for our failure to parse this line? */
291
417da400 292 Tokenizer methodTok(buf_);
f8b58a68
EB
293 if (!parseMethodField(methodTok))
294 return -1; // blame a bad method (or its delimiter)
295
296 // assume it is the URI
297 debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
a95f4c73 298 Config.maxRequestHeaderSize << "-byte limit");
f8b58a68
EB
299 parseStatusCode = Http::scUriTooLong;
300 return -1;
301 }
947ca0c6
AJ
302 debugs(74, 5, "Parser needs more data");
303 return 0;
4c14658e
AJ
304 }
305
417da400 306 Tokenizer tok(line);
78a63ed1 307
e02f963c
AR
308 if (!parseMethodField(tok))
309 return -1;
e47e0802 310
e02f963c
AR
311 /* now parse backwards, to leave just the URI */
312 if (!skipTrailingCrs(tok))
313 return -1;
314
315 if (!parseHttpVersionField(tok))
316 return -1;
947ca0c6 317
f8b58a68 318 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
e02f963c
AR
319 return -1;
320
321 /* parsed everything before and after the URI */
322
323 if (!parseUriField(tok))
324 return -1;
325
326 if (!tok.atEnd()) {
327 debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
328 parseStatusCode = Http::scBadRequest;
329 return -1;
4c14658e 330 }
4c14658e 331
e02f963c
AR
332 parseStatusCode = Http::scOkay;
333 buf_ = lineTok.remaining(); // incremental parse checkpoint
334 return 1;
4c14658e 335}
7a4fa6a0 336
87abd755 337bool
36a9c964 338Http::One::RequestParser::parse(const SBuf &aBuf)
6b2b6cfe
CT
339{
340 const bool result = doParse(aBuf);
341 if (preserveParsed_) {
342 assert(aBuf.length() >= remaining().length());
343 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
344 }
345
346 return result;
347}
348
349// raw is not a reference because a reference might point back to our own buf_ or parsed_
350bool
351Http::One::RequestParser::doParse(const SBuf &aBuf)
4c14658e 352{
b749de75 353 buf_ = aBuf;
36a9c964
AJ
354 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
355
cbcd99df 356 // stage 1: locate the request-line
36a9c964 357 if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df 358 skipGarbageLines();
cbcd99df
AJ
359
360 // if we hit something before EOS treat it as a message
b749de75 361 if (!buf_.isEmpty())
cbcd99df
AJ
362 parsingStage_ = HTTP_PARSE_FIRST;
363 else
f9daf571 364 return false;
cbcd99df 365 }
c11191e0 366
cbcd99df
AJ
367 // stage 2: parse the request-line
368 if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526 369 PROF_start(HttpParserParseReqLine);
678451c0 370 const int retcode = parseRequestFirstLine();
e4cff825
AJ
371
372 // first-line (or a look-alike) found successfully.
373 if (retcode > 0) {
e4cff825
AJ
374 parsingStage_ = HTTP_PARSE_MIME;
375 }
376
947ca0c6
AJ
377 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
378 debugs(74, 5, "request-line: method: " << method_);
379 debugs(74, 5, "request-line: url: " << uri_);
380 debugs(74, 5, "request-line: proto: " << msgProtocol_);
b749de75 381 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526 382 PROF_stop(HttpParserParseReqLine);
cbcd99df
AJ
383
384 // syntax errors already
f4880526 385 if (retcode < 0) {
cbcd99df 386 parsingStage_ = HTTP_PARSE_DONE;
f4880526
AJ
387 return false;
388 }
389 }
390
391 // stage 3: locate the mime header block
cbcd99df 392 if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526 393 // HTTP/1.x request-line is valid and parsing completed.
f8cab755 394 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
f1d5359e
AJ
395 if (parseStatusCode == Http::scHeaderTooLarge)
396 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
016a316b
AJ
397 return false;
398 }
f4880526 399 }
87abd755 400
36a9c964 401 return !needsMoreData();
4c14658e 402}
f53969cc 403