]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 /*
2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/RequestParser.h"
12 #include "http/one/Tokenizer.h"
13 #include "http/ProtocolVersion.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
16
17 // the right debugs() level for parsing errors
18 inline static int
19 ErrorLevel() {
20 return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
21 }
22
23 Http::One::RequestParser::RequestParser(bool preserveParsed) :
24 Parser(),
25 preserveParsed_(preserveParsed)
26 {}
27
28 Http1::Parser::size_type
29 Http::One::RequestParser::firstLineSize() const
30 {
31 // RFC 7230 section 2.6
32 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
33 return method_.image().length() + uri_.length() + 12;
34 }
35
36 /**
37 * Attempt to parse the first line of a new request message.
38 *
39 * Governed by RFC 7230 section 3.5
40 * "
41 * In the interest of robustness, a server that is expecting to receive
42 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
43 * received prior to the request-line.
44 * "
45 *
46 * Parsing state is stored between calls to avoid repeating buffer scans.
47 * If garbage is found the parsing offset is incremented.
48 */
49 void
50 Http::One::RequestParser::skipGarbageLines()
51 {
52 if (Config.onoff.relaxed_header_parser) {
53 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
54 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
55 "CRLF bytes received ahead of request-line. " <<
56 "Ignored due to relaxed_header_parser.");
57 // Be tolerant of prefix empty lines
58 // ie any series of either \n or \r\n with no other characters and no repeated \r
59 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
60 buf_.consume(1);
61 }
62 }
63 }
64
65 /**
66 * Attempt to parse the method field out of an HTTP message request-line.
67 *
68 * Governed by:
69 * RFC 1945 section 5.1
70 * RFC 7230 section 2.6, 3.1 and 3.5
71 */
72 bool
73 Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
74 {
75 // method field is a sequence of TCHAR.
76 // Limit to 32 characters to prevent overly long sequences of non-HTTP
77 // being sucked in before mismatch is detected. 32 is itself annoyingly
78 // big but there are methods registered by IANA that reach 17 bytes:
79 // http://www.iana.org/assignments/http-methods
80 static const size_t maxMethodLength = 32; // TODO: make this configurable?
81
82 SBuf methodFound;
83 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
84 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
85 parseStatusCode = Http::scBadRequest;
86 return false;
87 }
88 method_ = HttpRequestMethod(methodFound);
89
90 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
91 return false;
92
93 return true;
94 }
95
96 /// the characters which truly are valid within URI
97 static const CharacterSet &
98 UriValidCharacters()
99 {
100 /* RFC 3986 section 2:
101 * "
102 * A URI is composed from a limited set of characters consisting of
103 * digits, letters, and a few graphic symbols.
104 * "
105 */
106 static const CharacterSet UriChars =
107 CharacterSet("URI-Chars","") +
108 // RFC 3986 section 2.2 - reserved characters
109 CharacterSet("gen-delims", ":/?#[]@") +
110 CharacterSet("sub-delims", "!$&'()*+,;=") +
111 // RFC 3986 section 2.3 - unreserved characters
112 CharacterSet::ALPHA +
113 CharacterSet::DIGIT +
114 CharacterSet("unreserved", "-._~") +
115 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
116 CharacterSet("pct-encoded", "%") +
117 CharacterSet::HEXDIG;
118
119 return UriChars;
120 }
121
122 /// characters which Squid will accept in the HTTP request-target (URI)
123 const CharacterSet &
124 Http::One::RequestParser::RequestTargetCharacters()
125 {
126 if (Config.onoff.relaxed_header_parser) {
127 #if USE_HTTP_VIOLATIONS
128 static const CharacterSet RelaxedExtended =
129 UriValidCharacters() +
130 // accept whitespace (extended), it will be dealt with later
131 DelimiterCharacters() +
132 // RFC 2396 unwise character set which must never be transmitted
133 // in un-escaped form. But many web services do anyway.
134 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
135 // UTF-8 because we want to be future-proof
136 CharacterSet("UTF-8", 128, 255);
137
138 return RelaxedExtended;
139 #else
140 static const CharacterSet RelaxedCompliant =
141 UriValidCharacters() +
142 // accept whitespace (extended), it will be dealt with later.
143 DelimiterCharacters();
144
145 return RelaxedCompliant;
146 #endif
147 }
148
149 // strict parse only accepts what the RFC say we can
150 return UriValidCharacters();
151 }
152
153 bool
154 Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
155 {
156 /* Arbitrary 64KB URI upper length limit.
157 *
158 * Not quite as arbitrary as it seems though. Old SquidString objects
159 * cannot store strings larger than 64KB, so we must limit until they
160 * have all been replaced with SBuf.
161 *
162 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
163 * at least 8000 octets for the whole line, including method and version.
164 */
165 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
166
167 SBuf uriFound;
168 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
169 parseStatusCode = Http::scBadRequest;
170 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
171 return false;
172 }
173
174 if (uriFound.length() > maxUriLength) {
175 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
176 parseStatusCode = Http::scUriTooLong;
177 debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
178 "-byte URI exceeds " << maxUriLength << "-byte limit");
179 return false;
180 }
181
182 uri_ = uriFound;
183 return true;
184 }
185
186 bool
187 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
188 {
189 static const SBuf http1p0("HTTP/1.0");
190 static const SBuf http1p1("HTTP/1.1");
191 const auto savedTok = tok;
192
193 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
194 // the vast majority of cases.
195 if (tok.skipSuffix(http1p1)) {
196 msgProtocol_ = Http::ProtocolVersion(1, 1);
197 return true;
198 } else if (tok.skipSuffix(http1p0)) {
199 msgProtocol_ = Http::ProtocolVersion(1, 0);
200 return true;
201 } else {
202 // RFC 7230 section 2.6:
203 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
204 static const CharacterSet period("Decimal point", ".");
205 static const SBuf proto("HTTP/");
206 SBuf majorDigit;
207 SBuf minorDigit;
208 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
209 tok.skipOneTrailing(period) &&
210 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
211 tok.skipSuffix(proto)) {
212 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
213 // use '0.0' for unsupported multiple digit version numbers
214 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
215 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
216 msgProtocol_ = Http::ProtocolVersion(major, minor);
217 return true;
218 }
219 }
220
221 // A GET request might use HTTP/0.9 syntax
222 if (method_ == Http::METHOD_GET) {
223 // RFC 1945 - no HTTP version field at all
224 tok = savedTok; // in case the URI ends with a digit
225 // report this assumption as an error if configured to triage parsing
226 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
227 msgProtocol_ = Http::ProtocolVersion(0,9);
228 return true;
229 }
230
231 debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
232 parseStatusCode = Http::scBadRequest;
233 return false;
234 }
235
236 /**
237 * Skip characters separating request-line fields.
238 * To handle bidirectional parsing, the caller does the actual skipping and
239 * we just check how many character the caller has skipped.
240 */
241 bool
242 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
243 {
244 if (count <= 0) {
245 debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
246 parseStatusCode = Http::scBadRequest;
247 return false;
248 }
249
250 // tolerant parser allows multiple whitespace characters between request-line fields
251 if (count > 1 && !Config.onoff.relaxed_header_parser) {
252 debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
253 parseStatusCode = Http::scBadRequest;
254 return false;
255 }
256
257 return true;
258 }
259
260 /// Parse CRs at the end of request-line, just before the terminating LF.
261 bool
262 Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
263 {
264 if (Config.onoff.relaxed_header_parser) {
265 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
266 } else {
267 if (!tok.skipOneTrailing(CharacterSet::CR)) {
268 debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
269 parseStatusCode = Http::scBadRequest;
270 return false;
271 }
272 }
273 return true;
274 }
275
276 /**
277 * Attempt to parse the first line of a new request message.
278 *
279 * Governed by:
280 * RFC 1945 section 5.1
281 * RFC 7230 section 2.6, 3.1 and 3.5
282 *
283 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
284 * \retval 1 successful parse. member fields contain the request-line items
285 * \retval 0 more data is needed to complete the parse
286 */
287 int
288 Http::One::RequestParser::parseRequestFirstLine()
289 {
290 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
291 debugs(74, DBG_DATA, buf_);
292
293 SBuf line;
294
295 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
296 // Now, the request line has to end at the first LF.
297 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
298 ::Parser::Tokenizer lineTok(buf_);
299 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
300 if (buf_.length() >= Config.maxRequestHeaderSize) {
301 /* who should we blame for our failure to parse this line? */
302
303 Http1::Tokenizer methodTok(buf_);
304 if (!parseMethodField(methodTok))
305 return -1; // blame a bad method (or its delimiter)
306
307 // assume it is the URI
308 debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
309 Config.maxRequestHeaderSize << "-byte limit");
310 parseStatusCode = Http::scUriTooLong;
311 return -1;
312 }
313 debugs(74, 5, "Parser needs more data");
314 return 0;
315 }
316
317 Http1::Tokenizer tok(line);
318
319 if (!parseMethodField(tok))
320 return -1;
321
322 /* now parse backwards, to leave just the URI */
323 if (!skipTrailingCrs(tok))
324 return -1;
325
326 if (!parseHttpVersionField(tok))
327 return -1;
328
329 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
330 return -1;
331
332 /* parsed everything before and after the URI */
333
334 if (!parseUriField(tok))
335 return -1;
336
337 if (!tok.atEnd()) {
338 debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
339 parseStatusCode = Http::scBadRequest;
340 return -1;
341 }
342
343 parseStatusCode = Http::scOkay;
344 buf_ = lineTok.remaining(); // incremental parse checkpoint
345 return 1;
346 }
347
348 bool
349 Http::One::RequestParser::parse(const SBuf &aBuf)
350 {
351 const bool result = doParse(aBuf);
352 if (preserveParsed_) {
353 assert(aBuf.length() >= remaining().length());
354 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
355 }
356
357 return result;
358 }
359
360 // raw is not a reference because a reference might point back to our own buf_ or parsed_
361 bool
362 Http::One::RequestParser::doParse(const SBuf &aBuf)
363 {
364 buf_ = aBuf;
365 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
366
367 // stage 1: locate the request-line
368 if (parsingStage_ == HTTP_PARSE_NONE) {
369 skipGarbageLines();
370
371 // if we hit something before EOS treat it as a message
372 if (!buf_.isEmpty())
373 parsingStage_ = HTTP_PARSE_FIRST;
374 else
375 return false;
376 }
377
378 // stage 2: parse the request-line
379 if (parsingStage_ == HTTP_PARSE_FIRST) {
380 PROF_start(HttpParserParseReqLine);
381 const int retcode = parseRequestFirstLine();
382
383 // first-line (or a look-alike) found successfully.
384 if (retcode > 0) {
385 parsingStage_ = HTTP_PARSE_MIME;
386 }
387
388 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
389 debugs(74, 5, "request-line: method: " << method_);
390 debugs(74, 5, "request-line: url: " << uri_);
391 debugs(74, 5, "request-line: proto: " << msgProtocol_);
392 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
393 PROF_stop(HttpParserParseReqLine);
394
395 // syntax errors already
396 if (retcode < 0) {
397 parsingStage_ = HTTP_PARSE_DONE;
398 return false;
399 }
400 }
401
402 // stage 3: locate the mime header block
403 if (parsingStage_ == HTTP_PARSE_MIME) {
404 // HTTP/1.x request-line is valid and parsing completed.
405 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
406 if (parseStatusCode == Http::scHeaderTooLarge)
407 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
408 return false;
409 }
410 }
411
412 return !needsMoreData();
413 }
414