]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 /*
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/RequestParser.h"
12 #include "http/one/Tokenizer.h"
13 #include "http/ProtocolVersion.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
16
17 // the right debugs() level for parsing errors
18 inline static int
19 ErrorLevel() {
20 return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
21 }
22
23 Http::One::RequestParser::RequestParser() :
24 Parser()
25 {}
26
27 Http1::Parser::size_type
28 Http::One::RequestParser::firstLineSize() const
29 {
30 // RFC 7230 section 2.6
31 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
32 return method_.image().length() + uri_.length() + 12;
33 }
34
35 /**
36 * Attempt to parse the first line of a new request message.
37 *
38 * Governed by RFC 7230 section 3.5
39 * "
40 * In the interest of robustness, a server that is expecting to receive
41 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
42 * received prior to the request-line.
43 * "
44 *
45 * Parsing state is stored between calls to avoid repeating buffer scans.
46 * If garbage is found the parsing offset is incremented.
47 */
48 void
49 Http::One::RequestParser::skipGarbageLines()
50 {
51 if (Config.onoff.relaxed_header_parser) {
52 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
53 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
54 "CRLF bytes received ahead of request-line. " <<
55 "Ignored due to relaxed_header_parser.");
56 // Be tolerant of prefix empty lines
57 // ie any series of either \n or \r\n with no other characters and no repeated \r
58 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
59 buf_.consume(1);
60 }
61 }
62 }
63
64 /**
65 * Attempt to parse the method field out of an HTTP message request-line.
66 *
67 * Governed by:
68 * RFC 1945 section 5.1
69 * RFC 7230 section 2.6, 3.1 and 3.5
70 */
71 bool
72 Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
73 {
74 // method field is a sequence of TCHAR.
75 // Limit to 32 characters to prevent overly long sequences of non-HTTP
76 // being sucked in before mismatch is detected. 32 is itself annoyingly
77 // big but there are methods registered by IANA that reach 17 bytes:
78 // http://www.iana.org/assignments/http-methods
79 static const size_t maxMethodLength = 32; // TODO: make this configurable?
80
81 SBuf methodFound;
82 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
83 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
84 parseStatusCode = Http::scBadRequest;
85 return false;
86 }
87 method_ = HttpRequestMethod(methodFound);
88
89 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
90 return false;
91
92 return true;
93 }
94
95 /// the characters which truly are valid within URI
96 static const CharacterSet &
97 UriValidCharacters()
98 {
99 /* RFC 3986 section 2:
100 * "
101 * A URI is composed from a limited set of characters consisting of
102 * digits, letters, and a few graphic symbols.
103 * "
104 */
105 static const CharacterSet UriChars =
106 CharacterSet("URI-Chars","") +
107 // RFC 3986 section 2.2 - reserved characters
108 CharacterSet("gen-delims", ":/?#[]@") +
109 CharacterSet("sub-delims", "!$&'()*+,;=") +
110 // RFC 3986 section 2.3 - unreserved characters
111 CharacterSet::ALPHA +
112 CharacterSet::DIGIT +
113 CharacterSet("unreserved", "-._~") +
114 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
115 CharacterSet("pct-encoded", "%") +
116 CharacterSet::HEXDIG;
117
118 return UriChars;
119 }
120
121 /// characters which Squid will accept in the HTTP request-target (URI)
122 const CharacterSet &
123 Http::One::RequestParser::RequestTargetCharacters()
124 {
125 if (Config.onoff.relaxed_header_parser) {
126 #if USE_HTTP_VIOLATIONS
127 static const CharacterSet RelaxedExtended =
128 UriValidCharacters() +
129 // accept whitespace (extended), it will be dealt with later
130 DelimiterCharacters() +
131 // RFC 2396 unwise character set which must never be transmitted
132 // in un-escaped form. But many web services do anyway.
133 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
134 // UTF-8 because we want to be future-proof
135 CharacterSet("UTF-8", 128, 255);
136
137 return RelaxedExtended;
138 #else
139 static const CharacterSet RelaxedCompliant =
140 UriValidCharacters() +
141 // accept whitespace (extended), it will be dealt with later.
142 DelimiterCharacters();
143
144 return RelaxedCompliant;
145 #endif
146 }
147
148 // strict parse only accepts what the RFC say we can
149 return UriValidCharacters();
150 }
151
152 bool
153 Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
154 {
155 /* Arbitrary 64KB URI upper length limit.
156 *
157 * Not quite as arbitrary as it seems though. Old SquidString objects
158 * cannot store strings larger than 64KB, so we must limit until they
159 * have all been replaced with SBuf.
160 *
161 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
162 * at least 8000 octets for the whole line, including method and version.
163 */
164 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
165
166 SBuf uriFound;
167 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
168 parseStatusCode = Http::scBadRequest;
169 debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
170 return false;
171 }
172
173 if (uriFound.length() > maxUriLength) {
174 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
175 parseStatusCode = Http::scUriTooLong;
176 debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
177 "-byte URI exceeds " << maxUriLength << "-byte limit");
178 return false;
179 }
180
181 uri_ = uriFound;
182 return true;
183 }
184
185 bool
186 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
187 {
188 const auto savedTok = tok;
189
190 SBuf digit;
191 // Searching for Http1magic precludes detecting HTTP/2+ versions.
192 // Rewrite if we ever _need_ to return 505 (Version Not Supported) errors.
193 if (tok.suffix(digit, CharacterSet::DIGIT) && tok.skipSuffix(Http1magic)) {
194 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
195 return true;
196 }
197
198 // A GET request might use HTTP/0.9 syntax
199 if (method_ == Http::METHOD_GET) {
200 // RFC 1945 - no HTTP version field at all
201 tok = savedTok; // in case the URI ends with a digit
202 // report this assumption as an error if configured to triage parsing
203 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
204 msgProtocol_ = Http::ProtocolVersion(0,9);
205 return true;
206 }
207
208 debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
209 parseStatusCode = Http::scBadRequest;
210 return false;
211 }
212
213 /**
214 * Skip characters separating request-line fields.
215 * To handle bidirectional parsing, the caller does the actual skipping and
216 * we just check how many character the caller has skipped.
217 */
218 bool
219 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
220 {
221 if (count <= 0) {
222 debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
223 parseStatusCode = Http::scBadRequest;
224 return false;
225 }
226
227 // tolerant parser allows multiple whitespace characters between request-line fields
228 if (count > 1 && !Config.onoff.relaxed_header_parser) {
229 debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
230 parseStatusCode = Http::scBadRequest;
231 return false;
232 }
233
234 return true;
235 }
236
237 /// Parse CRs at the end of request-line, just before the terminating LF.
238 bool
239 Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
240 {
241 if (Config.onoff.relaxed_header_parser) {
242 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
243 } else {
244 if (!tok.skipOneTrailing(CharacterSet::CR)) {
245 debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
246 parseStatusCode = Http::scBadRequest;
247 return false;
248 }
249 }
250 return true;
251 }
252
253 /**
254 * Attempt to parse the first line of a new request message.
255 *
256 * Governed by:
257 * RFC 1945 section 5.1
258 * RFC 7230 section 2.6, 3.1 and 3.5
259 *
260 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
261 * \retval 1 successful parse. member fields contain the request-line items
262 * \retval 0 more data is needed to complete the parse
263 */
264 int
265 Http::One::RequestParser::parseRequestFirstLine()
266 {
267 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
268 debugs(74, DBG_DATA, buf_);
269
270 SBuf line;
271
272 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
273 // Now, the request line has to end at the first LF.
274 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
275 ::Parser::Tokenizer lineTok(buf_);
276 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
277 if (buf_.length() >= Config.maxRequestHeaderSize) {
278 /* who should we blame for our failure to parse this line? */
279
280 Http1::Tokenizer methodTok(buf_);
281 if (!parseMethodField(methodTok))
282 return -1; // blame a bad method (or its delimiter)
283
284 // assume it is the URI
285 debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
286 Config.maxRequestHeaderSize << "-byte limit");
287 parseStatusCode = Http::scUriTooLong;
288 return -1;
289 }
290 debugs(74, 5, "Parser needs more data");
291 return 0;
292 }
293
294 Http1::Tokenizer tok(line);
295
296 if (!parseMethodField(tok))
297 return -1;
298
299 /* now parse backwards, to leave just the URI */
300 if (!skipTrailingCrs(tok))
301 return -1;
302
303 if (!parseHttpVersionField(tok))
304 return -1;
305
306 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
307 return -1;
308
309 /* parsed everything before and after the URI */
310
311 if (!parseUriField(tok))
312 return -1;
313
314 if (!tok.atEnd()) {
315 debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
316 parseStatusCode = Http::scBadRequest;
317 return -1;
318 }
319
320 parseStatusCode = Http::scOkay;
321 buf_ = lineTok.remaining(); // incremental parse checkpoint
322 return 1;
323 }
324
325 bool
326 Http::One::RequestParser::parse(const SBuf &aBuf)
327 {
328 buf_ = aBuf;
329 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
330
331 // stage 1: locate the request-line
332 if (parsingStage_ == HTTP_PARSE_NONE) {
333 skipGarbageLines();
334
335 // if we hit something before EOS treat it as a message
336 if (!buf_.isEmpty())
337 parsingStage_ = HTTP_PARSE_FIRST;
338 else
339 return false;
340 }
341
342 // stage 2: parse the request-line
343 if (parsingStage_ == HTTP_PARSE_FIRST) {
344 PROF_start(HttpParserParseReqLine);
345 const int retcode = parseRequestFirstLine();
346
347 // first-line (or a look-alike) found successfully.
348 if (retcode > 0) {
349 parsingStage_ = HTTP_PARSE_MIME;
350 }
351
352 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
353 debugs(74, 5, "request-line: method: " << method_);
354 debugs(74, 5, "request-line: url: " << uri_);
355 debugs(74, 5, "request-line: proto: " << msgProtocol_);
356 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
357 PROF_stop(HttpParserParseReqLine);
358
359 // syntax errors already
360 if (retcode < 0) {
361 parsingStage_ = HTTP_PARSE_DONE;
362 return false;
363 }
364 }
365
366 // stage 3: locate the mime header block
367 if (parsingStage_ == HTTP_PARSE_MIME) {
368 // HTTP/1.x request-line is valid and parsing completed.
369 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
370 if (parseStatusCode == Http::scHeaderTooLarge)
371 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
372 return false;
373 }
374 }
375
376 return !needsMoreData();
377 }
378