]>
Commit | Line | Data |
---|---|---|
eac61ce1 | 1 | /* |
f70aedc4 | 2 | * Copyright (C) 1996-2021 The Squid Software Foundation and contributors |
eac61ce1 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
f7f3304a | 9 | #include "squid.h" |
4c14658e | 10 | #include "Debug.h" |
c99510dd AJ |
11 | #include "http/one/RequestParser.h" |
12 | #include "http/ProtocolVersion.h" | |
417da400 | 13 | #include "parser/Tokenizer.h" |
582c2af2 | 14 | #include "profiler/Profiler.h" |
4d5904f7 | 15 | #include "SquidConfig.h" |
4c14658e | 16 | |
947ca0c6 AJ |
17 | Http1::Parser::size_type |
18 | Http::One::RequestParser::firstLineSize() const | |
7322c9dd | 19 | { |
947ca0c6 AJ |
20 | // RFC 7230 section 2.6 |
21 | /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */ | |
22 | return method_.image().length() + uri_.length() + 12; | |
4c14658e AJ |
23 | } |
24 | ||
c11191e0 AJ |
25 | /** |
26 | * Attempt to parse the first line of a new request message. | |
27 | * | |
a4c74dd8 | 28 | * Governed by RFC 7230 section 3.5 |
c11191e0 | 29 | * " |
a4c74dd8 AJ |
30 | * In the interest of robustness, a server that is expecting to receive |
31 | * and parse a request-line SHOULD ignore at least one empty line (CRLF) | |
32 | * received prior to the request-line. | |
c11191e0 AJ |
33 | * " |
34 | * | |
35 | * Parsing state is stored between calls to avoid repeating buffer scans. | |
cbcd99df | 36 | * If garbage is found the parsing offset is incremented. |
c11191e0 | 37 | */ |
cbcd99df | 38 | void |
678451c0 | 39 | Http::One::RequestParser::skipGarbageLines() |
c11191e0 | 40 | { |
c11191e0 | 41 | if (Config.onoff.relaxed_header_parser) { |
b749de75 | 42 | if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n')) |
c11191e0 AJ |
43 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
44 | "CRLF bytes received ahead of request-line. " << | |
45 | "Ignored due to relaxed_header_parser."); | |
46 | // Be tolerant of prefix empty lines | |
cbcd99df | 47 | // ie any series of either \n or \r\n with no other characters and no repeated \r |
b749de75 AJ |
48 | while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) { |
49 | buf_.consume(1); | |
7a4fa6a0 | 50 | } |
c11191e0 | 51 | } |
c11191e0 AJ |
52 | } |
53 | ||
54 | /** | |
947ca0c6 | 55 | * Attempt to parse the method field out of an HTTP message request-line. |
c11191e0 AJ |
56 | * |
57 | * Governed by: | |
58 | * RFC 1945 section 5.1 | |
947ca0c6 | 59 | * RFC 7230 section 2.6, 3.1 and 3.5 |
c11191e0 | 60 | */ |
e02f963c | 61 | bool |
417da400 | 62 | Http::One::RequestParser::parseMethodField(Tokenizer &tok) |
4c14658e | 63 | { |
e03114f8 | 64 | // method field is a sequence of TCHAR. |
e02f963c AR |
65 | // Limit to 32 characters to prevent overly long sequences of non-HTTP |
66 | // being sucked in before mismatch is detected. 32 is itself annoyingly | |
67 | // big but there are methods registered by IANA that reach 17 bytes: | |
68 | // http://www.iana.org/assignments/http-methods | |
69 | static const size_t maxMethodLength = 32; // TODO: make this configurable? | |
4c14658e | 70 | |
e02f963c AR |
71 | SBuf methodFound; |
72 | if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) { | |
73 | debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method"); | |
de158bf5 | 74 | parseStatusCode = Http::scBadRequest; |
e02f963c | 75 | return false; |
947ca0c6 | 76 | } |
e02f963c | 77 | method_ = HttpRequestMethod(methodFound); |
f8b58a68 EB |
78 | |
79 | if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method")) | |
80 | return false; | |
81 | ||
e02f963c | 82 | return true; |
947ca0c6 | 83 | } |
4c14658e | 84 | |
e02f963c AR |
85 | /// the characters which truly are valid within URI |
86 | static const CharacterSet & | |
87 | UriValidCharacters() | |
78a63ed1 | 88 | { |
78a63ed1 AJ |
89 | /* RFC 3986 section 2: |
90 | * " | |
91 | * A URI is composed from a limited set of characters consisting of | |
92 | * digits, letters, and a few graphic symbols. | |
93 | * " | |
94 | */ | |
e02f963c AR |
95 | static const CharacterSet UriChars = |
96 | CharacterSet("URI-Chars","") + | |
97 | // RFC 3986 section 2.2 - reserved characters | |
98 | CharacterSet("gen-delims", ":/?#[]@") + | |
99 | CharacterSet("sub-delims", "!$&'()*+,;=") + | |
100 | // RFC 3986 section 2.3 - unreserved characters | |
101 | CharacterSet::ALPHA + | |
102 | CharacterSet::DIGIT + | |
103 | CharacterSet("unreserved", "-._~") + | |
104 | // RFC 3986 section 2.1 - percent encoding "%" HEXDIG | |
105 | CharacterSet("pct-encoded", "%") + | |
106 | CharacterSet::HEXDIG; | |
78a63ed1 AJ |
107 | |
108 | return UriChars; | |
109 | } | |
016a316b | 110 | |
e02f963c AR |
111 | /// characters which Squid will accept in the HTTP request-target (URI) |
112 | const CharacterSet & | |
113 | Http::One::RequestParser::RequestTargetCharacters() | |
947ca0c6 | 114 | { |
e02f963c AR |
115 | if (Config.onoff.relaxed_header_parser) { |
116 | #if USE_HTTP_VIOLATIONS | |
117 | static const CharacterSet RelaxedExtended = | |
118 | UriValidCharacters() + | |
119 | // accept whitespace (extended), it will be dealt with later | |
120 | DelimiterCharacters() + | |
121 | // RFC 2396 unwise character set which must never be transmitted | |
122 | // in un-escaped form. But many web services do anyway. | |
123 | CharacterSet("RFC2396-unwise","\"\\|^<>`{}") + | |
124 | // UTF-8 because we want to be future-proof | |
125 | CharacterSet("UTF-8", 128, 255); | |
126 | ||
127 | return RelaxedExtended; | |
128 | #else | |
129 | static const CharacterSet RelaxedCompliant = | |
130 | UriValidCharacters() + | |
131 | // accept whitespace (extended), it will be dealt with later. | |
132 | DelimiterCharacters(); | |
133 | ||
134 | return RelaxedCompliant; | |
135 | #endif | |
136 | } | |
137 | ||
138 | // strict parse only accepts what the RFC say we can | |
139 | return UriValidCharacters(); | |
140 | } | |
947ca0c6 | 141 | |
e02f963c | 142 | bool |
417da400 | 143 | Http::One::RequestParser::parseUriField(Tokenizer &tok) |
e02f963c | 144 | { |
947ca0c6 AJ |
145 | /* Arbitrary 64KB URI upper length limit. |
146 | * | |
147 | * Not quite as arbitrary as it seems though. Old SquidString objects | |
148 | * cannot store strings larger than 64KB, so we must limit until they | |
149 | * have all been replaced with SBuf. | |
150 | * | |
151 | * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED) | |
152 | * at least 8000 octets for the whole line, including method and version. | |
153 | */ | |
e02f963c | 154 | const size_t maxUriLength = static_cast<size_t>((64*1024)-1); |
016a316b | 155 | |
947ca0c6 | 156 | SBuf uriFound; |
e02f963c AR |
157 | if (!tok.prefix(uriFound, RequestTargetCharacters())) { |
158 | parseStatusCode = Http::scBadRequest; | |
159 | debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI"); | |
160 | return false; | |
016a316b AJ |
161 | } |
162 | ||
e02f963c | 163 | if (uriFound.length() > maxUriLength) { |
e03114f8 | 164 | // RFC 7230 section 3.1.1 mandatory (MUST) 414 response |
de158bf5 | 165 | parseStatusCode = Http::scUriTooLong; |
e02f963c AR |
166 | debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() << |
167 | "-byte URI exceeds " << maxUriLength << "-byte limit"); | |
168 | return false; | |
4c14658e | 169 | } |
e02f963c AR |
170 | |
171 | uri_ = uriFound; | |
172 | return true; | |
947ca0c6 | 173 | } |
4c14658e | 174 | |
e02f963c | 175 | bool |
417da400 | 176 | Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok) |
947ca0c6 | 177 | { |
294083a1 EB |
178 | static const SBuf http1p0("HTTP/1.0"); |
179 | static const SBuf http1p1("HTTP/1.1"); | |
e02f963c | 180 | const auto savedTok = tok; |
4c14658e | 181 | |
294083a1 EB |
182 | // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in |
183 | // the vast majority of cases. | |
184 | if (tok.skipSuffix(http1p1)) { | |
185 | msgProtocol_ = Http::ProtocolVersion(1, 1); | |
e02f963c | 186 | return true; |
294083a1 EB |
187 | } else if (tok.skipSuffix(http1p0)) { |
188 | msgProtocol_ = Http::ProtocolVersion(1, 0); | |
189 | return true; | |
190 | } else { | |
191 | // RFC 7230 section 2.6: | |
192 | // HTTP-version = HTTP-name "/" DIGIT "." DIGIT | |
193 | static const CharacterSet period("Decimal point", "."); | |
194 | static const SBuf proto("HTTP/"); | |
195 | SBuf majorDigit; | |
196 | SBuf minorDigit; | |
197 | if (tok.suffix(minorDigit, CharacterSet::DIGIT) && | |
198 | tok.skipOneTrailing(period) && | |
199 | tok.suffix(majorDigit, CharacterSet::DIGIT) && | |
200 | tok.skipSuffix(proto)) { | |
201 | const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1; | |
202 | // use '0.0' for unsupported multiple digit version numbers | |
203 | const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0'); | |
204 | const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0'); | |
205 | msgProtocol_ = Http::ProtocolVersion(major, minor); | |
206 | return true; | |
207 | } | |
4c14658e AJ |
208 | } |
209 | ||
e02f963c AR |
210 | // A GET request might use HTTP/0.9 syntax |
211 | if (method_ == Http::METHOD_GET) { | |
212 | // RFC 1945 - no HTTP version field at all | |
213 | tok = savedTok; // in case the URI ends with a digit | |
214 | // report this assumption as an error if configured to triage parsing | |
215 | debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line"); | |
216 | msgProtocol_ = Http::ProtocolVersion(0,9); | |
217 | return true; | |
4c14658e | 218 | } |
4c14658e | 219 | |
e02f963c AR |
220 | debugs(33, ErrorLevel(), "invalid request-line: not HTTP"); |
221 | parseStatusCode = Http::scBadRequest; | |
222 | return false; | |
223 | } | |
4c14658e | 224 | |
e02f963c AR |
225 | /** |
226 | * Skip characters separating request-line fields. | |
227 | * To handle bidirectional parsing, the caller does the actual skipping and | |
228 | * we just check how many character the caller has skipped. | |
229 | */ | |
230 | bool | |
f8b58a68 | 231 | Http::One::RequestParser::skipDelimiter(const size_t count, const char *where) |
e02f963c AR |
232 | { |
233 | if (count <= 0) { | |
f8b58a68 | 234 | debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where); |
e02f963c AR |
235 | parseStatusCode = Http::scBadRequest; |
236 | return false; | |
237 | } | |
e03114f8 | 238 | |
e02f963c AR |
239 | // tolerant parser allows multiple whitespace characters between request-line fields |
240 | if (count > 1 && !Config.onoff.relaxed_header_parser) { | |
f8b58a68 | 241 | debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where); |
e02f963c AR |
242 | parseStatusCode = Http::scBadRequest; |
243 | return false; | |
244 | } | |
947ca0c6 | 245 | |
e02f963c AR |
246 | return true; |
247 | } | |
4c14658e | 248 | |
e02f963c AR |
249 | /// Parse CRs at the end of request-line, just before the terminating LF. |
250 | bool | |
417da400 | 251 | Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok) |
e02f963c AR |
252 | { |
253 | if (Config.onoff.relaxed_header_parser) { | |
254 | (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK | |
255 | } else { | |
256 | if (!tok.skipOneTrailing(CharacterSet::CR)) { | |
257 | debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF"); | |
258 | parseStatusCode = Http::scBadRequest; | |
259 | return false; | |
260 | } | |
261 | } | |
262 | return true; | |
947ca0c6 | 263 | } |
274bd5ad | 264 | |
947ca0c6 AJ |
265 | /** |
266 | * Attempt to parse the first line of a new request message. | |
267 | * | |
268 | * Governed by: | |
269 | * RFC 1945 section 5.1 | |
270 | * RFC 7230 section 2.6, 3.1 and 3.5 | |
271 | * | |
de158bf5 | 272 | * \retval -1 an error occurred. parseStatusCode indicates HTTP status result. |
947ca0c6 AJ |
273 | * \retval 1 successful parse. member fields contain the request-line items |
274 | * \retval 0 more data is needed to complete the parse | |
275 | */ | |
276 | int | |
277 | Http::One::RequestParser::parseRequestFirstLine() | |
278 | { | |
947ca0c6 AJ |
279 | debugs(74, 5, "parsing possible request: buf.length=" << buf_.length()); |
280 | debugs(74, DBG_DATA, buf_); | |
4c14658e | 281 | |
e02f963c | 282 | SBuf line; |
947ca0c6 | 283 | |
e02f963c AR |
284 | // Earlier, skipGarbageLines() took care of any leading LFs (if allowed). |
285 | // Now, the request line has to end at the first LF. | |
286 | static const CharacterSet lineChars = CharacterSet::LF.complement("notLF"); | |
417da400 | 287 | Tokenizer lineTok(buf_); |
e02f963c | 288 | if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) { |
f8b58a68 EB |
289 | if (buf_.length() >= Config.maxRequestHeaderSize) { |
290 | /* who should we blame for our failure to parse this line? */ | |
291 | ||
417da400 | 292 | Tokenizer methodTok(buf_); |
f8b58a68 EB |
293 | if (!parseMethodField(methodTok)) |
294 | return -1; // blame a bad method (or its delimiter) | |
295 | ||
296 | // assume it is the URI | |
297 | debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " << | |
a95f4c73 | 298 | Config.maxRequestHeaderSize << "-byte limit"); |
f8b58a68 EB |
299 | parseStatusCode = Http::scUriTooLong; |
300 | return -1; | |
301 | } | |
947ca0c6 AJ |
302 | debugs(74, 5, "Parser needs more data"); |
303 | return 0; | |
4c14658e AJ |
304 | } |
305 | ||
417da400 | 306 | Tokenizer tok(line); |
78a63ed1 | 307 | |
e02f963c AR |
308 | if (!parseMethodField(tok)) |
309 | return -1; | |
e47e0802 | 310 | |
e02f963c AR |
311 | /* now parse backwards, to leave just the URI */ |
312 | if (!skipTrailingCrs(tok)) | |
313 | return -1; | |
314 | ||
315 | if (!parseHttpVersionField(tok)) | |
316 | return -1; | |
947ca0c6 | 317 | |
f8b58a68 | 318 | if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version")) |
e02f963c AR |
319 | return -1; |
320 | ||
321 | /* parsed everything before and after the URI */ | |
322 | ||
323 | if (!parseUriField(tok)) | |
324 | return -1; | |
325 | ||
326 | if (!tok.atEnd()) { | |
327 | debugs(33, ErrorLevel(), "invalid request-line: garbage after URI"); | |
328 | parseStatusCode = Http::scBadRequest; | |
329 | return -1; | |
4c14658e | 330 | } |
4c14658e | 331 | |
e02f963c AR |
332 | parseStatusCode = Http::scOkay; |
333 | buf_ = lineTok.remaining(); // incremental parse checkpoint | |
334 | return 1; | |
4c14658e | 335 | } |
7a4fa6a0 | 336 | |
87abd755 | 337 | bool |
36a9c964 | 338 | Http::One::RequestParser::parse(const SBuf &aBuf) |
6b2b6cfe CT |
339 | { |
340 | const bool result = doParse(aBuf); | |
341 | if (preserveParsed_) { | |
342 | assert(aBuf.length() >= remaining().length()); | |
343 | parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes | |
344 | } | |
345 | ||
346 | return result; | |
347 | } | |
348 | ||
349 | // raw is not a reference because a reference might point back to our own buf_ or parsed_ | |
350 | bool | |
351 | Http::One::RequestParser::doParse(const SBuf &aBuf) | |
4c14658e | 352 | { |
b749de75 | 353 | buf_ = aBuf; |
36a9c964 AJ |
354 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); |
355 | ||
cbcd99df | 356 | // stage 1: locate the request-line |
36a9c964 | 357 | if (parsingStage_ == HTTP_PARSE_NONE) { |
cbcd99df | 358 | skipGarbageLines(); |
cbcd99df AJ |
359 | |
360 | // if we hit something before EOS treat it as a message | |
b749de75 | 361 | if (!buf_.isEmpty()) |
cbcd99df AJ |
362 | parsingStage_ = HTTP_PARSE_FIRST; |
363 | else | |
f9daf571 | 364 | return false; |
cbcd99df | 365 | } |
c11191e0 | 366 | |
cbcd99df AJ |
367 | // stage 2: parse the request-line |
368 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
f4880526 | 369 | PROF_start(HttpParserParseReqLine); |
678451c0 | 370 | const int retcode = parseRequestFirstLine(); |
e4cff825 AJ |
371 | |
372 | // first-line (or a look-alike) found successfully. | |
373 | if (retcode > 0) { | |
e4cff825 AJ |
374 | parsingStage_ = HTTP_PARSE_MIME; |
375 | } | |
376 | ||
947ca0c6 AJ |
377 | debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}"); |
378 | debugs(74, 5, "request-line: method: " << method_); | |
379 | debugs(74, 5, "request-line: url: " << uri_); | |
380 | debugs(74, 5, "request-line: proto: " << msgProtocol_); | |
b749de75 | 381 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); |
f4880526 | 382 | PROF_stop(HttpParserParseReqLine); |
cbcd99df AJ |
383 | |
384 | // syntax errors already | |
f4880526 | 385 | if (retcode < 0) { |
cbcd99df | 386 | parsingStage_ = HTTP_PARSE_DONE; |
f4880526 AJ |
387 | return false; |
388 | } | |
389 | } | |
390 | ||
391 | // stage 3: locate the mime header block | |
cbcd99df | 392 | if (parsingStage_ == HTTP_PARSE_MIME) { |
f4880526 | 393 | // HTTP/1.x request-line is valid and parsing completed. |
f8cab755 | 394 | if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) { |
f1d5359e AJ |
395 | if (parseStatusCode == Http::scHeaderTooLarge) |
396 | parseStatusCode = Http::scRequestHeaderFieldsTooLarge; | |
016a316b AJ |
397 | return false; |
398 | } | |
f4880526 | 399 | } |
87abd755 | 400 | |
36a9c964 | 401 | return !needsMoreData(); |
4c14658e | 402 | } |
f53969cc | 403 |