]>
Commit | Line | Data |
---|---|---|
eac61ce1 | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
eac61ce1 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
f7f3304a | 9 | #include "squid.h" |
4c14658e | 10 | #include "Debug.h" |
c99510dd | 11 | #include "http/one/RequestParser.h" |
f29718b0 | 12 | #include "http/one/Tokenizer.h" |
c99510dd | 13 | #include "http/ProtocolVersion.h" |
582c2af2 | 14 | #include "profiler/Profiler.h" |
4d5904f7 | 15 | #include "SquidConfig.h" |
4c14658e | 16 | |
e02f963c AR |
17 | // the right debugs() level for parsing errors |
18 | inline static int | |
19 | ErrorLevel() { | |
20 | return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5; | |
21 | } | |
22 | ||
6b2b6cfe CT |
23 | Http::One::RequestParser::RequestParser(bool preserveParsed) : |
24 | Parser(), | |
25 | preserveParsed_(preserveParsed) | |
947ca0c6 AJ |
26 | {} |
27 | ||
28 | Http1::Parser::size_type | |
29 | Http::One::RequestParser::firstLineSize() const | |
7322c9dd | 30 | { |
947ca0c6 AJ |
31 | // RFC 7230 section 2.6 |
32 | /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */ | |
33 | return method_.image().length() + uri_.length() + 12; | |
4c14658e AJ |
34 | } |
35 | ||
c11191e0 AJ |
36 | /** |
37 | * Attempt to parse the first line of a new request message. | |
38 | * | |
a4c74dd8 | 39 | * Governed by RFC 7230 section 3.5 |
c11191e0 | 40 | * " |
a4c74dd8 AJ |
41 | * In the interest of robustness, a server that is expecting to receive |
42 | * and parse a request-line SHOULD ignore at least one empty line (CRLF) | |
43 | * received prior to the request-line. | |
c11191e0 AJ |
44 | * " |
45 | * | |
46 | * Parsing state is stored between calls to avoid repeating buffer scans. | |
cbcd99df | 47 | * If garbage is found the parsing offset is incremented. |
c11191e0 | 48 | */ |
cbcd99df | 49 | void |
678451c0 | 50 | Http::One::RequestParser::skipGarbageLines() |
c11191e0 | 51 | { |
c11191e0 | 52 | if (Config.onoff.relaxed_header_parser) { |
b749de75 | 53 | if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n')) |
c11191e0 AJ |
54 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
55 | "CRLF bytes received ahead of request-line. " << | |
56 | "Ignored due to relaxed_header_parser."); | |
57 | // Be tolerant of prefix empty lines | |
cbcd99df | 58 | // ie any series of either \n or \r\n with no other characters and no repeated \r |
b749de75 AJ |
59 | while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) { |
60 | buf_.consume(1); | |
7a4fa6a0 | 61 | } |
c11191e0 | 62 | } |
c11191e0 AJ |
63 | } |
64 | ||
65 | /** | |
947ca0c6 | 66 | * Attempt to parse the method field out of an HTTP message request-line. |
c11191e0 AJ |
67 | * |
68 | * Governed by: | |
69 | * RFC 1945 section 5.1 | |
947ca0c6 | 70 | * RFC 7230 section 2.6, 3.1 and 3.5 |
c11191e0 | 71 | */ |
e02f963c AR |
72 | bool |
73 | Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok) | |
4c14658e | 74 | { |
e03114f8 | 75 | // method field is a sequence of TCHAR. |
e02f963c AR |
76 | // Limit to 32 characters to prevent overly long sequences of non-HTTP |
77 | // being sucked in before mismatch is detected. 32 is itself annoyingly | |
78 | // big but there are methods registered by IANA that reach 17 bytes: | |
79 | // http://www.iana.org/assignments/http-methods | |
80 | static const size_t maxMethodLength = 32; // TODO: make this configurable? | |
4c14658e | 81 | |
e02f963c AR |
82 | SBuf methodFound; |
83 | if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) { | |
84 | debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method"); | |
de158bf5 | 85 | parseStatusCode = Http::scBadRequest; |
e02f963c | 86 | return false; |
947ca0c6 | 87 | } |
e02f963c | 88 | method_ = HttpRequestMethod(methodFound); |
f8b58a68 EB |
89 | |
90 | if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method")) | |
91 | return false; | |
92 | ||
e02f963c | 93 | return true; |
947ca0c6 | 94 | } |
4c14658e | 95 | |
e02f963c AR |
96 | /// the characters which truly are valid within URI |
97 | static const CharacterSet & | |
98 | UriValidCharacters() | |
78a63ed1 | 99 | { |
78a63ed1 AJ |
100 | /* RFC 3986 section 2: |
101 | * " | |
102 | * A URI is composed from a limited set of characters consisting of | |
103 | * digits, letters, and a few graphic symbols. | |
104 | * " | |
105 | */ | |
e02f963c AR |
106 | static const CharacterSet UriChars = |
107 | CharacterSet("URI-Chars","") + | |
108 | // RFC 3986 section 2.2 - reserved characters | |
109 | CharacterSet("gen-delims", ":/?#[]@") + | |
110 | CharacterSet("sub-delims", "!$&'()*+,;=") + | |
111 | // RFC 3986 section 2.3 - unreserved characters | |
112 | CharacterSet::ALPHA + | |
113 | CharacterSet::DIGIT + | |
114 | CharacterSet("unreserved", "-._~") + | |
115 | // RFC 3986 section 2.1 - percent encoding "%" HEXDIG | |
116 | CharacterSet("pct-encoded", "%") + | |
117 | CharacterSet::HEXDIG; | |
78a63ed1 AJ |
118 | |
119 | return UriChars; | |
120 | } | |
016a316b | 121 | |
e02f963c AR |
122 | /// characters which Squid will accept in the HTTP request-target (URI) |
123 | const CharacterSet & | |
124 | Http::One::RequestParser::RequestTargetCharacters() | |
947ca0c6 | 125 | { |
e02f963c AR |
126 | if (Config.onoff.relaxed_header_parser) { |
127 | #if USE_HTTP_VIOLATIONS | |
128 | static const CharacterSet RelaxedExtended = | |
129 | UriValidCharacters() + | |
130 | // accept whitespace (extended), it will be dealt with later | |
131 | DelimiterCharacters() + | |
132 | // RFC 2396 unwise character set which must never be transmitted | |
133 | // in un-escaped form. But many web services do anyway. | |
134 | CharacterSet("RFC2396-unwise","\"\\|^<>`{}") + | |
135 | // UTF-8 because we want to be future-proof | |
136 | CharacterSet("UTF-8", 128, 255); | |
137 | ||
138 | return RelaxedExtended; | |
139 | #else | |
140 | static const CharacterSet RelaxedCompliant = | |
141 | UriValidCharacters() + | |
142 | // accept whitespace (extended), it will be dealt with later. | |
143 | DelimiterCharacters(); | |
144 | ||
145 | return RelaxedCompliant; | |
146 | #endif | |
147 | } | |
148 | ||
149 | // strict parse only accepts what the RFC say we can | |
150 | return UriValidCharacters(); | |
151 | } | |
947ca0c6 | 152 | |
e02f963c AR |
153 | bool |
154 | Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok) | |
155 | { | |
947ca0c6 AJ |
156 | /* Arbitrary 64KB URI upper length limit. |
157 | * | |
158 | * Not quite as arbitrary as it seems though. Old SquidString objects | |
159 | * cannot store strings larger than 64KB, so we must limit until they | |
160 | * have all been replaced with SBuf. | |
161 | * | |
162 | * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED) | |
163 | * at least 8000 octets for the whole line, including method and version. | |
164 | */ | |
e02f963c | 165 | const size_t maxUriLength = static_cast<size_t>((64*1024)-1); |
016a316b | 166 | |
947ca0c6 | 167 | SBuf uriFound; |
e02f963c AR |
168 | if (!tok.prefix(uriFound, RequestTargetCharacters())) { |
169 | parseStatusCode = Http::scBadRequest; | |
170 | debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI"); | |
171 | return false; | |
016a316b AJ |
172 | } |
173 | ||
e02f963c | 174 | if (uriFound.length() > maxUriLength) { |
e03114f8 | 175 | // RFC 7230 section 3.1.1 mandatory (MUST) 414 response |
de158bf5 | 176 | parseStatusCode = Http::scUriTooLong; |
e02f963c AR |
177 | debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() << |
178 | "-byte URI exceeds " << maxUriLength << "-byte limit"); | |
179 | return false; | |
4c14658e | 180 | } |
e02f963c AR |
181 | |
182 | uri_ = uriFound; | |
183 | return true; | |
947ca0c6 | 184 | } |
4c14658e | 185 | |
e02f963c | 186 | bool |
f29718b0 | 187 | Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok) |
947ca0c6 | 188 | { |
294083a1 EB |
189 | static const SBuf http1p0("HTTP/1.0"); |
190 | static const SBuf http1p1("HTTP/1.1"); | |
e02f963c | 191 | const auto savedTok = tok; |
4c14658e | 192 | |
294083a1 EB |
193 | // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in |
194 | // the vast majority of cases. | |
195 | if (tok.skipSuffix(http1p1)) { | |
196 | msgProtocol_ = Http::ProtocolVersion(1, 1); | |
e02f963c | 197 | return true; |
294083a1 EB |
198 | } else if (tok.skipSuffix(http1p0)) { |
199 | msgProtocol_ = Http::ProtocolVersion(1, 0); | |
200 | return true; | |
201 | } else { | |
202 | // RFC 7230 section 2.6: | |
203 | // HTTP-version = HTTP-name "/" DIGIT "." DIGIT | |
204 | static const CharacterSet period("Decimal point", "."); | |
205 | static const SBuf proto("HTTP/"); | |
206 | SBuf majorDigit; | |
207 | SBuf minorDigit; | |
208 | if (tok.suffix(minorDigit, CharacterSet::DIGIT) && | |
209 | tok.skipOneTrailing(period) && | |
210 | tok.suffix(majorDigit, CharacterSet::DIGIT) && | |
211 | tok.skipSuffix(proto)) { | |
212 | const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1; | |
213 | // use '0.0' for unsupported multiple digit version numbers | |
214 | const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0'); | |
215 | const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0'); | |
216 | msgProtocol_ = Http::ProtocolVersion(major, minor); | |
217 | return true; | |
218 | } | |
4c14658e AJ |
219 | } |
220 | ||
e02f963c AR |
221 | // A GET request might use HTTP/0.9 syntax |
222 | if (method_ == Http::METHOD_GET) { | |
223 | // RFC 1945 - no HTTP version field at all | |
224 | tok = savedTok; // in case the URI ends with a digit | |
225 | // report this assumption as an error if configured to triage parsing | |
226 | debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line"); | |
227 | msgProtocol_ = Http::ProtocolVersion(0,9); | |
228 | return true; | |
4c14658e | 229 | } |
4c14658e | 230 | |
e02f963c AR |
231 | debugs(33, ErrorLevel(), "invalid request-line: not HTTP"); |
232 | parseStatusCode = Http::scBadRequest; | |
233 | return false; | |
234 | } | |
4c14658e | 235 | |
e02f963c AR |
236 | /** |
237 | * Skip characters separating request-line fields. | |
238 | * To handle bidirectional parsing, the caller does the actual skipping and | |
239 | * we just check how many character the caller has skipped. | |
240 | */ | |
241 | bool | |
f8b58a68 | 242 | Http::One::RequestParser::skipDelimiter(const size_t count, const char *where) |
e02f963c AR |
243 | { |
244 | if (count <= 0) { | |
f8b58a68 | 245 | debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where); |
e02f963c AR |
246 | parseStatusCode = Http::scBadRequest; |
247 | return false; | |
248 | } | |
e03114f8 | 249 | |
e02f963c AR |
250 | // tolerant parser allows multiple whitespace characters between request-line fields |
251 | if (count > 1 && !Config.onoff.relaxed_header_parser) { | |
f8b58a68 | 252 | debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where); |
e02f963c AR |
253 | parseStatusCode = Http::scBadRequest; |
254 | return false; | |
255 | } | |
947ca0c6 | 256 | |
e02f963c AR |
257 | return true; |
258 | } | |
4c14658e | 259 | |
e02f963c AR |
260 | /// Parse CRs at the end of request-line, just before the terminating LF. |
261 | bool | |
262 | Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok) | |
263 | { | |
264 | if (Config.onoff.relaxed_header_parser) { | |
265 | (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK | |
266 | } else { | |
267 | if (!tok.skipOneTrailing(CharacterSet::CR)) { | |
268 | debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF"); | |
269 | parseStatusCode = Http::scBadRequest; | |
270 | return false; | |
271 | } | |
272 | } | |
273 | return true; | |
947ca0c6 | 274 | } |
274bd5ad | 275 | |
947ca0c6 AJ |
276 | /** |
277 | * Attempt to parse the first line of a new request message. | |
278 | * | |
279 | * Governed by: | |
280 | * RFC 1945 section 5.1 | |
281 | * RFC 7230 section 2.6, 3.1 and 3.5 | |
282 | * | |
de158bf5 | 283 | * \retval -1 an error occurred. parseStatusCode indicates HTTP status result. |
947ca0c6 AJ |
284 | * \retval 1 successful parse. member fields contain the request-line items |
285 | * \retval 0 more data is needed to complete the parse | |
286 | */ | |
287 | int | |
288 | Http::One::RequestParser::parseRequestFirstLine() | |
289 | { | |
947ca0c6 AJ |
290 | debugs(74, 5, "parsing possible request: buf.length=" << buf_.length()); |
291 | debugs(74, DBG_DATA, buf_); | |
4c14658e | 292 | |
e02f963c | 293 | SBuf line; |
947ca0c6 | 294 | |
e02f963c AR |
295 | // Earlier, skipGarbageLines() took care of any leading LFs (if allowed). |
296 | // Now, the request line has to end at the first LF. | |
297 | static const CharacterSet lineChars = CharacterSet::LF.complement("notLF"); | |
298 | ::Parser::Tokenizer lineTok(buf_); | |
299 | if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) { | |
f8b58a68 EB |
300 | if (buf_.length() >= Config.maxRequestHeaderSize) { |
301 | /* who should we blame for our failure to parse this line? */ | |
302 | ||
303 | Http1::Tokenizer methodTok(buf_); | |
304 | if (!parseMethodField(methodTok)) | |
305 | return -1; // blame a bad method (or its delimiter) | |
306 | ||
307 | // assume it is the URI | |
308 | debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " << | |
a95f4c73 | 309 | Config.maxRequestHeaderSize << "-byte limit"); |
f8b58a68 EB |
310 | parseStatusCode = Http::scUriTooLong; |
311 | return -1; | |
312 | } | |
947ca0c6 AJ |
313 | debugs(74, 5, "Parser needs more data"); |
314 | return 0; | |
4c14658e AJ |
315 | } |
316 | ||
e02f963c | 317 | Http1::Tokenizer tok(line); |
78a63ed1 | 318 | |
e02f963c AR |
319 | if (!parseMethodField(tok)) |
320 | return -1; | |
e47e0802 | 321 | |
e02f963c AR |
322 | /* now parse backwards, to leave just the URI */ |
323 | if (!skipTrailingCrs(tok)) | |
324 | return -1; | |
325 | ||
326 | if (!parseHttpVersionField(tok)) | |
327 | return -1; | |
947ca0c6 | 328 | |
f8b58a68 | 329 | if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version")) |
e02f963c AR |
330 | return -1; |
331 | ||
332 | /* parsed everything before and after the URI */ | |
333 | ||
334 | if (!parseUriField(tok)) | |
335 | return -1; | |
336 | ||
337 | if (!tok.atEnd()) { | |
338 | debugs(33, ErrorLevel(), "invalid request-line: garbage after URI"); | |
339 | parseStatusCode = Http::scBadRequest; | |
340 | return -1; | |
4c14658e | 341 | } |
4c14658e | 342 | |
e02f963c AR |
343 | parseStatusCode = Http::scOkay; |
344 | buf_ = lineTok.remaining(); // incremental parse checkpoint | |
345 | return 1; | |
4c14658e | 346 | } |
7a4fa6a0 | 347 | |
87abd755 | 348 | bool |
36a9c964 | 349 | Http::One::RequestParser::parse(const SBuf &aBuf) |
6b2b6cfe CT |
350 | { |
351 | const bool result = doParse(aBuf); | |
352 | if (preserveParsed_) { | |
353 | assert(aBuf.length() >= remaining().length()); | |
354 | parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes | |
355 | } | |
356 | ||
357 | return result; | |
358 | } | |
359 | ||
360 | // raw is not a reference because a reference might point back to our own buf_ or parsed_ | |
361 | bool | |
362 | Http::One::RequestParser::doParse(const SBuf &aBuf) | |
4c14658e | 363 | { |
b749de75 | 364 | buf_ = aBuf; |
36a9c964 AJ |
365 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); |
366 | ||
cbcd99df | 367 | // stage 1: locate the request-line |
36a9c964 | 368 | if (parsingStage_ == HTTP_PARSE_NONE) { |
cbcd99df | 369 | skipGarbageLines(); |
cbcd99df AJ |
370 | |
371 | // if we hit something before EOS treat it as a message | |
b749de75 | 372 | if (!buf_.isEmpty()) |
cbcd99df AJ |
373 | parsingStage_ = HTTP_PARSE_FIRST; |
374 | else | |
f9daf571 | 375 | return false; |
cbcd99df | 376 | } |
c11191e0 | 377 | |
cbcd99df AJ |
378 | // stage 2: parse the request-line |
379 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
f4880526 | 380 | PROF_start(HttpParserParseReqLine); |
678451c0 | 381 | const int retcode = parseRequestFirstLine(); |
e4cff825 AJ |
382 | |
383 | // first-line (or a look-alike) found successfully. | |
384 | if (retcode > 0) { | |
e4cff825 AJ |
385 | parsingStage_ = HTTP_PARSE_MIME; |
386 | } | |
387 | ||
947ca0c6 AJ |
388 | debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}"); |
389 | debugs(74, 5, "request-line: method: " << method_); | |
390 | debugs(74, 5, "request-line: url: " << uri_); | |
391 | debugs(74, 5, "request-line: proto: " << msgProtocol_); | |
b749de75 | 392 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); |
f4880526 | 393 | PROF_stop(HttpParserParseReqLine); |
cbcd99df AJ |
394 | |
395 | // syntax errors already | |
f4880526 | 396 | if (retcode < 0) { |
cbcd99df | 397 | parsingStage_ = HTTP_PARSE_DONE; |
f4880526 AJ |
398 | return false; |
399 | } | |
400 | } | |
401 | ||
402 | // stage 3: locate the mime header block | |
cbcd99df | 403 | if (parsingStage_ == HTTP_PARSE_MIME) { |
f4880526 | 404 | // HTTP/1.x request-line is valid and parsing completed. |
f8cab755 | 405 | if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) { |
f1d5359e AJ |
406 | if (parseStatusCode == Http::scHeaderTooLarge) |
407 | parseStatusCode = Http::scRequestHeaderFieldsTooLarge; | |
016a316b AJ |
408 | return false; |
409 | } | |
f4880526 | 410 | } |
87abd755 | 411 | |
36a9c964 | 412 | return !needsMoreData(); |
4c14658e | 413 | } |
f53969cc | 414 |