]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Merged from trunk
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/RequestParser.h"
12 #include "http/one/Tokenizer.h"
13 #include "http/ProtocolVersion.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
16
17 Http::One::RequestParser::RequestParser() :
18 Parser(),
19 firstLineGarbage_(0)
20 {}
21
22 Http1::Parser::size_type
23 Http::One::RequestParser::firstLineSize() const
24 {
25 // RFC 7230 section 2.6
26 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
27 return method_.image().length() + uri_.length() + 12;
28 }
29
30 /**
31 * Attempt to parse the first line of a new request message.
32 *
33 * Governed by RFC 7230 section 3.5
34 * "
35 * In the interest of robustness, a server that is expecting to receive
36 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
37 * received prior to the request-line.
38 * "
39 *
40 * Parsing state is stored between calls to avoid repeating buffer scans.
41 * If garbage is found the parsing offset is incremented.
42 */
43 void
44 Http::One::RequestParser::skipGarbageLines()
45 {
46 if (Config.onoff.relaxed_header_parser) {
47 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
48 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
49 "CRLF bytes received ahead of request-line. " <<
50 "Ignored due to relaxed_header_parser.");
51 // Be tolerant of prefix empty lines
52 // ie any series of either \n or \r\n with no other characters and no repeated \r
53 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
54 buf_.consume(1);
55 }
56 }
57 }
58
59 /**
60 * Attempt to parse the method field out of an HTTP message request-line.
61 *
62 * Governed by:
63 * RFC 1945 section 5.1
64 * RFC 7230 section 2.6, 3.1 and 3.5
65 *
66 * Parsing state is stored between calls. The current implementation uses
67 * checkpoints after each successful request-line field.
68 * The return value tells you whether the parsing is completed or not.
69 *
70 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
71 * \retval 1 successful parse. method_ is filled and buffer consumed including first delimiter.
72 * \retval 0 more data is needed to complete the parse
73 */
74 int
75 Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
76 {
77 // scan for up to 16 valid method characters.
78 static const size_t maxMethodLength = 16; // TODO: make this configurable?
79
80 // method field is a sequence of TCHAR.
81 SBuf methodFound;
82 if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
83
84 method_ = HttpRequestMethod(methodFound);
85 buf_ = tok.remaining(); // incremental parse checkpoint
86 return 1;
87
88 } else if (tok.atEnd()) {
89 debugs(74, 5, "Parser needs more data to find method");
90 return 0;
91
92 } // else error(s)
93
94 // non-delimiter found after accepted method bytes means ...
95 if (methodFound.length() == maxMethodLength) {
96 // method longer than acceptible.
97 // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
98 parseStatusCode = Http::scNotImplemented;
99 debugs(33, 5, "invalid request-line. method too long");
100 } else {
101 // invalid character in the URL
102 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
103 parseStatusCode = Http::scBadRequest;
104 debugs(33, 5, "invalid request-line. missing method delimiter");
105 }
106 return -1;
107 }
108
109 static CharacterSet
110 uriValidCharacters()
111 {
112 CharacterSet UriChars("URI-Chars","");
113
114 /* RFC 3986 section 2:
115 * "
116 * A URI is composed from a limited set of characters consisting of
117 * digits, letters, and a few graphic symbols.
118 * "
119 */
120 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
121 UriChars.add('%');
122 UriChars += CharacterSet::HEXDIG;
123 // RFC 3986 section 2.2 - reserved characters
124 UriChars += CharacterSet("gen-delims", ":/?#[]@");
125 UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
126 // RFC 3986 section 2.3 - unreserved characters
127 UriChars += CharacterSet::ALPHA;
128 UriChars += CharacterSet::DIGIT;
129 UriChars += CharacterSet("unreserved", "-._~");
130
131 return UriChars;
132 }
133
134 int
135 Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
136 {
137 // URI field is a sequence of ... what? segments all have different valid charset
138 // go with non-whitespace non-binary characters for now
139 static CharacterSet UriChars = uriValidCharacters();
140
141 /* Arbitrary 64KB URI upper length limit.
142 *
143 * Not quite as arbitrary as it seems though. Old SquidString objects
144 * cannot store strings larger than 64KB, so we must limit until they
145 * have all been replaced with SBuf.
146 *
147 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
148 * at least 8000 octets for the whole line, including method and version.
149 */
150 const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
151 static_cast<size_t>((64*1024)-1));
152
153 SBuf uriFound;
154
155 // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
156 if (tok.prefix(uriFound, UriChars, maxUriLength) && tok.skipOne(CharacterSet::SP)) {
157 uri_ = uriFound;
158 buf_ = tok.remaining(); // incremental parse checkpoint
159 return 1;
160
161 // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
162 } else if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
163 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
164 msgProtocol_ = Http::ProtocolVersion(0,9);
165 uri_ = uriFound; // found by successful prefix() call earlier.
166 parseStatusCode = Http::scOkay;
167 buf_ = tok.remaining(); // incremental parse checkpoint
168 return 1;
169
170 } else if (tok.atEnd()) {
171 debugs(74, 5, "Parser needs more data to find URI");
172 return 0;
173 }
174
175 // else errors...
176
177 if (uriFound.length() == maxUriLength) {
178 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
179 parseStatusCode = Http::scUriTooLong;
180 debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
181 } else {
182 // RFC 7230 section 3.1.1 required (SHOULD) 400 response
183 parseStatusCode = Http::scBadRequest;
184 debugs(33, 5, "invalid request-line. missing URI delimiter");
185 }
186 return -1;
187 }
188
189 int
190 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
191 {
192 // partial match of HTTP/1 magic prefix
193 if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
194 debugs(74, 5, "Parser needs more data to find version");
195 return 0;
196 }
197
198 if (!tok.skip(Http1magic)) {
199 debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
200 parseStatusCode = Http::scHttpVersionNotSupported;
201 return -1;
202 }
203
204 if (tok.atEnd()) {
205 debugs(74, 5, "Parser needs more data to find version");
206 return 0;
207 }
208
209 // get the version minor DIGIT
210 SBuf digit;
211 if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
212
213 // found version fully AND terminator
214 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
215 parseStatusCode = Http::scOkay;
216 buf_ = tok.remaining(); // incremental parse checkpoint
217 return 1;
218
219 } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
220 debugs(74, 5, "Parser needs more data to find version");
221 return 0;
222
223 } // else error ...
224
225 // non-DIGIT. invalid version number.
226 parseStatusCode = Http::scHttpVersionNotSupported;
227 debugs(33, 5, "invalid request-line. garbage before line terminator");
228 return -1;
229 }
230
231 /**
232 * Attempt to parse the first line of a new request message.
233 *
234 * Governed by:
235 * RFC 1945 section 5.1
236 * RFC 7230 section 2.6, 3.1 and 3.5
237 *
238 * Parsing state is stored between calls. The current implementation uses
239 * checkpoints after each successful request-line field.
240 * The return value tells you whether the parsing is completed or not.
241 *
242 * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
243 * \retval 1 successful parse. member fields contain the request-line items
244 * \retval 0 more data is needed to complete the parse
245 */
246 int
247 Http::One::RequestParser::parseRequestFirstLine()
248 {
249 Http1::Tokenizer tok(buf_);
250
251 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
252 debugs(74, DBG_DATA, buf_);
253
254 // NP: would be static, except it need to change with reconfigure
255 CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
256
257 if (Config.onoff.relaxed_header_parser) {
258 // RFC 7230 section 3.5
259 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
260 // as whitespace between request-line fields
261 WspDelim += CharacterSet::HTAB
262 + CharacterSet("VT,FF","\x0B\x0C")
263 + CharacterSet::CR;
264 }
265
266 // only search for method if we have not yet found one
267 if (method_ == Http::METHOD_NONE) {
268 const int res = parseMethodField(tok, WspDelim);
269 if (res < 1)
270 return res;
271 // else keep going...
272 }
273
274 // tolerant parser allows multiple whitespace characters between request-line fields
275 if (Config.onoff.relaxed_header_parser) {
276 const size_t garbage = tok.skipAll(WspDelim);
277 if (garbage > 0) {
278 firstLineGarbage_ += garbage;
279 buf_ = tok.remaining(); // re-checkpoint after garbage
280 }
281 }
282 if (tok.atEnd()) {
283 debugs(74, 5, "Parser needs more data");
284 return 0;
285 }
286
287 // from here on, we have two possible parse paths: whitespace tolerant, and strict
288 if (Config.onoff.relaxed_header_parser) {
289 // whitespace tolerant
290
291 // NOTES:
292 // * this would be static, except WspDelim changes with reconfigure
293 // * HTTP-version charset is included by uriValidCharacters()
294 // * terminal CR is included by WspDelim here in relaxed parsing
295 CharacterSet LfDelim = uriValidCharacters() + WspDelim;
296
297 // seek the LF character, then tokenize the line in reverse
298 SBuf line;
299 if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
300 Http1::Tokenizer rTok(line);
301 SBuf nil;
302 (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
303 SBuf digit;
304 if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
305 uri_ = rTok.remaining();
306 msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
307 if (uri_.isEmpty()) {
308 debugs(33, 5, "invalid request-line. missing URL");
309 parseStatusCode = Http::scBadRequest;
310 return -1;
311 }
312
313 parseStatusCode = Http::scOkay;
314 buf_ = tok.remaining(); // incremental parse checkpoint
315 return 1;
316
317 } else if (method_ == Http::METHOD_GET) {
318 // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
319 debugs(33, 5, "HTTP/0.9 syntax request-line detected");
320 msgProtocol_ = Http::ProtocolVersion(0,9);
321 static const SBuf cr("\r",1);
322 uri_ = line.trim(cr,false,true);
323 parseStatusCode = Http::scOkay;
324 buf_ = tok.remaining(); // incremental parse checkpoint
325 return 1;
326 }
327
328 debugs(33, 5, "invalid request-line. not HTTP");
329 parseStatusCode = Http::scBadRequest;
330 return -1;
331 }
332
333 debugs(74, 5, "Parser needs more data");
334 return 0;
335 }
336 // else strict non-whitespace tolerant parse
337
338 // only search for request-target (URL) if we have not yet found one
339 if (uri_.isEmpty()) {
340 const int res = parseUriField(tok);
341 if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
342 return res;
343 // else keep going...
344 }
345
346 if (tok.atEnd()) {
347 debugs(74, 5, "Parser needs more data");
348 return 0;
349 }
350
351 // HTTP/1 version suffix (protocol magic) followed by CR*LF
352 if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
353 return parseHttpVersionField(tok);
354 }
355
356 // If we got here this method has been called too many times
357 parseStatusCode = Http::scInternalServerError;
358 debugs(33, 5, "ERROR: Parser already processed request-line");
359 return -1;
360 }
361
362 bool
363 Http::One::RequestParser::parse(const SBuf &aBuf)
364 {
365 buf_ = aBuf;
366 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
367
368 // stage 1: locate the request-line
369 if (parsingStage_ == HTTP_PARSE_NONE) {
370 skipGarbageLines();
371
372 // if we hit something before EOS treat it as a message
373 if (!buf_.isEmpty())
374 parsingStage_ = HTTP_PARSE_FIRST;
375 else
376 return false;
377 }
378
379 // stage 2: parse the request-line
380 if (parsingStage_ == HTTP_PARSE_FIRST) {
381 PROF_start(HttpParserParseReqLine);
382 const int retcode = parseRequestFirstLine();
383
384 // first-line (or a look-alike) found successfully.
385 if (retcode > 0) {
386 parsingStage_ = HTTP_PARSE_MIME;
387 }
388
389 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
390 debugs(74, 5, "request-line: method: " << method_);
391 debugs(74, 5, "request-line: url: " << uri_);
392 debugs(74, 5, "request-line: proto: " << msgProtocol_);
393 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
394 PROF_stop(HttpParserParseReqLine);
395
396 // syntax errors already
397 if (retcode < 0) {
398 parsingStage_ = HTTP_PARSE_DONE;
399 return false;
400 }
401 }
402
403 // stage 3: locate the mime header block
404 if (parsingStage_ == HTTP_PARSE_MIME) {
405 // HTTP/1.x request-line is valid and parsing completed.
406 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
407 if (parseStatusCode == Http::scHeaderTooLarge)
408 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
409 return false;
410 }
411 }
412
413 return !needsMoreData();
414 }
415