]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Fix first-line debug display after dropping parsedCount_
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "http/one/RequestParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "mime_header.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
8
9 void
10 Http::One::RequestParser::clear()
11 {
12 Http1::Parser::clear();
13
14 request_parse_status = Http::scNone;
15 req.start = req.end = -1;
16 req.m_start = req.m_end = -1;
17 req.u_start = req.u_end = -1;
18 req.v_start = req.v_end = -1;
19 method_ = HttpRequestMethod();
20 }
21
22 /**
23 * Attempt to parse the first line of a new request message.
24 *
25 * Governed by RFC 2616 section 4.1
26 * "
27 * In the interest of robustness, servers SHOULD ignore any empty
28 * line(s) received where a Request-Line is expected. In other words, if
29 * the server is reading the protocol stream at the beginning of a
30 * message and receives a CRLF first, it should ignore the CRLF.
31 *
32 * ... To restate what is explicitly forbidden by the
33 * BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
34 * extra CRLF.
35 * "
36 *
37 * Parsing state is stored between calls to avoid repeating buffer scans.
38 * If garbage is found the parsing offset is incremented.
39 */
40 void
41 Http::One::RequestParser::skipGarbageLines()
42 {
43 #if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
44 if (Config.onoff.relaxed_header_parser) {
45 if (Config.onoff.relaxed_header_parser < 0 && (buf[0] == '\r' || buf[0] == '\n'))
46 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
47 "CRLF bytes received ahead of request-line. " <<
48 "Ignored due to relaxed_header_parser.");
49 // Be tolerant of prefix empty lines
50 // ie any series of either \n or \r\n with no other characters and no repeated \r
51 while (!buf.isEmpty() && (buf[0] == '\n' || (buf[0] == '\r' && buf[1] == '\n'))) {
52 buf.consume(1);
53 }
54 }
55 #endif
56
57 /* XXX: this is a Squid-specific tolerance
58 * it appears never to have been relevant outside out unit-tests
59 * because the ConnStateData parser loop starts with consumeWhitespace()
60 * which absorbs any SP HTAB VTAB CR LF characters.
61 * But unit-tests called the HttpParser method directly without that pruning.
62 */
63 #if USE_HTTP_VIOLATIONS
64 if (Config.onoff.relaxed_header_parser) {
65 if (Config.onoff.relaxed_header_parser < 0 && buf[0] == ' ')
66 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
67 "Whitespace bytes received ahead of method. " <<
68 "Ignored due to relaxed_header_parser.");
69 // Be tolerant of prefix spaces (other bytes are valid method values)
70 while (!buf.isEmpty() && buf[0] == ' ') {
71 buf.consume(1);
72 }
73 }
74 #endif
75 }
76
77 /**
78 * Attempt to parse the first line of a new request message.
79 *
80 * Governed by:
81 * RFC 1945 section 5.1
82 * RFC 2616 section 5.1
83 *
84 * Parsing state is stored between calls. However the current implementation
85 * begins parsing from scratch on every call.
86 * The return value tells you whether the parsing state fields are valid or not.
87 *
88 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
89 * \retval 1 successful parse. member fields contain the request-line items
90 * \retval 0 more data is needed to complete the parse
91 */
92 int
93 Http::One::RequestParser::parseRequestFirstLine()
94 {
95 int second_word = -1; // track the suspected URI start
96 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
97 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
98
99 debugs(74, 5, "parsing possible request: buf.length=" << buf.length());
100 debugs(74, DBG_DATA, buf);
101
102 // Single-pass parse: (provided we have the whole line anyways)
103
104 req.start = 0;
105 req.end = -1;
106 for (SBuf::size_type i = 0; i < buf.length(); ++i) {
107 // track first and last whitespace (SP only)
108 if (buf[i] == ' ') {
109 last_whitespace = i;
110 if (first_whitespace < req.start)
111 first_whitespace = i;
112 }
113
114 // track next non-SP/non-HT byte after first_whitespace
115 if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
116 second_word = i;
117 }
118
119 // locate line terminator
120 if (buf[i] == '\n') {
121 req.end = i;
122 line_end = i - 1;
123 break;
124 }
125 if (i < buf.length() - 1 && buf[i] == '\r') {
126 if (Config.onoff.relaxed_header_parser) {
127 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
128 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
129 "Series of carriage-return bytes received prior to line terminator. " <<
130 "Ignored due to relaxed_header_parser.");
131
132 // Be tolerant of invalid multiple \r prior to terminal \n
133 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
134 line_end = i - 1;
135 while (i < buf.length() - 1 && buf[i + 1] == '\r')
136 ++i;
137
138 if (buf[i + 1] == '\n') {
139 req.end = i + 1;
140 break;
141 }
142 } else {
143 if (buf[i + 1] == '\n') {
144 req.end = i + 1;
145 line_end = i - 1;
146 break;
147 }
148 }
149
150 // RFC 2616 section 5.1
151 // "No CR or LF is allowed except in the final CRLF sequence"
152 request_parse_status = Http::scBadRequest;
153 return -1;
154 }
155 }
156
157 if (req.end == -1) {
158 // DoS protection against long first-line
159 if ((size_t)buf.length() >= Config.maxRequestHeaderSize) {
160 debugs(33, 5, "Too large request-line");
161 // XXX: return URL-too-log status code if second_whitespace is not yet found.
162 request_parse_status = Http::scHeaderTooLarge;
163 return -1;
164 }
165
166 debugs(74, 5, "Parser: retval 0: from " << req.start <<
167 "->" << req.end << ": needs more data to complete first line.");
168 return 0;
169 }
170
171 // NP: we have now seen EOL, more-data (0) cannot occur.
172 // From here on any failure is -1, success is 1
173
174 // Input Validation:
175
176 // DoS protection against long first-line
177 if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
178 debugs(33, 5, "Too large request-line");
179 request_parse_status = Http::scHeaderTooLarge;
180 return -1;
181 }
182
183 // Process what we now know about the line structure into field offsets
184 // generating HTTP status for any aborts as we go.
185
186 // First non-whitespace = beginning of method
187 if (req.start > line_end) {
188 request_parse_status = Http::scBadRequest;
189 return -1;
190 }
191 req.m_start = req.start;
192
193 // First whitespace = end of method
194 if (first_whitespace > line_end || first_whitespace < req.start) {
195 request_parse_status = Http::scBadRequest; // no method
196 return -1;
197 }
198 req.m_end = first_whitespace - 1;
199 if (req.m_end < req.m_start) {
200 request_parse_status = Http::scBadRequest; // missing URI?
201 return -1;
202 }
203
204 /* Set method_ */
205 SBuf tmp = buf.substr(req.m_start, req.m_end - req.m_start + 1);
206 method_ = HttpRequestMethod(tmp);
207
208 // First non-whitespace after first SP = beginning of URL+Version
209 if (second_word > line_end || second_word < req.start) {
210 request_parse_status = Http::scBadRequest; // missing URI
211 return -1;
212 }
213 req.u_start = second_word;
214
215 // RFC 1945: SP and version following URI are optional, marking version 0.9
216 // we identify this by the last whitespace being earlier than URI start
217 if (last_whitespace < second_word && last_whitespace >= req.start) {
218 msgProtocol_ = Http::ProtocolVersion(0,9);
219 req.u_end = line_end;
220 uri_ = buf.substr(req.u_start, req.u_end - req.u_start + 1);
221 request_parse_status = Http::scOkay; // HTTP/0.9
222 return 1;
223 } else {
224 // otherwise last whitespace is somewhere after end of URI.
225 req.u_end = last_whitespace;
226 // crop any trailing whitespace in the area we think of as URI
227 for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
228 }
229 if (req.u_end < req.u_start) {
230 request_parse_status = Http::scBadRequest; // missing URI
231 return -1;
232 }
233 uri_ = buf.substr(req.u_start, req.u_end - req.u_start + 1);
234
235 // Last whitespace SP = before start of protocol/version
236 if (last_whitespace >= line_end) {
237 request_parse_status = Http::scBadRequest; // missing version
238 return -1;
239 }
240 req.v_start = last_whitespace + 1;
241 req.v_end = line_end;
242
243 // We only accept HTTP protocol requests right now.
244 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
245 if ((req.v_end - req.v_start +1) < 5 || buf.substr(req.v_start, 5).caseCmp(SBuf("HTTP/")) != 0) {
246 #if USE_HTTP_VIOLATIONS
247 // being lax; old parser accepted strange versions
248 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
249 msgProtocol_ = Http::ProtocolVersion(0,9);
250 req.u_end = line_end;
251 request_parse_status = Http::scOkay; // treat as HTTP/0.9
252 return 1;
253 #else
254 // protocol not supported / implemented.
255 request_parse_status = Http::scHttpVersionNotSupported;
256 return -1;
257 #endif
258 }
259 msgProtocol_.protocol = AnyP::PROTO_HTTP;
260
261 int i = req.v_start + sizeof("HTTP/") -1;
262
263 /* next should be 1 or more digits */
264 if (!isdigit(buf[i])) {
265 request_parse_status = Http::scHttpVersionNotSupported;
266 return -1;
267 }
268 int maj = 0;
269 for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
270 maj = maj * 10;
271 maj = maj + (buf[i]) - '0';
272 }
273 // catch too-big values or missing remainders
274 if (maj >= 65536 || i > line_end) {
275 request_parse_status = Http::scHttpVersionNotSupported;
276 return -1;
277 }
278 msgProtocol_.major = maj;
279
280 /* next should be .; we -have- to have this as we have a whole line.. */
281 if (buf[i] != '.') {
282 request_parse_status = Http::scHttpVersionNotSupported;
283 return -1;
284 }
285 // catch missing minor part
286 if (++i > line_end) {
287 request_parse_status = Http::scHttpVersionNotSupported;
288 return -1;
289 }
290 /* next should be one or more digits */
291 if (!isdigit(buf[i])) {
292 request_parse_status = Http::scHttpVersionNotSupported;
293 return -1;
294 }
295 int min = 0;
296 for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
297 min = min * 10;
298 min = min + (buf[i]) - '0';
299 }
300 // catch too-big values or trailing garbage
301 if (min >= 65536 || i < line_end) {
302 request_parse_status = Http::scHttpVersionNotSupported;
303 return -1;
304 }
305 msgProtocol_.minor = min;
306
307 /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */
308 /* We currently only support 0.9, 1.0, 1.1 properly in this parser */
309 if ((maj == 0 && min != 9) || (maj > 1)) {
310 request_parse_status = Http::scHttpVersionNotSupported;
311 return -1;
312 }
313
314 /*
315 * Rightio - we have all the schtuff. Return true; we've got enough.
316 */
317 request_parse_status = Http::scOkay;
318 return 1;
319 }
320
321 bool
322 Http::One::RequestParser::parse(const SBuf &aBuf)
323 {
324 buf = aBuf;
325 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
326
327 // stage 1: locate the request-line
328 if (parsingStage_ == HTTP_PARSE_NONE) {
329 skipGarbageLines();
330
331 // if we hit something before EOS treat it as a message
332 if (!buf.isEmpty())
333 parsingStage_ = HTTP_PARSE_FIRST;
334 else
335 return false;
336 }
337
338 // stage 2: parse the request-line
339 if (parsingStage_ == HTTP_PARSE_FIRST) {
340 PROF_start(HttpParserParseReqLine);
341 const int retcode = parseRequestFirstLine();
342
343 // first-line (or a look-alike) found successfully.
344 if (retcode > 0) {
345 buf.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
346 parsingStage_ = HTTP_PARSE_MIME;
347 }
348
349 debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
350 " line={" << aBuf.length() << ", data='" << aBuf << "'}");
351 debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
352 debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
353 debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
354 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf.length()));
355 PROF_stop(HttpParserParseReqLine);
356
357 // syntax errors already
358 if (retcode < 0) {
359 parsingStage_ = HTTP_PARSE_DONE;
360 return false;
361 }
362 }
363
364 // stage 3: locate the mime header block
365 if (parsingStage_ == HTTP_PARSE_MIME) {
366 // HTTP/1.x request-line is valid and parsing completed.
367 if (msgProtocol_.major == 1) {
368 /* NOTE: HTTP/0.9 requests do not have a mime header block.
369 * So the rest of the code will need to deal with '0'-byte headers
370 * (ie, none, so don't try parsing em)
371 */
372 int64_t mimeHeaderBytes = 0;
373 if ((mimeHeaderBytes = headersEnd(buf.c_str(), buf.length())) == 0) {
374 if (buf.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
375 debugs(33, 5, "Too large request");
376 request_parse_status = Http::scHeaderTooLarge;
377 parsingStage_ = HTTP_PARSE_DONE;
378 } else
379 debugs(33, 5, "Incomplete request, waiting for end of headers");
380 return false;
381 }
382 mimeHeaderBlock_ = buf.substr(req.end+1, mimeHeaderBytes);
383 buf.consume(mimeHeaderBytes); // done with these bytes now.
384
385 } else
386 debugs(33, 3, "Missing HTTP/1.x identifier");
387
388 // NP: we do not do any further stages here yet so go straight to DONE
389 parsingStage_ = HTTP_PARSE_DONE;
390
391 // Squid could handle these headers, but admin does not want to
392 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
393 debugs(33, 5, "Too large request");
394 request_parse_status = Http::scHeaderTooLarge;
395 return false;
396 }
397 }
398
399 return !needsMoreData();
400 }