]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/RequestParser.cc
Merge from trunk rev.13687
[thirdparty/squid.git] / src / http / one / RequestParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "http/one/RequestParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "mime_header.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
8
9 Http::One::RequestParser::RequestParser() :
10 Parser(),
11 request_parse_status(Http::scNone)
12 {
13 req.start = req.end = -1;
14 req.m_start = req.m_end = -1;
15 req.u_start = req.u_end = -1;
16 req.v_start = req.v_end = -1;
17 }
18
19 /**
20 * Attempt to parse the first line of a new request message.
21 *
22 * Governed by RFC 7230 section 3.5
23 * "
24 * In the interest of robustness, a server that is expecting to receive
25 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
26 * received prior to the request-line.
27 * "
28 *
29 * Parsing state is stored between calls to avoid repeating buffer scans.
30 * If garbage is found the parsing offset is incremented.
31 */
32 void
33 Http::One::RequestParser::skipGarbageLines()
34 {
35 if (Config.onoff.relaxed_header_parser) {
36 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
37 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
38 "CRLF bytes received ahead of request-line. " <<
39 "Ignored due to relaxed_header_parser.");
40 // Be tolerant of prefix empty lines
41 // ie any series of either \n or \r\n with no other characters and no repeated \r
42 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
43 buf_.consume(1);
44 }
45 }
46
47 /* XXX: this is a Squid-specific tolerance
48 * it appears never to have been relevant outside out unit-tests
49 * because the ConnStateData parser loop starts with consumeWhitespace()
50 * which absorbs any SP HTAB VTAB CR LF characters.
51 * But unit-tests called the HttpParser method directly without that pruning.
52 */
53 #if USE_HTTP_VIOLATIONS
54 if (Config.onoff.relaxed_header_parser) {
55 if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
56 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
57 "Whitespace bytes received ahead of method. " <<
58 "Ignored due to relaxed_header_parser.");
59 // Be tolerant of prefix spaces (other bytes are valid method values)
60 while (!buf_.isEmpty() && buf_[0] == ' ') {
61 buf_.consume(1);
62 }
63 }
64 #endif
65 }
66
67 /**
68 * Attempt to parse the first line of a new request message.
69 *
70 * Governed by:
71 * RFC 1945 section 5.1
72 * RFC 7230 section 3.1 and 3.5
73 *
74 * Parsing state is stored between calls. However the current implementation
75 * begins parsing from scratch on every call.
76 * The return value tells you whether the parsing state fields are valid or not.
77 *
78 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
79 * \retval 1 successful parse. member fields contain the request-line items
80 * \retval 0 more data is needed to complete the parse
81 */
82 int
83 Http::One::RequestParser::parseRequestFirstLine()
84 {
85 int second_word = -1; // track the suspected URI start
86 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
87 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
88
89 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
90 debugs(74, DBG_DATA, buf_);
91
92 // Single-pass parse: (provided we have the whole line anyways)
93
94 req.start = 0;
95 req.end = -1;
96 for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
97 // track first and last whitespace (SP only)
98 if (buf_[i] == ' ') {
99 last_whitespace = i;
100 if (first_whitespace < req.start)
101 first_whitespace = i;
102 }
103
104 // track next non-SP/non-HT byte after first_whitespace
105 if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
106 second_word = i;
107 }
108
109 // locate line terminator
110 if (buf_[i] == '\n') {
111 req.end = i;
112 line_end = i - 1;
113 break;
114 }
115 if (i < buf_.length() - 1 && buf_[i] == '\r') {
116 if (Config.onoff.relaxed_header_parser) {
117 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
118 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
119 "Series of carriage-return bytes received prior to line terminator. " <<
120 "Ignored due to relaxed_header_parser.");
121
122 // Be tolerant of invalid multiple \r prior to terminal \n
123 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
124 line_end = i - 1;
125 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
126 ++i;
127
128 if (buf_[i + 1] == '\n') {
129 req.end = i + 1;
130 break;
131 }
132 } else {
133 if (buf_[i + 1] == '\n') {
134 req.end = i + 1;
135 line_end = i - 1;
136 break;
137 }
138 }
139
140 // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
141 // However it does explicitly state an exact syntax which omits un-encoded CR
142 // and defines 400 (Bad Request) as the required action when
143 // handed an invalid request-line.
144 request_parse_status = Http::scBadRequest;
145 return -1;
146 }
147 }
148
149 if (req.end == -1) {
150 // DoS protection against long first-line
151 if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
152 debugs(33, 5, "Too large request-line");
153 // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
154 request_parse_status = Http::scUriTooLong;
155 return -1;
156 }
157
158 debugs(74, 5, "Parser: retval 0: from " << req.start <<
159 "->" << req.end << ": needs more data to complete first line.");
160 return 0;
161 }
162
163 // NP: we have now seen EOL, more-data (0) cannot occur.
164 // From here on any failure is -1, success is 1
165
166 // Input Validation:
167
168 // DoS protection against long first-line
169 if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
170 debugs(33, 5, "Too large request-line");
171 request_parse_status = Http::scUriTooLong;
172 return -1;
173 }
174
175 // Process what we now know about the line structure into field offsets
176 // generating HTTP status for any aborts as we go.
177
178 // First non-whitespace = beginning of method
179 if (req.start > line_end) {
180 request_parse_status = Http::scBadRequest;
181 return -1;
182 }
183 req.m_start = req.start;
184
185 // First whitespace = end of method
186 if (first_whitespace > line_end || first_whitespace < req.start) {
187 request_parse_status = Http::scBadRequest; // no method
188 return -1;
189 }
190 req.m_end = first_whitespace - 1;
191 if (req.m_end < req.m_start) {
192 request_parse_status = Http::scBadRequest; // missing URI?
193 return -1;
194 }
195
196 /* Set method_ */
197 const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
198 method_ = HttpRequestMethod(tmp);
199
200 // First non-whitespace after first SP = beginning of URL+Version
201 if (second_word > line_end || second_word < req.start) {
202 request_parse_status = Http::scBadRequest; // missing URI
203 return -1;
204 }
205 req.u_start = second_word;
206
207 // RFC 1945: SP and version following URI are optional, marking version 0.9
208 // we identify this by the last whitespace being earlier than URI start
209 if (last_whitespace < second_word && last_whitespace >= req.start) {
210 msgProtocol_ = Http::ProtocolVersion(0,9);
211 req.u_end = line_end;
212 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
213 request_parse_status = Http::scOkay; // HTTP/0.9
214 return 1;
215 } else {
216 // otherwise last whitespace is somewhere after end of URI.
217 req.u_end = last_whitespace;
218 // crop any trailing whitespace in the area we think of as URI
219 for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
220 }
221 if (req.u_end < req.u_start) {
222 request_parse_status = Http::scBadRequest; // missing URI
223 return -1;
224 }
225 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
226
227 // Last whitespace SP = before start of protocol/version
228 if (last_whitespace >= line_end) {
229 request_parse_status = Http::scBadRequest; // missing version
230 return -1;
231 }
232 req.v_start = last_whitespace + 1;
233 req.v_end = line_end;
234
235 /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
236 if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
237 // non-HTTP/1 protocols not supported / implemented.
238 request_parse_status = Http::scHttpVersionNotSupported;
239 return -1;
240 }
241 // NP: magic octets include the protocol name and major version DIGIT.
242 msgProtocol_.protocol = AnyP::PROTO_HTTP;
243 msgProtocol_.major = 1;
244
245 int i = req.v_start + Http1magic.length() -1;
246
247 // catch missing minor part
248 if (++i > line_end) {
249 request_parse_status = Http::scHttpVersionNotSupported;
250 return -1;
251 }
252 /* next should be one or more digits */
253 if (!isdigit(buf_[i])) {
254 request_parse_status = Http::scHttpVersionNotSupported;
255 return -1;
256 }
257 int min = 0;
258 for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
259 min = min * 10;
260 min = min + (buf_[i]) - '0';
261 }
262 // catch too-big values or trailing garbage
263 if (min >= 65536 || i < line_end) {
264 request_parse_status = Http::scHttpVersionNotSupported;
265 return -1;
266 }
267 msgProtocol_.minor = min;
268
269 /*
270 * Rightio - we have all the schtuff. Return true; we've got enough.
271 */
272 request_parse_status = Http::scOkay;
273 return 1;
274 }
275
276 bool
277 Http::One::RequestParser::parse(const SBuf &aBuf)
278 {
279 buf_ = aBuf;
280 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
281
282 // stage 1: locate the request-line
283 if (parsingStage_ == HTTP_PARSE_NONE) {
284 skipGarbageLines();
285
286 // if we hit something before EOS treat it as a message
287 if (!buf_.isEmpty())
288 parsingStage_ = HTTP_PARSE_FIRST;
289 else
290 return false;
291 }
292
293 // stage 2: parse the request-line
294 if (parsingStage_ == HTTP_PARSE_FIRST) {
295 PROF_start(HttpParserParseReqLine);
296 const int retcode = parseRequestFirstLine();
297
298 // first-line (or a look-alike) found successfully.
299 if (retcode > 0) {
300 buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
301 parsingStage_ = HTTP_PARSE_MIME;
302 }
303
304 debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
305 " line={" << aBuf.length() << ", data='" << aBuf << "'}");
306 debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
307 debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
308 debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
309 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
310 PROF_stop(HttpParserParseReqLine);
311
312 // syntax errors already
313 if (retcode < 0) {
314 parsingStage_ = HTTP_PARSE_DONE;
315 return false;
316 }
317 }
318
319 // stage 3: locate the mime header block
320 if (parsingStage_ == HTTP_PARSE_MIME) {
321 // HTTP/1.x request-line is valid and parsing completed.
322 if (msgProtocol_.major == 1) {
323 /* NOTE: HTTP/0.9 requests do not have a mime header block.
324 * So the rest of the code will need to deal with '0'-byte headers
325 * (ie, none, so don't try parsing em)
326 */
327 int64_t mimeHeaderBytes = 0;
328 // XXX: c_str() reallocates. performance regression.
329 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
330 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
331 debugs(33, 5, "Too large request");
332 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
333 parsingStage_ = HTTP_PARSE_DONE;
334 } else
335 debugs(33, 5, "Incomplete request, waiting for end of headers");
336 return false;
337 }
338 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
339 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
340
341 } else
342 debugs(33, 3, "Missing HTTP/1.x identifier");
343
344 // NP: we do not do any further stages here yet so go straight to DONE
345 parsingStage_ = HTTP_PARSE_DONE;
346
347 // Squid could handle these headers, but admin does not want to
348 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
349 debugs(33, 5, "Too large request");
350 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
351 return false;
352 }
353 }
354
355 return !needsMoreData();
356 }