]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/RequestParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / http / one / RequestParser.cc
CommitLineData
eac61ce1
AJ
1/*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
f7f3304a 9#include "squid.h"
4c14658e 10#include "Debug.h"
c99510dd
AJ
11#include "http/one/RequestParser.h"
12#include "http/ProtocolVersion.h"
f4880526 13#include "mime_header.h"
582c2af2 14#include "profiler/Profiler.h"
4d5904f7 15#include "SquidConfig.h"
4c14658e 16
f9688132 17Http::One::RequestParser::RequestParser() :
f53969cc
SM
18 Parser(),
19 request_parse_status(Http::scNone)
7322c9dd 20{
74f478f8 21 req.start = req.end = -1;
74f478f8
AJ
22 req.m_start = req.m_end = -1;
23 req.u_start = req.u_end = -1;
24 req.v_start = req.v_end = -1;
4c14658e
AJ
25}
26
c11191e0
AJ
27/**
28 * Attempt to parse the first line of a new request message.
29 *
a4c74dd8 30 * Governed by RFC 7230 section 3.5
c11191e0 31 * "
a4c74dd8
AJ
32 * In the interest of robustness, a server that is expecting to receive
33 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
34 * received prior to the request-line.
c11191e0
AJ
35 * "
36 *
37 * Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df 38 * If garbage is found the parsing offset is incremented.
c11191e0 39 */
cbcd99df 40void
678451c0 41Http::One::RequestParser::skipGarbageLines()
c11191e0 42{
c11191e0 43 if (Config.onoff.relaxed_header_parser) {
b749de75 44 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
c11191e0
AJ
45 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
46 "CRLF bytes received ahead of request-line. " <<
47 "Ignored due to relaxed_header_parser.");
48 // Be tolerant of prefix empty lines
cbcd99df 49 // ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75
AJ
50 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
51 buf_.consume(1);
7a4fa6a0 52 }
c11191e0 53 }
c11191e0
AJ
54
55 /* XXX: this is a Squid-specific tolerance
56 * it appears never to have been relevant outside out unit-tests
57 * because the ConnStateData parser loop starts with consumeWhitespace()
58 * which absorbs any SP HTAB VTAB CR LF characters.
59 * But unit-tests called the HttpParser method directly without that pruning.
60 */
61#if USE_HTTP_VIOLATIONS
62 if (Config.onoff.relaxed_header_parser) {
b749de75 63 if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
c11191e0
AJ
64 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
65 "Whitespace bytes received ahead of method. " <<
66 "Ignored due to relaxed_header_parser.");
67 // Be tolerant of prefix spaces (other bytes are valid method values)
b749de75
AJ
68 while (!buf_.isEmpty() && buf_[0] == ' ') {
69 buf_.consume(1);
7a4fa6a0 70 }
c11191e0
AJ
71 }
72#endif
c11191e0
AJ
73}
74
75/**
76 * Attempt to parse the first line of a new request message.
77 *
78 * Governed by:
79 * RFC 1945 section 5.1
a4c74dd8 80 * RFC 7230 section 3.1 and 3.5
c11191e0
AJ
81 *
82 * Parsing state is stored between calls. However the current implementation
83 * begins parsing from scratch on every call.
84 * The return value tells you whether the parsing state fields are valid or not.
85 *
86 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
87 * \retval 1 successful parse. member fields contain the request-line items
88 * \retval 0 more data is needed to complete the parse
89 */
4c14658e 90int
678451c0 91Http::One::RequestParser::parseRequestFirstLine()
4c14658e
AJ
92{
93 int second_word = -1; // track the suspected URI start
94 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
95 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
96
b749de75
AJ
97 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
98 debugs(74, DBG_DATA, buf_);
4c14658e
AJ
99
100 // Single-pass parse: (provided we have the whole line anyways)
101
7a4fa6a0 102 req.start = 0;
74f478f8 103 req.end = -1;
b749de75 104 for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
4c14658e 105 // track first and last whitespace (SP only)
b749de75 106 if (buf_[i] == ' ') {
4c14658e 107 last_whitespace = i;
74f478f8 108 if (first_whitespace < req.start)
4c14658e
AJ
109 first_whitespace = i;
110 }
111
112 // track next non-SP/non-HT byte after first_whitespace
b749de75 113 if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
4c14658e
AJ
114 second_word = i;
115 }
116
117 // locate line terminator
b749de75 118 if (buf_[i] == '\n') {
74f478f8 119 req.end = i;
4c14658e
AJ
120 line_end = i - 1;
121 break;
122 }
b749de75 123 if (i < buf_.length() - 1 && buf_[i] == '\r') {
4c14658e 124 if (Config.onoff.relaxed_header_parser) {
b749de75 125 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
4c14658e
AJ
126 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
127 "Series of carriage-return bytes received prior to line terminator. " <<
128 "Ignored due to relaxed_header_parser.");
129
130 // Be tolerant of invalid multiple \r prior to terminal \n
b749de75 131 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
4c14658e 132 line_end = i - 1;
b749de75 133 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
95dc7ff4 134 ++i;
4c14658e 135
b749de75 136 if (buf_[i + 1] == '\n') {
74f478f8 137 req.end = i + 1;
4c14658e
AJ
138 break;
139 }
140 } else {
b749de75 141 if (buf_[i + 1] == '\n') {
74f478f8 142 req.end = i + 1;
4c14658e
AJ
143 line_end = i - 1;
144 break;
145 }
146 }
147
a4c74dd8
AJ
148 // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
149 // However it does explicitly state an exact syntax which omits un-encoded CR
150 // and defines 400 (Bad Request) as the required action when
151 // handed an invalid request-line.
955394ce 152 request_parse_status = Http::scBadRequest;
4c14658e
AJ
153 return -1;
154 }
155 }
016a316b 156
74f478f8 157 if (req.end == -1) {
016a316b 158 // DoS protection against long first-line
b749de75 159 if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
016a316b 160 debugs(33, 5, "Too large request-line");
a4c74dd8
AJ
161 // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
162 request_parse_status = Http::scUriTooLong;
016a316b
AJ
163 return -1;
164 }
165
74f478f8
AJ
166 debugs(74, 5, "Parser: retval 0: from " << req.start <<
167 "->" << req.end << ": needs more data to complete first line.");
4c14658e
AJ
168 return 0;
169 }
170
171 // NP: we have now seen EOL, more-data (0) cannot occur.
172 // From here on any failure is -1, success is 1
173
4c14658e
AJ
174 // Input Validation:
175
016a316b
AJ
176 // DoS protection against long first-line
177 if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
178 debugs(33, 5, "Too large request-line");
a4c74dd8 179 request_parse_status = Http::scUriTooLong;
016a316b
AJ
180 return -1;
181 }
182
4c14658e
AJ
183 // Process what we now know about the line structure into field offsets
184 // generating HTTP status for any aborts as we go.
185
186 // First non-whitespace = beginning of method
74f478f8 187 if (req.start > line_end) {
955394ce 188 request_parse_status = Http::scBadRequest;
4c14658e
AJ
189 return -1;
190 }
74f478f8 191 req.m_start = req.start;
4c14658e
AJ
192
193 // First whitespace = end of method
74f478f8 194 if (first_whitespace > line_end || first_whitespace < req.start) {
955394ce 195 request_parse_status = Http::scBadRequest; // no method
4c14658e
AJ
196 return -1;
197 }
74f478f8
AJ
198 req.m_end = first_whitespace - 1;
199 if (req.m_end < req.m_start) {
955394ce 200 request_parse_status = Http::scBadRequest; // missing URI?
4c14658e
AJ
201 return -1;
202 }
203
274bd5ad 204 /* Set method_ */
b749de75 205 const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
7a4fa6a0 206 method_ = HttpRequestMethod(tmp);
274bd5ad 207
4c14658e 208 // First non-whitespace after first SP = beginning of URL+Version
74f478f8 209 if (second_word > line_end || second_word < req.start) {
955394ce 210 request_parse_status = Http::scBadRequest; // missing URI
4c14658e
AJ
211 return -1;
212 }
74f478f8 213 req.u_start = second_word;
4c14658e
AJ
214
215 // RFC 1945: SP and version following URI are optional, marking version 0.9
216 // we identify this by the last whitespace being earlier than URI start
74f478f8 217 if (last_whitespace < second_word && last_whitespace >= req.start) {
5aedd08d 218 msgProtocol_ = Http::ProtocolVersion(0,9);
74f478f8 219 req.u_end = line_end;
b749de75 220 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
955394ce 221 request_parse_status = Http::scOkay; // HTTP/0.9
4c14658e
AJ
222 return 1;
223 } else {
224 // otherwise last whitespace is somewhere after end of URI.
74f478f8 225 req.u_end = last_whitespace;
4c14658e 226 // crop any trailing whitespace in the area we think of as URI
b749de75 227 for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
4c14658e 228 }
74f478f8 229 if (req.u_end < req.u_start) {
955394ce 230 request_parse_status = Http::scBadRequest; // missing URI
4c14658e
AJ
231 return -1;
232 }
b749de75 233 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
4c14658e
AJ
234
235 // Last whitespace SP = before start of protocol/version
236 if (last_whitespace >= line_end) {
955394ce 237 request_parse_status = Http::scBadRequest; // missing version
4c14658e
AJ
238 return -1;
239 }
74f478f8
AJ
240 req.v_start = last_whitespace + 1;
241 req.v_end = line_end;
4c14658e 242
a4c74dd8 243 /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
b749de75 244 if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
9651320a 245 // non-HTTP/1 protocols not supported / implemented.
955394ce 246 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e 247 return -1;
4c14658e 248 }
9651320a 249 // NP: magic octets include the protocol name and major version DIGIT.
5aedd08d 250 msgProtocol_.protocol = AnyP::PROTO_HTTP;
9651320a 251 msgProtocol_.major = 1;
4c14658e 252
9651320a 253 int i = req.v_start + Http1magic.length() -1;
4c14658e 254
4c14658e
AJ
255 // catch missing minor part
256 if (++i > line_end) {
955394ce 257 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
258 return -1;
259 }
260 /* next should be one or more digits */
b749de75 261 if (!isdigit(buf_[i])) {
955394ce 262 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
263 return -1;
264 }
265 int min = 0;
b749de75 266 for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
4c14658e 267 min = min * 10;
b749de75 268 min = min + (buf_[i]) - '0';
4c14658e
AJ
269 }
270 // catch too-big values or trailing garbage
271 if (min >= 65536 || i < line_end) {
955394ce 272 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
273 return -1;
274 }
5aedd08d 275 msgProtocol_.minor = min;
4c14658e
AJ
276
277 /*
278 * Rightio - we have all the schtuff. Return true; we've got enough.
279 */
955394ce 280 request_parse_status = Http::scOkay;
4c14658e
AJ
281 return 1;
282}
7a4fa6a0 283
87abd755 284bool
36a9c964 285Http::One::RequestParser::parse(const SBuf &aBuf)
4c14658e 286{
b749de75 287 buf_ = aBuf;
36a9c964
AJ
288 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
289
cbcd99df 290 // stage 1: locate the request-line
36a9c964 291 if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df 292 skipGarbageLines();
cbcd99df
AJ
293
294 // if we hit something before EOS treat it as a message
b749de75 295 if (!buf_.isEmpty())
cbcd99df
AJ
296 parsingStage_ = HTTP_PARSE_FIRST;
297 else
f9daf571 298 return false;
cbcd99df 299 }
c11191e0 300
cbcd99df
AJ
301 // stage 2: parse the request-line
302 if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526 303 PROF_start(HttpParserParseReqLine);
678451c0 304 const int retcode = parseRequestFirstLine();
e4cff825
AJ
305
306 // first-line (or a look-alike) found successfully.
307 if (retcode > 0) {
b749de75 308 buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
e4cff825
AJ
309 parsingStage_ = HTTP_PARSE_MIME;
310 }
311
7a4fa6a0 312 debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
e4cff825 313 " line={" << aBuf.length() << ", data='" << aBuf << "'}");
9ff1b8ca 314 debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
5f3cc9a2 315 debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
f4880526 316 debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
b749de75 317 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526 318 PROF_stop(HttpParserParseReqLine);
cbcd99df
AJ
319
320 // syntax errors already
f4880526 321 if (retcode < 0) {
cbcd99df 322 parsingStage_ = HTTP_PARSE_DONE;
f4880526
AJ
323 return false;
324 }
325 }
326
327 // stage 3: locate the mime header block
cbcd99df 328 if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526
AJ
329 // HTTP/1.x request-line is valid and parsing completed.
330 if (msgProtocol_.major == 1) {
331 /* NOTE: HTTP/0.9 requests do not have a mime header block.
332 * So the rest of the code will need to deal with '0'-byte headers
333 * (ie, none, so don't try parsing em)
334 */
eb1bd364 335 int64_t mimeHeaderBytes = 0;
2169fd4d 336 // XXX: c_str() reallocates. performance regression.
b749de75
AJ
337 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
338 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
016a316b 339 debugs(33, 5, "Too large request");
a4c74dd8 340 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
cbcd99df 341 parsingStage_ = HTTP_PARSE_DONE;
7a4fa6a0 342 } else
016a316b 343 debugs(33, 5, "Incomplete request, waiting for end of headers");
7a4fa6a0 344 return false;
f4880526 345 }
38012e61
AJ
346 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
347 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
f4880526 348
7a4fa6a0 349 } else
f4880526 350 debugs(33, 3, "Missing HTTP/1.x identifier");
7a4fa6a0 351
cbcd99df
AJ
352 // NP: we do not do any further stages here yet so go straight to DONE
353 parsingStage_ = HTTP_PARSE_DONE;
016a316b
AJ
354
355 // Squid could handle these headers, but admin does not want to
356 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
357 debugs(33, 5, "Too large request");
a4c74dd8 358 request_parse_status = Http::scRequestHeaderFieldsTooLarge;
016a316b
AJ
359 return false;
360 }
f4880526 361 }
87abd755 362
36a9c964 363 return !needsMoreData();
4c14658e 364}
f53969cc 365