]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/RequestParser.cc
Merge from trunk rev.13638
[thirdparty/squid.git] / src / http / one / RequestParser.cc
CommitLineData
f7f3304a 1#include "squid.h"
4c14658e 2#include "Debug.h"
c99510dd
AJ
3#include "http/one/RequestParser.h"
4#include "http/ProtocolVersion.h"
f4880526 5#include "mime_header.h"
582c2af2 6#include "profiler/Profiler.h"
4d5904f7 7#include "SquidConfig.h"
4c14658e 8
7322c9dd 9void
678451c0 10Http::One::RequestParser::clear()
7322c9dd
AJ
11{
12 Http1::Parser::clear();
13
14 request_parse_status = Http::scNone;
74f478f8 15 req.start = req.end = -1;
74f478f8
AJ
16 req.m_start = req.m_end = -1;
17 req.u_start = req.u_end = -1;
18 req.v_start = req.v_end = -1;
9ff1b8ca 19 method_ = HttpRequestMethod();
4c14658e
AJ
20}
21
c11191e0
AJ
22/**
23 * Attempt to parse the first line of a new request message.
24 *
25 * Governed by RFC 2616 section 4.1
26 * "
27 * In the interest of robustness, servers SHOULD ignore any empty
28 * line(s) received where a Request-Line is expected. In other words, if
29 * the server is reading the protocol stream at the beginning of a
30 * message and receives a CRLF first, it should ignore the CRLF.
31 *
32 * ... To restate what is explicitly forbidden by the
33 * BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
34 * extra CRLF.
35 * "
36 *
37 * Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df 38 * If garbage is found the parsing offset is incremented.
c11191e0 39 */
cbcd99df 40void
678451c0 41Http::One::RequestParser::skipGarbageLines()
c11191e0 42{
c11191e0
AJ
43#if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
44 if (Config.onoff.relaxed_header_parser) {
b749de75 45 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
c11191e0
AJ
46 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
47 "CRLF bytes received ahead of request-line. " <<
48 "Ignored due to relaxed_header_parser.");
49 // Be tolerant of prefix empty lines
cbcd99df 50 // ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75
AJ
51 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
52 buf_.consume(1);
7a4fa6a0 53 }
c11191e0
AJ
54 }
55#endif
56
57 /* XXX: this is a Squid-specific tolerance
58 * it appears never to have been relevant outside out unit-tests
59 * because the ConnStateData parser loop starts with consumeWhitespace()
60 * which absorbs any SP HTAB VTAB CR LF characters.
61 * But unit-tests called the HttpParser method directly without that pruning.
62 */
63#if USE_HTTP_VIOLATIONS
64 if (Config.onoff.relaxed_header_parser) {
b749de75 65 if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
c11191e0
AJ
66 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
67 "Whitespace bytes received ahead of method. " <<
68 "Ignored due to relaxed_header_parser.");
69 // Be tolerant of prefix spaces (other bytes are valid method values)
b749de75
AJ
70 while (!buf_.isEmpty() && buf_[0] == ' ') {
71 buf_.consume(1);
7a4fa6a0 72 }
c11191e0
AJ
73 }
74#endif
c11191e0
AJ
75}
76
77/**
78 * Attempt to parse the first line of a new request message.
79 *
80 * Governed by:
81 * RFC 1945 section 5.1
82 * RFC 2616 section 5.1
9651320a 83 * RFC 7230
c11191e0
AJ
84 *
85 * Parsing state is stored between calls. However the current implementation
86 * begins parsing from scratch on every call.
87 * The return value tells you whether the parsing state fields are valid or not.
88 *
89 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
90 * \retval 1 successful parse. member fields contain the request-line items
91 * \retval 0 more data is needed to complete the parse
92 */
4c14658e 93int
678451c0 94Http::One::RequestParser::parseRequestFirstLine()
4c14658e
AJ
95{
96 int second_word = -1; // track the suspected URI start
97 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
98 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
99
b749de75
AJ
100 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
101 debugs(74, DBG_DATA, buf_);
4c14658e
AJ
102
103 // Single-pass parse: (provided we have the whole line anyways)
104
7a4fa6a0 105 req.start = 0;
74f478f8 106 req.end = -1;
b749de75 107 for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
4c14658e 108 // track first and last whitespace (SP only)
b749de75 109 if (buf_[i] == ' ') {
4c14658e 110 last_whitespace = i;
74f478f8 111 if (first_whitespace < req.start)
4c14658e
AJ
112 first_whitespace = i;
113 }
114
115 // track next non-SP/non-HT byte after first_whitespace
b749de75 116 if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
4c14658e
AJ
117 second_word = i;
118 }
119
120 // locate line terminator
b749de75 121 if (buf_[i] == '\n') {
74f478f8 122 req.end = i;
4c14658e
AJ
123 line_end = i - 1;
124 break;
125 }
b749de75 126 if (i < buf_.length() - 1 && buf_[i] == '\r') {
4c14658e 127 if (Config.onoff.relaxed_header_parser) {
b749de75 128 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
4c14658e
AJ
129 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
130 "Series of carriage-return bytes received prior to line terminator. " <<
131 "Ignored due to relaxed_header_parser.");
132
133 // Be tolerant of invalid multiple \r prior to terminal \n
b749de75 134 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
4c14658e 135 line_end = i - 1;
b749de75 136 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
95dc7ff4 137 ++i;
4c14658e 138
b749de75 139 if (buf_[i + 1] == '\n') {
74f478f8 140 req.end = i + 1;
4c14658e
AJ
141 break;
142 }
143 } else {
b749de75 144 if (buf_[i + 1] == '\n') {
74f478f8 145 req.end = i + 1;
4c14658e
AJ
146 line_end = i - 1;
147 break;
148 }
149 }
150
151 // RFC 2616 section 5.1
152 // "No CR or LF is allowed except in the final CRLF sequence"
955394ce 153 request_parse_status = Http::scBadRequest;
4c14658e
AJ
154 return -1;
155 }
156 }
016a316b 157
74f478f8 158 if (req.end == -1) {
016a316b 159 // DoS protection against long first-line
b749de75 160 if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
016a316b
AJ
161 debugs(33, 5, "Too large request-line");
162 // XXX: return URL-too-log status code if second_whitespace is not yet found.
163 request_parse_status = Http::scHeaderTooLarge;
164 return -1;
165 }
166
74f478f8
AJ
167 debugs(74, 5, "Parser: retval 0: from " << req.start <<
168 "->" << req.end << ": needs more data to complete first line.");
4c14658e
AJ
169 return 0;
170 }
171
172 // NP: we have now seen EOL, more-data (0) cannot occur.
173 // From here on any failure is -1, success is 1
174
4c14658e
AJ
175 // Input Validation:
176
016a316b
AJ
177 // DoS protection against long first-line
178 if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
179 debugs(33, 5, "Too large request-line");
180 request_parse_status = Http::scHeaderTooLarge;
181 return -1;
182 }
183
4c14658e
AJ
184 // Process what we now know about the line structure into field offsets
185 // generating HTTP status for any aborts as we go.
186
187 // First non-whitespace = beginning of method
74f478f8 188 if (req.start > line_end) {
955394ce 189 request_parse_status = Http::scBadRequest;
4c14658e
AJ
190 return -1;
191 }
74f478f8 192 req.m_start = req.start;
4c14658e
AJ
193
194 // First whitespace = end of method
74f478f8 195 if (first_whitespace > line_end || first_whitespace < req.start) {
955394ce 196 request_parse_status = Http::scBadRequest; // no method
4c14658e
AJ
197 return -1;
198 }
74f478f8
AJ
199 req.m_end = first_whitespace - 1;
200 if (req.m_end < req.m_start) {
955394ce 201 request_parse_status = Http::scBadRequest; // missing URI?
4c14658e
AJ
202 return -1;
203 }
204
274bd5ad 205 /* Set method_ */
b749de75 206 const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
7a4fa6a0 207 method_ = HttpRequestMethod(tmp);
274bd5ad 208
4c14658e 209 // First non-whitespace after first SP = beginning of URL+Version
74f478f8 210 if (second_word > line_end || second_word < req.start) {
955394ce 211 request_parse_status = Http::scBadRequest; // missing URI
4c14658e
AJ
212 return -1;
213 }
74f478f8 214 req.u_start = second_word;
4c14658e
AJ
215
216 // RFC 1945: SP and version following URI are optional, marking version 0.9
217 // we identify this by the last whitespace being earlier than URI start
74f478f8 218 if (last_whitespace < second_word && last_whitespace >= req.start) {
5aedd08d 219 msgProtocol_ = Http::ProtocolVersion(0,9);
74f478f8 220 req.u_end = line_end;
b749de75 221 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
955394ce 222 request_parse_status = Http::scOkay; // HTTP/0.9
4c14658e
AJ
223 return 1;
224 } else {
225 // otherwise last whitespace is somewhere after end of URI.
74f478f8 226 req.u_end = last_whitespace;
4c14658e 227 // crop any trailing whitespace in the area we think of as URI
b749de75 228 for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
4c14658e 229 }
74f478f8 230 if (req.u_end < req.u_start) {
955394ce 231 request_parse_status = Http::scBadRequest; // missing URI
4c14658e
AJ
232 return -1;
233 }
b749de75 234 uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
4c14658e
AJ
235
236 // Last whitespace SP = before start of protocol/version
237 if (last_whitespace >= line_end) {
955394ce 238 request_parse_status = Http::scBadRequest; // missing version
4c14658e
AJ
239 return -1;
240 }
74f478f8
AJ
241 req.v_start = last_whitespace + 1;
242 req.v_end = line_end;
4c14658e 243
9651320a 244 /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */
b749de75 245 if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
9651320a 246 // non-HTTP/1 protocols not supported / implemented.
955394ce 247 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e 248 return -1;
4c14658e 249 }
9651320a 250 // NP: magic octets include the protocol name and major version DIGIT.
5aedd08d 251 msgProtocol_.protocol = AnyP::PROTO_HTTP;
9651320a 252 msgProtocol_.major = 1;
4c14658e 253
9651320a 254 int i = req.v_start + Http1magic.length() -1;
4c14658e 255
4c14658e
AJ
256 // catch missing minor part
257 if (++i > line_end) {
955394ce 258 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
259 return -1;
260 }
261 /* next should be one or more digits */
b749de75 262 if (!isdigit(buf_[i])) {
955394ce 263 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
264 return -1;
265 }
266 int min = 0;
b749de75 267 for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
4c14658e 268 min = min * 10;
b749de75 269 min = min + (buf_[i]) - '0';
4c14658e
AJ
270 }
271 // catch too-big values or trailing garbage
272 if (min >= 65536 || i < line_end) {
955394ce 273 request_parse_status = Http::scHttpVersionNotSupported;
4c14658e
AJ
274 return -1;
275 }
5aedd08d 276 msgProtocol_.minor = min;
4c14658e
AJ
277
278 /*
279 * Rightio - we have all the schtuff. Return true; we've got enough.
280 */
955394ce 281 request_parse_status = Http::scOkay;
4c14658e
AJ
282 return 1;
283}
7a4fa6a0 284
87abd755 285bool
36a9c964 286Http::One::RequestParser::parse(const SBuf &aBuf)
4c14658e 287{
b749de75 288 buf_ = aBuf;
36a9c964
AJ
289 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
290
cbcd99df 291 // stage 1: locate the request-line
36a9c964 292 if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df 293 skipGarbageLines();
cbcd99df
AJ
294
295 // if we hit something before EOS treat it as a message
b749de75 296 if (!buf_.isEmpty())
cbcd99df
AJ
297 parsingStage_ = HTTP_PARSE_FIRST;
298 else
f9daf571 299 return false;
cbcd99df 300 }
c11191e0 301
cbcd99df
AJ
302 // stage 2: parse the request-line
303 if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526 304 PROF_start(HttpParserParseReqLine);
678451c0 305 const int retcode = parseRequestFirstLine();
e4cff825
AJ
306
307 // first-line (or a look-alike) found successfully.
308 if (retcode > 0) {
b749de75 309 buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
e4cff825
AJ
310 parsingStage_ = HTTP_PARSE_MIME;
311 }
312
7a4fa6a0 313 debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
e4cff825 314 " line={" << aBuf.length() << ", data='" << aBuf << "'}");
9ff1b8ca 315 debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
5f3cc9a2 316 debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
f4880526 317 debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
b749de75 318 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526 319 PROF_stop(HttpParserParseReqLine);
cbcd99df
AJ
320
321 // syntax errors already
f4880526 322 if (retcode < 0) {
cbcd99df 323 parsingStage_ = HTTP_PARSE_DONE;
f4880526
AJ
324 return false;
325 }
326 }
327
328 // stage 3: locate the mime header block
cbcd99df 329 if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526
AJ
330 // HTTP/1.x request-line is valid and parsing completed.
331 if (msgProtocol_.major == 1) {
332 /* NOTE: HTTP/0.9 requests do not have a mime header block.
333 * So the rest of the code will need to deal with '0'-byte headers
334 * (ie, none, so don't try parsing em)
335 */
eb1bd364 336 int64_t mimeHeaderBytes = 0;
2169fd4d 337 // XXX: c_str() reallocates. performance regression.
b749de75
AJ
338 if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
339 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
016a316b
AJ
340 debugs(33, 5, "Too large request");
341 request_parse_status = Http::scHeaderTooLarge;
cbcd99df 342 parsingStage_ = HTTP_PARSE_DONE;
7a4fa6a0 343 } else
016a316b 344 debugs(33, 5, "Incomplete request, waiting for end of headers");
7a4fa6a0 345 return false;
f4880526 346 }
38012e61
AJ
347 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
348 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
f4880526 349
7a4fa6a0 350 } else
f4880526 351 debugs(33, 3, "Missing HTTP/1.x identifier");
7a4fa6a0 352
cbcd99df
AJ
353 // NP: we do not do any further stages here yet so go straight to DONE
354 parsingStage_ = HTTP_PARSE_DONE;
016a316b
AJ
355
356 // Squid could handle these headers, but admin does not want to
357 if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
358 debugs(33, 5, "Too large request");
359 request_parse_status = Http::scHeaderTooLarge;
360 return false;
361 }
f4880526 362 }
87abd755 363
36a9c964 364 return !needsMoreData();
4c14658e 365}