]>
Commit | Line | Data |
---|---|---|
eac61ce1 AJ |
1 | /* |
2 | * Copyright (C) 1996-2014 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
f7f3304a | 9 | #include "squid.h" |
4c14658e | 10 | #include "Debug.h" |
c99510dd AJ |
11 | #include "http/one/RequestParser.h" |
12 | #include "http/ProtocolVersion.h" | |
f4880526 | 13 | #include "mime_header.h" |
582c2af2 | 14 | #include "profiler/Profiler.h" |
4d5904f7 | 15 | #include "SquidConfig.h" |
4c14658e | 16 | |
f9688132 | 17 | Http::One::RequestParser::RequestParser() : |
f53969cc SM |
18 | Parser(), |
19 | request_parse_status(Http::scNone) | |
7322c9dd | 20 | { |
74f478f8 | 21 | req.start = req.end = -1; |
74f478f8 AJ |
22 | req.m_start = req.m_end = -1; |
23 | req.u_start = req.u_end = -1; | |
24 | req.v_start = req.v_end = -1; | |
4c14658e AJ |
25 | } |
26 | ||
c11191e0 AJ |
27 | /** |
28 | * Attempt to parse the first line of a new request message. | |
29 | * | |
a4c74dd8 | 30 | * Governed by RFC 7230 section 3.5 |
c11191e0 | 31 | * " |
a4c74dd8 AJ |
32 | * In the interest of robustness, a server that is expecting to receive |
33 | * and parse a request-line SHOULD ignore at least one empty line (CRLF) | |
34 | * received prior to the request-line. | |
c11191e0 AJ |
35 | * " |
36 | * | |
37 | * Parsing state is stored between calls to avoid repeating buffer scans. | |
cbcd99df | 38 | * If garbage is found the parsing offset is incremented. |
c11191e0 | 39 | */ |
cbcd99df | 40 | void |
678451c0 | 41 | Http::One::RequestParser::skipGarbageLines() |
c11191e0 | 42 | { |
c11191e0 | 43 | if (Config.onoff.relaxed_header_parser) { |
b749de75 | 44 | if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n')) |
c11191e0 AJ |
45 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
46 | "CRLF bytes received ahead of request-line. " << | |
47 | "Ignored due to relaxed_header_parser."); | |
48 | // Be tolerant of prefix empty lines | |
cbcd99df | 49 | // ie any series of either \n or \r\n with no other characters and no repeated \r |
b749de75 AJ |
50 | while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) { |
51 | buf_.consume(1); | |
7a4fa6a0 | 52 | } |
c11191e0 | 53 | } |
c11191e0 AJ |
54 | |
55 | /* XXX: this is a Squid-specific tolerance | |
56 | * it appears never to have been relevant outside out unit-tests | |
57 | * because the ConnStateData parser loop starts with consumeWhitespace() | |
58 | * which absorbs any SP HTAB VTAB CR LF characters. | |
59 | * But unit-tests called the HttpParser method directly without that pruning. | |
60 | */ | |
61 | #if USE_HTTP_VIOLATIONS | |
62 | if (Config.onoff.relaxed_header_parser) { | |
b749de75 | 63 | if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ') |
c11191e0 AJ |
64 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
65 | "Whitespace bytes received ahead of method. " << | |
66 | "Ignored due to relaxed_header_parser."); | |
67 | // Be tolerant of prefix spaces (other bytes are valid method values) | |
b749de75 AJ |
68 | while (!buf_.isEmpty() && buf_[0] == ' ') { |
69 | buf_.consume(1); | |
7a4fa6a0 | 70 | } |
c11191e0 AJ |
71 | } |
72 | #endif | |
c11191e0 AJ |
73 | } |
74 | ||
75 | /** | |
76 | * Attempt to parse the first line of a new request message. | |
77 | * | |
78 | * Governed by: | |
79 | * RFC 1945 section 5.1 | |
a4c74dd8 | 80 | * RFC 7230 section 3.1 and 3.5 |
c11191e0 AJ |
81 | * |
82 | * Parsing state is stored between calls. However the current implementation | |
83 | * begins parsing from scratch on every call. | |
84 | * The return value tells you whether the parsing state fields are valid or not. | |
85 | * | |
86 | * \retval -1 an error occurred. request_parse_status indicates HTTP status result. | |
87 | * \retval 1 successful parse. member fields contain the request-line items | |
88 | * \retval 0 more data is needed to complete the parse | |
89 | */ | |
4c14658e | 90 | int |
678451c0 | 91 | Http::One::RequestParser::parseRequestFirstLine() |
4c14658e AJ |
92 | { |
93 | int second_word = -1; // track the suspected URI start | |
94 | int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte | |
95 | int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence | |
96 | ||
b749de75 AJ |
97 | debugs(74, 5, "parsing possible request: buf.length=" << buf_.length()); |
98 | debugs(74, DBG_DATA, buf_); | |
4c14658e AJ |
99 | |
100 | // Single-pass parse: (provided we have the whole line anyways) | |
101 | ||
7a4fa6a0 | 102 | req.start = 0; |
74f478f8 | 103 | req.end = -1; |
b749de75 | 104 | for (SBuf::size_type i = 0; i < buf_.length(); ++i) { |
4c14658e | 105 | // track first and last whitespace (SP only) |
b749de75 | 106 | if (buf_[i] == ' ') { |
4c14658e | 107 | last_whitespace = i; |
74f478f8 | 108 | if (first_whitespace < req.start) |
4c14658e AJ |
109 | first_whitespace = i; |
110 | } | |
111 | ||
112 | // track next non-SP/non-HT byte after first_whitespace | |
b749de75 | 113 | if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') { |
4c14658e AJ |
114 | second_word = i; |
115 | } | |
116 | ||
117 | // locate line terminator | |
b749de75 | 118 | if (buf_[i] == '\n') { |
74f478f8 | 119 | req.end = i; |
4c14658e AJ |
120 | line_end = i - 1; |
121 | break; | |
122 | } | |
b749de75 | 123 | if (i < buf_.length() - 1 && buf_[i] == '\r') { |
4c14658e | 124 | if (Config.onoff.relaxed_header_parser) { |
b749de75 | 125 | if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r') |
4c14658e AJ |
126 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
127 | "Series of carriage-return bytes received prior to line terminator. " << | |
128 | "Ignored due to relaxed_header_parser."); | |
129 | ||
130 | // Be tolerant of invalid multiple \r prior to terminal \n | |
b749de75 | 131 | if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r') |
4c14658e | 132 | line_end = i - 1; |
b749de75 | 133 | while (i < buf_.length() - 1 && buf_[i + 1] == '\r') |
95dc7ff4 | 134 | ++i; |
4c14658e | 135 | |
b749de75 | 136 | if (buf_[i + 1] == '\n') { |
74f478f8 | 137 | req.end = i + 1; |
4c14658e AJ |
138 | break; |
139 | } | |
140 | } else { | |
b749de75 | 141 | if (buf_[i + 1] == '\n') { |
74f478f8 | 142 | req.end = i + 1; |
4c14658e AJ |
143 | line_end = i - 1; |
144 | break; | |
145 | } | |
146 | } | |
147 | ||
a4c74dd8 AJ |
148 | // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to. |
149 | // However it does explicitly state an exact syntax which omits un-encoded CR | |
150 | // and defines 400 (Bad Request) as the required action when | |
151 | // handed an invalid request-line. | |
955394ce | 152 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
153 | return -1; |
154 | } | |
155 | } | |
016a316b | 156 | |
74f478f8 | 157 | if (req.end == -1) { |
016a316b | 158 | // DoS protection against long first-line |
b749de75 | 159 | if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) { |
016a316b | 160 | debugs(33, 5, "Too large request-line"); |
a4c74dd8 AJ |
161 | // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible. |
162 | request_parse_status = Http::scUriTooLong; | |
016a316b AJ |
163 | return -1; |
164 | } | |
165 | ||
74f478f8 AJ |
166 | debugs(74, 5, "Parser: retval 0: from " << req.start << |
167 | "->" << req.end << ": needs more data to complete first line."); | |
4c14658e AJ |
168 | return 0; |
169 | } | |
170 | ||
171 | // NP: we have now seen EOL, more-data (0) cannot occur. | |
172 | // From here on any failure is -1, success is 1 | |
173 | ||
4c14658e AJ |
174 | // Input Validation: |
175 | ||
016a316b AJ |
176 | // DoS protection against long first-line |
177 | if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) { | |
178 | debugs(33, 5, "Too large request-line"); | |
a4c74dd8 | 179 | request_parse_status = Http::scUriTooLong; |
016a316b AJ |
180 | return -1; |
181 | } | |
182 | ||
4c14658e AJ |
183 | // Process what we now know about the line structure into field offsets |
184 | // generating HTTP status for any aborts as we go. | |
185 | ||
186 | // First non-whitespace = beginning of method | |
74f478f8 | 187 | if (req.start > line_end) { |
955394ce | 188 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
189 | return -1; |
190 | } | |
74f478f8 | 191 | req.m_start = req.start; |
4c14658e AJ |
192 | |
193 | // First whitespace = end of method | |
74f478f8 | 194 | if (first_whitespace > line_end || first_whitespace < req.start) { |
955394ce | 195 | request_parse_status = Http::scBadRequest; // no method |
4c14658e AJ |
196 | return -1; |
197 | } | |
74f478f8 AJ |
198 | req.m_end = first_whitespace - 1; |
199 | if (req.m_end < req.m_start) { | |
955394ce | 200 | request_parse_status = Http::scBadRequest; // missing URI? |
4c14658e AJ |
201 | return -1; |
202 | } | |
203 | ||
274bd5ad | 204 | /* Set method_ */ |
b749de75 | 205 | const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1); |
7a4fa6a0 | 206 | method_ = HttpRequestMethod(tmp); |
274bd5ad | 207 | |
4c14658e | 208 | // First non-whitespace after first SP = beginning of URL+Version |
74f478f8 | 209 | if (second_word > line_end || second_word < req.start) { |
955394ce | 210 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
211 | return -1; |
212 | } | |
74f478f8 | 213 | req.u_start = second_word; |
4c14658e AJ |
214 | |
215 | // RFC 1945: SP and version following URI are optional, marking version 0.9 | |
216 | // we identify this by the last whitespace being earlier than URI start | |
74f478f8 | 217 | if (last_whitespace < second_word && last_whitespace >= req.start) { |
5aedd08d | 218 | msgProtocol_ = Http::ProtocolVersion(0,9); |
74f478f8 | 219 | req.u_end = line_end; |
b749de75 | 220 | uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1); |
955394ce | 221 | request_parse_status = Http::scOkay; // HTTP/0.9 |
4c14658e AJ |
222 | return 1; |
223 | } else { | |
224 | // otherwise last whitespace is somewhere after end of URI. | |
74f478f8 | 225 | req.u_end = last_whitespace; |
4c14658e | 226 | // crop any trailing whitespace in the area we think of as URI |
b749de75 | 227 | for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end); |
4c14658e | 228 | } |
74f478f8 | 229 | if (req.u_end < req.u_start) { |
955394ce | 230 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
231 | return -1; |
232 | } | |
b749de75 | 233 | uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1); |
4c14658e AJ |
234 | |
235 | // Last whitespace SP = before start of protocol/version | |
236 | if (last_whitespace >= line_end) { | |
955394ce | 237 | request_parse_status = Http::scBadRequest; // missing version |
4c14658e AJ |
238 | return -1; |
239 | } | |
74f478f8 AJ |
240 | req.v_start = last_whitespace + 1; |
241 | req.v_end = line_end; | |
4c14658e | 242 | |
a4c74dd8 | 243 | /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */ |
b749de75 | 244 | if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) { |
9651320a | 245 | // non-HTTP/1 protocols not supported / implemented. |
955394ce | 246 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e | 247 | return -1; |
4c14658e | 248 | } |
9651320a | 249 | // NP: magic octets include the protocol name and major version DIGIT. |
5aedd08d | 250 | msgProtocol_.protocol = AnyP::PROTO_HTTP; |
9651320a | 251 | msgProtocol_.major = 1; |
4c14658e | 252 | |
9651320a | 253 | int i = req.v_start + Http1magic.length() -1; |
4c14658e | 254 | |
4c14658e AJ |
255 | // catch missing minor part |
256 | if (++i > line_end) { | |
955394ce | 257 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
258 | return -1; |
259 | } | |
260 | /* next should be one or more digits */ | |
b749de75 | 261 | if (!isdigit(buf_[i])) { |
955394ce | 262 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
263 | return -1; |
264 | } | |
265 | int min = 0; | |
b749de75 | 266 | for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) { |
4c14658e | 267 | min = min * 10; |
b749de75 | 268 | min = min + (buf_[i]) - '0'; |
4c14658e AJ |
269 | } |
270 | // catch too-big values or trailing garbage | |
271 | if (min >= 65536 || i < line_end) { | |
955394ce | 272 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
273 | return -1; |
274 | } | |
5aedd08d | 275 | msgProtocol_.minor = min; |
4c14658e AJ |
276 | |
277 | /* | |
278 | * Rightio - we have all the schtuff. Return true; we've got enough. | |
279 | */ | |
955394ce | 280 | request_parse_status = Http::scOkay; |
4c14658e AJ |
281 | return 1; |
282 | } | |
7a4fa6a0 | 283 | |
87abd755 | 284 | bool |
36a9c964 | 285 | Http::One::RequestParser::parse(const SBuf &aBuf) |
4c14658e | 286 | { |
b749de75 | 287 | buf_ = aBuf; |
36a9c964 AJ |
288 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); |
289 | ||
cbcd99df | 290 | // stage 1: locate the request-line |
36a9c964 | 291 | if (parsingStage_ == HTTP_PARSE_NONE) { |
cbcd99df | 292 | skipGarbageLines(); |
cbcd99df AJ |
293 | |
294 | // if we hit something before EOS treat it as a message | |
b749de75 | 295 | if (!buf_.isEmpty()) |
cbcd99df AJ |
296 | parsingStage_ = HTTP_PARSE_FIRST; |
297 | else | |
f9daf571 | 298 | return false; |
cbcd99df | 299 | } |
c11191e0 | 300 | |
cbcd99df AJ |
301 | // stage 2: parse the request-line |
302 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
f4880526 | 303 | PROF_start(HttpParserParseReqLine); |
678451c0 | 304 | const int retcode = parseRequestFirstLine(); |
e4cff825 AJ |
305 | |
306 | // first-line (or a look-alike) found successfully. | |
307 | if (retcode > 0) { | |
b749de75 | 308 | buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done. |
e4cff825 AJ |
309 | parsingStage_ = HTTP_PARSE_MIME; |
310 | } | |
311 | ||
7a4fa6a0 | 312 | debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end << |
e4cff825 | 313 | " line={" << aBuf.length() << ", data='" << aBuf << "'}"); |
9ff1b8ca | 314 | debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")"); |
5f3cc9a2 | 315 | debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")"); |
f4880526 | 316 | debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")"); |
b749de75 | 317 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); |
f4880526 | 318 | PROF_stop(HttpParserParseReqLine); |
cbcd99df AJ |
319 | |
320 | // syntax errors already | |
f4880526 | 321 | if (retcode < 0) { |
cbcd99df | 322 | parsingStage_ = HTTP_PARSE_DONE; |
f4880526 AJ |
323 | return false; |
324 | } | |
325 | } | |
326 | ||
327 | // stage 3: locate the mime header block | |
cbcd99df | 328 | if (parsingStage_ == HTTP_PARSE_MIME) { |
f4880526 AJ |
329 | // HTTP/1.x request-line is valid and parsing completed. |
330 | if (msgProtocol_.major == 1) { | |
331 | /* NOTE: HTTP/0.9 requests do not have a mime header block. | |
332 | * So the rest of the code will need to deal with '0'-byte headers | |
333 | * (ie, none, so don't try parsing em) | |
334 | */ | |
eb1bd364 | 335 | int64_t mimeHeaderBytes = 0; |
2169fd4d | 336 | // XXX: c_str() reallocates. performance regression. |
b749de75 AJ |
337 | if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) { |
338 | if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) { | |
016a316b | 339 | debugs(33, 5, "Too large request"); |
a4c74dd8 | 340 | request_parse_status = Http::scRequestHeaderFieldsTooLarge; |
cbcd99df | 341 | parsingStage_ = HTTP_PARSE_DONE; |
7a4fa6a0 | 342 | } else |
016a316b | 343 | debugs(33, 5, "Incomplete request, waiting for end of headers"); |
7a4fa6a0 | 344 | return false; |
f4880526 | 345 | } |
38012e61 AJ |
346 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); |
347 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); | |
f4880526 | 348 | |
7a4fa6a0 | 349 | } else |
f4880526 | 350 | debugs(33, 3, "Missing HTTP/1.x identifier"); |
7a4fa6a0 | 351 | |
cbcd99df AJ |
352 | // NP: we do not do any further stages here yet so go straight to DONE |
353 | parsingStage_ = HTTP_PARSE_DONE; | |
016a316b AJ |
354 | |
355 | // Squid could handle these headers, but admin does not want to | |
356 | if (messageHeaderSize() >= Config.maxRequestHeaderSize) { | |
357 | debugs(33, 5, "Too large request"); | |
a4c74dd8 | 358 | request_parse_status = Http::scRequestHeaderFieldsTooLarge; |
016a316b AJ |
359 | return false; |
360 | } | |
f4880526 | 361 | } |
87abd755 | 362 | |
36a9c964 | 363 | return !needsMoreData(); |
4c14658e | 364 | } |
f53969cc | 365 |