]>
Commit | Line | Data |
---|---|---|
f7f3304a | 1 | #include "squid.h" |
4c14658e | 2 | #include "Debug.h" |
c99510dd AJ |
3 | #include "http/one/RequestParser.h" |
4 | #include "http/ProtocolVersion.h" | |
f4880526 | 5 | #include "mime_header.h" |
582c2af2 | 6 | #include "profiler/Profiler.h" |
4d5904f7 | 7 | #include "SquidConfig.h" |
4c14658e | 8 | |
7322c9dd | 9 | void |
678451c0 | 10 | Http::One::RequestParser::clear() |
7322c9dd AJ |
11 | { |
12 | Http1::Parser::clear(); | |
13 | ||
14 | request_parse_status = Http::scNone; | |
74f478f8 | 15 | req.start = req.end = -1; |
74f478f8 AJ |
16 | req.m_start = req.m_end = -1; |
17 | req.u_start = req.u_end = -1; | |
18 | req.v_start = req.v_end = -1; | |
9ff1b8ca | 19 | method_ = HttpRequestMethod(); |
4c14658e AJ |
20 | } |
21 | ||
c11191e0 AJ |
22 | /** |
23 | * Attempt to parse the first line of a new request message. | |
24 | * | |
25 | * Governed by RFC 2616 section 4.1 | |
26 | * " | |
27 | * In the interest of robustness, servers SHOULD ignore any empty | |
28 | * line(s) received where a Request-Line is expected. In other words, if | |
29 | * the server is reading the protocol stream at the beginning of a | |
30 | * message and receives a CRLF first, it should ignore the CRLF. | |
31 | * | |
32 | * ... To restate what is explicitly forbidden by the | |
33 | * BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an | |
34 | * extra CRLF. | |
35 | * " | |
36 | * | |
37 | * Parsing state is stored between calls to avoid repeating buffer scans. | |
cbcd99df | 38 | * If garbage is found the parsing offset is incremented. |
c11191e0 | 39 | */ |
cbcd99df | 40 | void |
678451c0 | 41 | Http::One::RequestParser::skipGarbageLines() |
c11191e0 | 42 | { |
c11191e0 AJ |
43 | #if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ... |
44 | if (Config.onoff.relaxed_header_parser) { | |
b749de75 | 45 | if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n')) |
c11191e0 AJ |
46 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
47 | "CRLF bytes received ahead of request-line. " << | |
48 | "Ignored due to relaxed_header_parser."); | |
49 | // Be tolerant of prefix empty lines | |
cbcd99df | 50 | // ie any series of either \n or \r\n with no other characters and no repeated \r |
b749de75 AJ |
51 | while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) { |
52 | buf_.consume(1); | |
7a4fa6a0 | 53 | } |
c11191e0 AJ |
54 | } |
55 | #endif | |
56 | ||
57 | /* XXX: this is a Squid-specific tolerance | |
58 | * it appears never to have been relevant outside out unit-tests | |
59 | * because the ConnStateData parser loop starts with consumeWhitespace() | |
60 | * which absorbs any SP HTAB VTAB CR LF characters. | |
61 | * But unit-tests called the HttpParser method directly without that pruning. | |
62 | */ | |
63 | #if USE_HTTP_VIOLATIONS | |
64 | if (Config.onoff.relaxed_header_parser) { | |
b749de75 | 65 | if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ') |
c11191e0 AJ |
66 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
67 | "Whitespace bytes received ahead of method. " << | |
68 | "Ignored due to relaxed_header_parser."); | |
69 | // Be tolerant of prefix spaces (other bytes are valid method values) | |
b749de75 AJ |
70 | while (!buf_.isEmpty() && buf_[0] == ' ') { |
71 | buf_.consume(1); | |
7a4fa6a0 | 72 | } |
c11191e0 AJ |
73 | } |
74 | #endif | |
c11191e0 AJ |
75 | } |
76 | ||
77 | /** | |
78 | * Attempt to parse the first line of a new request message. | |
79 | * | |
80 | * Governed by: | |
81 | * RFC 1945 section 5.1 | |
82 | * RFC 2616 section 5.1 | |
9651320a | 83 | * RFC 7230 |
c11191e0 AJ |
84 | * |
85 | * Parsing state is stored between calls. However the current implementation | |
86 | * begins parsing from scratch on every call. | |
87 | * The return value tells you whether the parsing state fields are valid or not. | |
88 | * | |
89 | * \retval -1 an error occurred. request_parse_status indicates HTTP status result. | |
90 | * \retval 1 successful parse. member fields contain the request-line items | |
91 | * \retval 0 more data is needed to complete the parse | |
92 | */ | |
4c14658e | 93 | int |
678451c0 | 94 | Http::One::RequestParser::parseRequestFirstLine() |
4c14658e AJ |
95 | { |
96 | int second_word = -1; // track the suspected URI start | |
97 | int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte | |
98 | int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence | |
99 | ||
b749de75 AJ |
100 | debugs(74, 5, "parsing possible request: buf.length=" << buf_.length()); |
101 | debugs(74, DBG_DATA, buf_); | |
4c14658e AJ |
102 | |
103 | // Single-pass parse: (provided we have the whole line anyways) | |
104 | ||
7a4fa6a0 | 105 | req.start = 0; |
74f478f8 | 106 | req.end = -1; |
b749de75 | 107 | for (SBuf::size_type i = 0; i < buf_.length(); ++i) { |
4c14658e | 108 | // track first and last whitespace (SP only) |
b749de75 | 109 | if (buf_[i] == ' ') { |
4c14658e | 110 | last_whitespace = i; |
74f478f8 | 111 | if (first_whitespace < req.start) |
4c14658e AJ |
112 | first_whitespace = i; |
113 | } | |
114 | ||
115 | // track next non-SP/non-HT byte after first_whitespace | |
b749de75 | 116 | if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') { |
4c14658e AJ |
117 | second_word = i; |
118 | } | |
119 | ||
120 | // locate line terminator | |
b749de75 | 121 | if (buf_[i] == '\n') { |
74f478f8 | 122 | req.end = i; |
4c14658e AJ |
123 | line_end = i - 1; |
124 | break; | |
125 | } | |
b749de75 | 126 | if (i < buf_.length() - 1 && buf_[i] == '\r') { |
4c14658e | 127 | if (Config.onoff.relaxed_header_parser) { |
b749de75 | 128 | if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r') |
4c14658e AJ |
129 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
130 | "Series of carriage-return bytes received prior to line terminator. " << | |
131 | "Ignored due to relaxed_header_parser."); | |
132 | ||
133 | // Be tolerant of invalid multiple \r prior to terminal \n | |
b749de75 | 134 | if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r') |
4c14658e | 135 | line_end = i - 1; |
b749de75 | 136 | while (i < buf_.length() - 1 && buf_[i + 1] == '\r') |
95dc7ff4 | 137 | ++i; |
4c14658e | 138 | |
b749de75 | 139 | if (buf_[i + 1] == '\n') { |
74f478f8 | 140 | req.end = i + 1; |
4c14658e AJ |
141 | break; |
142 | } | |
143 | } else { | |
b749de75 | 144 | if (buf_[i + 1] == '\n') { |
74f478f8 | 145 | req.end = i + 1; |
4c14658e AJ |
146 | line_end = i - 1; |
147 | break; | |
148 | } | |
149 | } | |
150 | ||
151 | // RFC 2616 section 5.1 | |
152 | // "No CR or LF is allowed except in the final CRLF sequence" | |
955394ce | 153 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
154 | return -1; |
155 | } | |
156 | } | |
016a316b | 157 | |
74f478f8 | 158 | if (req.end == -1) { |
016a316b | 159 | // DoS protection against long first-line |
b749de75 | 160 | if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) { |
016a316b AJ |
161 | debugs(33, 5, "Too large request-line"); |
162 | // XXX: return URL-too-log status code if second_whitespace is not yet found. | |
163 | request_parse_status = Http::scHeaderTooLarge; | |
164 | return -1; | |
165 | } | |
166 | ||
74f478f8 AJ |
167 | debugs(74, 5, "Parser: retval 0: from " << req.start << |
168 | "->" << req.end << ": needs more data to complete first line."); | |
4c14658e AJ |
169 | return 0; |
170 | } | |
171 | ||
172 | // NP: we have now seen EOL, more-data (0) cannot occur. | |
173 | // From here on any failure is -1, success is 1 | |
174 | ||
4c14658e AJ |
175 | // Input Validation: |
176 | ||
016a316b AJ |
177 | // DoS protection against long first-line |
178 | if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) { | |
179 | debugs(33, 5, "Too large request-line"); | |
180 | request_parse_status = Http::scHeaderTooLarge; | |
181 | return -1; | |
182 | } | |
183 | ||
4c14658e AJ |
184 | // Process what we now know about the line structure into field offsets |
185 | // generating HTTP status for any aborts as we go. | |
186 | ||
187 | // First non-whitespace = beginning of method | |
74f478f8 | 188 | if (req.start > line_end) { |
955394ce | 189 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
190 | return -1; |
191 | } | |
74f478f8 | 192 | req.m_start = req.start; |
4c14658e AJ |
193 | |
194 | // First whitespace = end of method | |
74f478f8 | 195 | if (first_whitespace > line_end || first_whitespace < req.start) { |
955394ce | 196 | request_parse_status = Http::scBadRequest; // no method |
4c14658e AJ |
197 | return -1; |
198 | } | |
74f478f8 AJ |
199 | req.m_end = first_whitespace - 1; |
200 | if (req.m_end < req.m_start) { | |
955394ce | 201 | request_parse_status = Http::scBadRequest; // missing URI? |
4c14658e AJ |
202 | return -1; |
203 | } | |
204 | ||
274bd5ad | 205 | /* Set method_ */ |
b749de75 | 206 | const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1); |
7a4fa6a0 | 207 | method_ = HttpRequestMethod(tmp); |
274bd5ad | 208 | |
4c14658e | 209 | // First non-whitespace after first SP = beginning of URL+Version |
74f478f8 | 210 | if (second_word > line_end || second_word < req.start) { |
955394ce | 211 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
212 | return -1; |
213 | } | |
74f478f8 | 214 | req.u_start = second_word; |
4c14658e AJ |
215 | |
216 | // RFC 1945: SP and version following URI are optional, marking version 0.9 | |
217 | // we identify this by the last whitespace being earlier than URI start | |
74f478f8 | 218 | if (last_whitespace < second_word && last_whitespace >= req.start) { |
5aedd08d | 219 | msgProtocol_ = Http::ProtocolVersion(0,9); |
74f478f8 | 220 | req.u_end = line_end; |
b749de75 | 221 | uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1); |
955394ce | 222 | request_parse_status = Http::scOkay; // HTTP/0.9 |
4c14658e AJ |
223 | return 1; |
224 | } else { | |
225 | // otherwise last whitespace is somewhere after end of URI. | |
74f478f8 | 226 | req.u_end = last_whitespace; |
4c14658e | 227 | // crop any trailing whitespace in the area we think of as URI |
b749de75 | 228 | for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end); |
4c14658e | 229 | } |
74f478f8 | 230 | if (req.u_end < req.u_start) { |
955394ce | 231 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
232 | return -1; |
233 | } | |
b749de75 | 234 | uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1); |
4c14658e AJ |
235 | |
236 | // Last whitespace SP = before start of protocol/version | |
237 | if (last_whitespace >= line_end) { | |
955394ce | 238 | request_parse_status = Http::scBadRequest; // missing version |
4c14658e AJ |
239 | return -1; |
240 | } | |
74f478f8 AJ |
241 | req.v_start = last_whitespace + 1; |
242 | req.v_end = line_end; | |
4c14658e | 243 | |
9651320a | 244 | /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */ |
b749de75 | 245 | if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) { |
9651320a | 246 | // non-HTTP/1 protocols not supported / implemented. |
955394ce | 247 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e | 248 | return -1; |
4c14658e | 249 | } |
9651320a | 250 | // NP: magic octets include the protocol name and major version DIGIT. |
5aedd08d | 251 | msgProtocol_.protocol = AnyP::PROTO_HTTP; |
9651320a | 252 | msgProtocol_.major = 1; |
4c14658e | 253 | |
9651320a | 254 | int i = req.v_start + Http1magic.length() -1; |
4c14658e | 255 | |
4c14658e AJ |
256 | // catch missing minor part |
257 | if (++i > line_end) { | |
955394ce | 258 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
259 | return -1; |
260 | } | |
261 | /* next should be one or more digits */ | |
b749de75 | 262 | if (!isdigit(buf_[i])) { |
955394ce | 263 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
264 | return -1; |
265 | } | |
266 | int min = 0; | |
b749de75 | 267 | for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) { |
4c14658e | 268 | min = min * 10; |
b749de75 | 269 | min = min + (buf_[i]) - '0'; |
4c14658e AJ |
270 | } |
271 | // catch too-big values or trailing garbage | |
272 | if (min >= 65536 || i < line_end) { | |
955394ce | 273 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
274 | return -1; |
275 | } | |
5aedd08d | 276 | msgProtocol_.minor = min; |
4c14658e AJ |
277 | |
278 | /* | |
279 | * Rightio - we have all the schtuff. Return true; we've got enough. | |
280 | */ | |
955394ce | 281 | request_parse_status = Http::scOkay; |
4c14658e AJ |
282 | return 1; |
283 | } | |
7a4fa6a0 | 284 | |
87abd755 | 285 | bool |
36a9c964 | 286 | Http::One::RequestParser::parse(const SBuf &aBuf) |
4c14658e | 287 | { |
b749de75 | 288 | buf_ = aBuf; |
36a9c964 AJ |
289 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); |
290 | ||
cbcd99df | 291 | // stage 1: locate the request-line |
36a9c964 | 292 | if (parsingStage_ == HTTP_PARSE_NONE) { |
cbcd99df | 293 | skipGarbageLines(); |
cbcd99df AJ |
294 | |
295 | // if we hit something before EOS treat it as a message | |
b749de75 | 296 | if (!buf_.isEmpty()) |
cbcd99df AJ |
297 | parsingStage_ = HTTP_PARSE_FIRST; |
298 | else | |
f9daf571 | 299 | return false; |
cbcd99df | 300 | } |
c11191e0 | 301 | |
cbcd99df AJ |
302 | // stage 2: parse the request-line |
303 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
f4880526 | 304 | PROF_start(HttpParserParseReqLine); |
678451c0 | 305 | const int retcode = parseRequestFirstLine(); |
e4cff825 AJ |
306 | |
307 | // first-line (or a look-alike) found successfully. | |
308 | if (retcode > 0) { | |
b749de75 | 309 | buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done. |
e4cff825 AJ |
310 | parsingStage_ = HTTP_PARSE_MIME; |
311 | } | |
312 | ||
7a4fa6a0 | 313 | debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end << |
e4cff825 | 314 | " line={" << aBuf.length() << ", data='" << aBuf << "'}"); |
9ff1b8ca | 315 | debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")"); |
5f3cc9a2 | 316 | debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")"); |
f4880526 | 317 | debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")"); |
b749de75 | 318 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); |
f4880526 | 319 | PROF_stop(HttpParserParseReqLine); |
cbcd99df AJ |
320 | |
321 | // syntax errors already | |
f4880526 | 322 | if (retcode < 0) { |
cbcd99df | 323 | parsingStage_ = HTTP_PARSE_DONE; |
f4880526 AJ |
324 | return false; |
325 | } | |
326 | } | |
327 | ||
328 | // stage 3: locate the mime header block | |
cbcd99df | 329 | if (parsingStage_ == HTTP_PARSE_MIME) { |
f4880526 AJ |
330 | // HTTP/1.x request-line is valid and parsing completed. |
331 | if (msgProtocol_.major == 1) { | |
332 | /* NOTE: HTTP/0.9 requests do not have a mime header block. | |
333 | * So the rest of the code will need to deal with '0'-byte headers | |
334 | * (ie, none, so don't try parsing em) | |
335 | */ | |
eb1bd364 | 336 | int64_t mimeHeaderBytes = 0; |
2169fd4d | 337 | // XXX: c_str() reallocates. performance regression. |
b749de75 AJ |
338 | if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) { |
339 | if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) { | |
016a316b AJ |
340 | debugs(33, 5, "Too large request"); |
341 | request_parse_status = Http::scHeaderTooLarge; | |
cbcd99df | 342 | parsingStage_ = HTTP_PARSE_DONE; |
7a4fa6a0 | 343 | } else |
016a316b | 344 | debugs(33, 5, "Incomplete request, waiting for end of headers"); |
7a4fa6a0 | 345 | return false; |
f4880526 | 346 | } |
38012e61 AJ |
347 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); |
348 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); | |
f4880526 | 349 | |
7a4fa6a0 | 350 | } else |
f4880526 | 351 | debugs(33, 3, "Missing HTTP/1.x identifier"); |
7a4fa6a0 | 352 | |
cbcd99df AJ |
353 | // NP: we do not do any further stages here yet so go straight to DONE |
354 | parsingStage_ = HTTP_PARSE_DONE; | |
016a316b AJ |
355 | |
356 | // Squid could handle these headers, but admin does not want to | |
357 | if (messageHeaderSize() >= Config.maxRequestHeaderSize) { | |
358 | debugs(33, 5, "Too large request"); | |
359 | request_parse_status = Http::scHeaderTooLarge; | |
360 | return false; | |
361 | } | |
f4880526 | 362 | } |
87abd755 | 363 | |
36a9c964 | 364 | return !needsMoreData(); |
4c14658e | 365 | } |