]> git.ipfire.org Git - thirdparty/squid.git/blob - src/HttpMsg.cc
Added specific flags for Intel(R)'s icc compiler
[thirdparty/squid.git] / src / HttpMsg.cc
1
2 /*
3 * $Id$
4 *
5 * DEBUG: section 74 HTTP Message
6 * AUTHOR: Alex Rousskov
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "squid.h"
37 #include "HttpMsg.h"
38 #include "MemBuf.h"
39
40 HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner),
41 cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE),
42 pstate(psReadyToParseStartLine), lock_count(0)
43 {}
44
45 HttpMsg::~HttpMsg()
46 {
47 assert(lock_count == 0);
48 assert(!body_pipe);
49 }
50
51 HttpMsgParseState &operator++ (HttpMsgParseState &aState)
52 {
53 int tmp = (int)aState;
54 aState = (HttpMsgParseState)(++tmp);
55 return aState;
56 }
57
58 /* find end of headers */
59 int
60 httpMsgIsolateHeaders(const char **parse_start, int l, const char **blk_start, const char **blk_end)
61 {
62 /*
63 * parse_start points to the first line of HTTP message *headers*,
64 * not including the request or status lines
65 */
66 size_t end = headersEnd(*parse_start, l);
67 int nnl;
68
69 if (end) {
70 *blk_start = *parse_start;
71 *blk_end = *parse_start + end - 1;
72 /*
73 * leave blk_end pointing to the first character after the
74 * first newline which terminates the headers
75 */
76 assert(**blk_end == '\n');
77
78 while (*(*blk_end - 1) == '\r')
79 (*blk_end)--;
80
81 assert(*(*blk_end - 1) == '\n');
82
83 *parse_start += end;
84
85 return 1;
86 }
87
88 /*
89 * If we didn't find the end of headers, and parse_start does
90 * NOT point to a CR or NL character, then return failure
91 */
92 if (**parse_start != '\r' && **parse_start != '\n')
93 return 0; /* failure */
94
95 /*
96 * If we didn't find the end of headers, and parse_start does point
97 * to an empty line, then we have empty headers. Skip all CR and
98 * NL characters up to the first NL. Leave parse_start pointing at
99 * the first character after the first NL.
100 */
101 *blk_start = *parse_start;
102
103 *blk_end = *blk_start;
104
105 for (nnl = 0; nnl == 0; (*parse_start)++) {
106 if (**parse_start == '\r')
107 (void) 0;
108 else if (**parse_start == '\n')
109 nnl++;
110 else
111 break;
112 }
113
114 return 1;
115 }
116
/*
 * Isolate the first line of a 0-terminated buffer.
 *
 * Scans *parse_start for the first CR or LF. When neither is present
 * before the terminating NUL, returns 0 and leaves every output untouched.
 * Otherwise returns 1 with:
 *   *blk_start   - the original *parse_start (start of the line)
 *   *blk_end     - first byte after the line terminator, where the
 *                  terminator is any run of CRs followed by at most one LF
 *   *parse_start - same position as *blk_end
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const line = *parse_start;

    /* strcspn() yields size_t; keep it unsigned rather than narrowing to int */
    const size_t line_len = strcspn(line, "\r\n");

    if (line[line_len] == '\0') /* no CRLF found */
        return 0;

    const char *after = line + line_len;

    while (*after == '\r') /* CR */
        ++after;

    if (*after == '\n') /* LF */
        ++after;

    *blk_start = line;
    *blk_end = after;
    *parse_start = after;

    return 1;
}
140
141 // negative return is the negated HTTP_ error code
142 // zero return means need more data
143 // positive return is the size of parsed headers
144 bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error)
145 {
146 assert(error);
147 *error = HTTP_STATUS_NONE;
148
149 // httpMsgParseStep() and debugging require 0-termination, unfortunately
150 buf->terminate(); // does not affect content size
151
152 // find the end of headers
153 const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());
154
155 // sanity check the start line to see if this is in fact an HTTP message
156 if (!sanityCheckStartLine(buf, hdr_len, error)) {
157 // NP: sanityCheck sets *error and sends debug warnings on syntax errors.
158 // if we have seen the connection close, this is an error too
159 if (eof && *error==HTTP_STATUS_NONE)
160 *error = HTTP_INVALID_HEADER;
161
162 return false;
163 }
164
165 // TODO: move to httpReplyParseStep()
166 if (hdr_len > Config.maxReplyHeaderSize || (hdr_len <= 0 && (size_t)buf->contentSize() > Config.maxReplyHeaderSize)) {
167 debugs(58, 1, "HttpMsg::parse: Too large reply header (" << hdr_len << " > " << Config.maxReplyHeaderSize);
168 *error = HTTP_HEADER_TOO_LARGE;
169 return false;
170 }
171
172 if (hdr_len <= 0) {
173 debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf->content() << "'");
174
175 if (eof) // iff we have seen the end, this is an error
176 *error = HTTP_INVALID_HEADER;
177
178 return false;
179 }
180
181 const int res = httpMsgParseStep(buf->content(), buf->contentSize(), eof);
182
183 if (res < 0) { // error
184 debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf->content() << "'");
185 *error = HTTP_INVALID_HEADER;
186 return false;
187 }
188
189 if (res == 0) {
190 debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf->content() << "'");
191 *error = HTTP_INVALID_HEADER;
192 return false; // but this should not happen due to headersEnd() above
193 }
194
195 assert(res > 0);
196 debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf->content() << "'");
197
198 if (hdr_sz != (int)hdr_len) {
199 debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
200 hdr_sz << " != " << hdr_len);
201 hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
202 }
203
204 return true;
205 }
206
207 /*
208 * parseCharBuf() takes character buffer of HTTP headers (buf),
209 * which may not be NULL-terminated, and fills in an HttpMsg
210 * structure. The parameter 'end' specifies the offset to
211 * the end of the reply headers. The caller may know where the
212 * end is, but is unable to NULL-terminate the buffer. This function
213 * returns true on success.
214 */
215 bool
216 HttpMsg::parseCharBuf(const char *buf, ssize_t end)
217 {
218 MemBuf mb;
219 int success;
220 /* reset current state, because we are not used in incremental fashion */
221 reset();
222 mb.init();
223 mb.append(buf, end);
224 mb.terminate();
225 success = httpMsgParseStep(mb.buf, mb.size, 0);
226 mb.clean();
227 return success == 1;
228 }
229
230 /*
231 * parses a 0-terminating buffer into HttpMsg.
232 * Returns:
233 * 1 -- success
234 * 0 -- need more data (partial parse)
235 * -1 -- parse error
236 */
237 int
238 HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
239 {
240 const char *parse_start = buf;
241 int parse_len = len;
242 const char *blk_start, *blk_end;
243 const char **parse_end_ptr = &blk_end;
244 assert(parse_start);
245 assert(pstate < psParsed);
246
247 *parse_end_ptr = parse_start;
248
249 PROF_start(HttpMsg_httpMsgParseStep);
250
251 if (pstate == psReadyToParseStartLine) {
252 if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) {
253 PROF_stop(HttpMsg_httpMsgParseStep);
254 return 0;
255 }
256
257 if (!parseFirstLine(blk_start, blk_end)) {
258 PROF_stop(HttpMsg_httpMsgParseStep);
259 return httpMsgParseError();
260 }
261
262 *parse_end_ptr = parse_start;
263
264 hdr_sz = *parse_end_ptr - buf;
265 parse_len = parse_len - hdr_sz;
266
267 ++pstate;
268 }
269
270 /*
271 * XXX This code uses parse_start; but if we're incrementally parsing then
272 * this code might not actually be given parse_start at the right spot (just
273 * after headers.) Grr.
274 */
275 if (pstate == psReadyToParseHeaders) {
276 if (!httpMsgIsolateHeaders(&parse_start, parse_len, &blk_start, &blk_end)) {
277 if (atEnd) {
278 blk_start = parse_start, blk_end = blk_start + strlen(blk_start);
279 } else {
280 PROF_stop(HttpMsg_httpMsgParseStep);
281 return 0;
282 }
283 }
284
285 if (!header.parse(blk_start, blk_end)) {
286 PROF_stop(HttpMsg_httpMsgParseStep);
287 return httpMsgParseError();
288 }
289
290 hdrCacheInit();
291
292 *parse_end_ptr = parse_start;
293
294 hdr_sz = *parse_end_ptr - buf;
295
296 ++pstate;
297 }
298
299 PROF_stop(HttpMsg_httpMsgParseStep);
300 return 1;
301 }
302
303 /* handy: resets and returns -1 */
304 int
305 HttpMsg::httpMsgParseError()
306 {
307 reset();
308 return -1;
309 }
310
311 void
312 HttpMsg::setContentLength(int64_t clen)
313 {
314 header.delById(HDR_CONTENT_LENGTH); // if any
315 header.putInt64(HDR_CONTENT_LENGTH, clen);
316 content_length = clen;
317 }
318
319 bool
320 HttpMsg::persistent() const
321 {
322 if (http_ver > HttpVersion(1, 0)) {
323 /*
324 * for modern versions of HTTP: persistent unless there is
325 * a "Connection: close" header.
326 */
327 return !httpHeaderHasConnDir(&header, "close");
328 } else {
329 /* for old versions of HTTP: persistent if has "keep-alive" */
330 return httpHeaderHasConnDir(&header, "keep-alive");
331 }
332 }
333
334 void HttpMsg::packInto(Packer *p, bool full_uri) const
335 {
336 packFirstLineInto(p, full_uri);
337 header.packInto(p);
338 packerAppend(p, "\r\n", 2);
339 }
340
341 void HttpMsg::hdrCacheInit()
342 {
343 content_length = header.getInt64(HDR_CONTENT_LENGTH);
344 assert(NULL == cache_control);
345 cache_control = header.getCc();
346 }
347
348 /*
349 * useful for debugging
350 */
351 void HttpMsg::firstLineBuf(MemBuf& mb)
352 {
353 Packer p;
354 packerToMemInit(&p, &mb);
355 packFirstLineInto(&p, true);
356 packerClean(&p);
357 }
358
359 // use HTTPMSGLOCK() instead of calling this directly
360 HttpMsg *
361 HttpMsg::_lock()
362 {
363 lock_count++;
364 return this;
365 }
366
367 // use HTTPMSGUNLOCK() instead of calling this directly
368 void
369 HttpMsg::_unlock()
370 {
371 assert(lock_count > 0);
372 --lock_count;
373
374 if (0 == lock_count)
375 delete this;
376 }
377
378
379 void
380 HttpParserInit(HttpParser *hdr, const char *buf, int bufsiz)
381 {
382 hdr->state = 1;
383 hdr->request_parse_status = HTTP_STATUS_NONE;
384 hdr->buf = buf;
385 hdr->bufsiz = bufsiz;
386 hdr->req_start = hdr->req_end = -1;
387 hdr->hdr_start = hdr->hdr_end = -1;
388 debugs(74, 5, "httpParseInit: Request buffer is " << buf);
389 hdr->m_start = hdr->m_end = -1;
390 hdr->u_start = hdr->u_end = -1;
391 hdr->v_start = hdr->v_end = -1;
392 hdr->v_maj = hdr->v_min = 0;
393 }
394
395 #if MSGDODEBUG
396 /* XXX This should eventually turn into something inlined or #define'd */
397 int
398 HttpParserReqSz(HttpParser *hp)
399 {
400 assert(hp->state == 1);
401 assert(hp->req_start != -1);
402 assert(hp->req_end != -1);
403 return hp->req_end - hp->req_start + 1;
404 }
405
406
407 /*
408 * This +1 makes it 'right' but won't make any sense if
409 * there's a 0 byte header? This won't happen normally - a valid header
410 * is at -least- a blank line (\n, or \r\n.)
411 */
412 int
413 HttpParserHdrSz(HttpParser *hp)
414 {
415 assert(hp->state == 1);
416 assert(hp->hdr_start != -1);
417 assert(hp->hdr_end != -1);
418 return hp->hdr_end - hp->hdr_start + 1;
419 }
420
421 const char *
422 HttpParserHdrBuf(HttpParser *hp)
423 {
424 assert(hp->state == 1);
425 assert(hp->hdr_start != -1);
426 assert(hp->hdr_end != -1);
427 return hp->buf + hp->hdr_start;
428 }
429
430 int
431 HttpParserRequestLen(HttpParser *hp)
432 {
433 return hp->hdr_end - hp->req_start + 1;
434 }
435 #endif
436
437 int
438 HttpParser::parseRequestFirstLine()
439 {
440 int second_word = -1; // track the suspected URI start
441 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
442 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
443
444 debugs(74, 5, HERE << "parsing possible request: " << buf);
445
446 // Single-pass parse: (provided we have the whole line anyways)
447
448 req_start = 0;
449 if (Config.onoff.relaxed_header_parser) {
450 if (Config.onoff.relaxed_header_parser < 0 && buf[req_start] == ' ')
451 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
452 "Whitespace bytes received ahead of method. " <<
453 "Ignored due to relaxed_header_parser.");
454 // Be tolerant of prefix spaces (other bytes are valid method values)
455 for (; req_start < bufsiz && buf[req_start] == ' '; req_start++);
456 }
457 req_end = -1;
458 for (int i = 0; i < bufsiz; i++) {
459 // track first and last whitespace (SP only)
460 if (buf[i] == ' ') {
461 last_whitespace = i;
462 if (first_whitespace < req_start)
463 first_whitespace = i;
464 }
465
466 // track next non-SP/non-HT byte after first_whitespace
467 if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
468 second_word = i;
469 }
470
471 // locate line terminator
472 if (buf[i] == '\n') {
473 req_end = i;
474 line_end = i - 1;
475 break;
476 }
477 if (i < bufsiz - 1 && buf[i] == '\r') {
478 if (Config.onoff.relaxed_header_parser) {
479 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
480 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
481 "Series of carriage-return bytes received prior to line terminator. " <<
482 "Ignored due to relaxed_header_parser.");
483
484 // Be tolerant of invalid multiple \r prior to terminal \n
485 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
486 line_end = i - 1;
487 while (i < bufsiz - 1 && buf[i + 1] == '\r')
488 i++;
489
490 if (buf[i + 1] == '\n') {
491 req_end = i + 1;
492 break;
493 }
494 } else {
495 if (buf[i + 1] == '\n') {
496 req_end = i + 1;
497 line_end = i - 1;
498 break;
499 }
500 }
501
502 // RFC 2616 section 5.1
503 // "No CR or LF is allowed except in the final CRLF sequence"
504 request_parse_status = HTTP_BAD_REQUEST;
505 return -1;
506 }
507 }
508 if (req_end == -1) {
509 debugs(74, 5, "Parser: retval 0: from " << req_start <<
510 "->" << req_end << ": needs more data to complete first line.");
511 return 0;
512 }
513
514 // NP: we have now seen EOL, more-data (0) cannot occur.
515 // From here on any failure is -1, success is 1
516
517
518 // Input Validation:
519
520 // Process what we now know about the line structure into field offsets
521 // generating HTTP status for any aborts as we go.
522
523 // First non-whitespace = beginning of method
524 if (req_start > line_end) {
525 request_parse_status = HTTP_BAD_REQUEST;
526 return -1;
527 }
528 m_start = req_start;
529
530 // First whitespace = end of method
531 if (first_whitespace > line_end || first_whitespace < req_start) {
532 request_parse_status = HTTP_BAD_REQUEST; // no method
533 return -1;
534 }
535 m_end = first_whitespace - 1;
536 if (m_end < m_start) {
537 request_parse_status = HTTP_BAD_REQUEST; // missing URI?
538 return -1;
539 }
540
541 // First non-whitespace after first SP = beginning of URL+Version
542 if (second_word > line_end || second_word < req_start) {
543 request_parse_status = HTTP_BAD_REQUEST; // missing URI
544 return -1;
545 }
546 u_start = second_word;
547
548 // RFC 1945: SP and version following URI are optional, marking version 0.9
549 // we identify this by the last whitespace being earlier than URI start
550 if (last_whitespace < second_word && last_whitespace >= req_start) {
551 v_maj = 0;
552 v_min = 9;
553 u_end = line_end;
554 request_parse_status = HTTP_OK; // HTTP/0.9
555 return 1;
556 } else {
557 // otherwise last whitespace is somewhere after end of URI.
558 u_end = last_whitespace;
559 // crop any trailing whitespace in the area we think of as URI
560 for (; u_end >= u_start && xisspace(buf[u_end]); u_end--);
561 }
562 if (u_end < u_start) {
563 request_parse_status = HTTP_BAD_REQUEST; // missing URI
564 return -1;
565 }
566
567 // Last whitespace SP = before start of protocol/version
568 if (last_whitespace >= line_end) {
569 request_parse_status = HTTP_BAD_REQUEST; // missing version
570 return -1;
571 }
572 v_start = last_whitespace + 1;
573 v_end = line_end;
574
575 // We only accept HTTP protocol requests right now.
576 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
577 if ((v_end - v_start +1) < 5 || strncasecmp(&buf[v_start], "HTTP/", 5) != 0) {
578 #if USE_HTTP_VIOLATIONS
579 // being lax; old parser accepted strange versions
580 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
581 v_maj = 0;
582 v_min = 9;
583 u_end = line_end;
584 request_parse_status = HTTP_OK; // treat as HTTP/0.9
585 return 1;
586 #else
587 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; // protocol not supported / implemented.
588 return -1;
589 #endif
590 }
591
592 int i = v_start + sizeof("HTTP/") -1;
593
594 /* next should be 1 or more digits */
595 if (!isdigit(buf[i])) {
596 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
597 return -1;
598 }
599 int maj = 0;
600 for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; i++) {
601 maj = maj * 10;
602 maj = maj + (buf[i]) - '0';
603 }
604 // catch too-big values or missing remainders
605 if (maj >= 65536 || i > line_end) {
606 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
607 return -1;
608 }
609 v_maj = maj;
610
611 /* next should be .; we -have- to have this as we have a whole line.. */
612 if (buf[i] != '.') {
613 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
614 return -1;
615 }
616 // catch missing minor part
617 if (++i > line_end) {
618 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
619 return -1;
620 }
621
622 /* next should be one or more digits */
623 if (!isdigit(buf[i])) {
624 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
625 return -1;
626 }
627 int min = 0;
628 for (; i <= line_end && (isdigit(buf[i])) && min < 65536; i++) {
629 min = min * 10;
630 min = min + (buf[i]) - '0';
631 }
632 // catch too-big values or trailing garbage
633 if (min >= 65536 || i < line_end) {
634 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
635 return -1;
636 }
637 v_min = min;
638
639 /*
640 * Rightio - we have all the schtuff. Return true; we've got enough.
641 */
642 request_parse_status = HTTP_OK;
643 return 1;
644 }
645
646 int
647 HttpParserParseReqLine(HttpParser *hmsg)
648 {
649 PROF_start(HttpParserParseReqLine);
650 int retcode = hmsg->parseRequestFirstLine();
651 debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
652 "->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
653 hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
654 "; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << hmsg->v_maj <<
655 "/" << hmsg->v_min << ")");
656 PROF_stop(HttpParserParseReqLine);
657 return retcode;
658 }