]> git.ipfire.org Git - thirdparty/squid.git/blame - src/HttpMsg.cc
- ICAP-unrelated improvements from the squid3-icap branch on SF
[thirdparty/squid.git] / src / HttpMsg.cc
CommitLineData
2246b732 1
2/*
5f8252d2 3 * $Id: HttpMsg.cc,v 1.40 2007/04/06 04:50:04 rousskov Exp $
2246b732 4 *
5 * DEBUG: section 74 HTTP Message
6 * AUTHOR: Alex Rousskov
7 *
2b6662ba 8 * SQUID Web Proxy Cache http://www.squid-cache.org/
e25c139f 9 * ----------------------------------------------------------
2246b732 10 *
2b6662ba 11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
2246b732 19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
cbdec147 32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
e25c139f 33 *
2246b732 34 */
35
36#include "squid.h"
8596962e 37#include "HttpMsg.h"
0eb49b6d 38#include "MemBuf.h"
8596962e 39
40HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner),
41 cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE),
4a56ee8d 42 pstate(psReadyToParseStartLine), lock_count(0)
8596962e 43{}
44
4a56ee8d 45HttpMsg::~HttpMsg()
46{
47 assert(lock_count == 0);
5f8252d2 48 assert(!body_pipe);
4a56ee8d 49}
50
8596962e 51HttpMsgParseState &operator++ (HttpMsgParseState &aState)
52{
53 int tmp = (int)aState;
54 aState = (HttpMsgParseState)(++tmp);
55 return aState;
56}
57
2246b732 58/* find end of headers */
59int
666f514b 60httpMsgIsolateHeaders(const char **parse_start, int l, const char **blk_start, const char **blk_end)
2246b732 61{
bdb1a5d5 62 /*
63 * parse_start points to the first line of HTTP message *headers*,
64 * not including the request or status lines
65 */
bdb1a5d5 66 size_t end = headersEnd(*parse_start, l);
67 int nnl;
62e76326 68
2246b732 69 if (end) {
62e76326 70 *blk_start = *parse_start;
71 *blk_end = *parse_start + end - 1;
72 /*
73 * leave blk_end pointing to the first character after the
74 * first newline which terminates the headers
75 */
76 assert(**blk_end == '\n');
77
78 while (*(*blk_end - 1) == '\r')
79 (*blk_end)--;
80
81 assert(*(*blk_end - 1) == '\n');
82
83 *parse_start += end;
84
85 return 1;
2246b732 86 }
62e76326 87
bdb1a5d5 88 /*
89 * If we didn't find the end of headers, and parse_start does
90 * NOT point to a CR or NL character, then return failure
91 */
92 if (**parse_start != '\r' && **parse_start != '\n')
62e76326 93 return 0; /* failure */
94
bdb1a5d5 95 /*
96 * If we didn't find the end of headers, and parse_start does point
97 * to an empty line, then we have empty headers. Skip all CR and
98 * NL characters up to the first NL. Leave parse_start pointing at
99 * the first character after the first NL.
100 */
101 *blk_start = *parse_start;
62e76326 102
bdb1a5d5 103 *blk_end = *blk_start;
62e76326 104
a4295415 105 for (nnl = 0; nnl == 0; (*parse_start)++) {
62e76326 106 if (**parse_start == '\r')
107 (void) 0;
108 else if (**parse_start == '\n')
109 nnl++;
110 else
111 break;
2246b732 112 }
62e76326 113
bdb1a5d5 114 return 1;
2246b732 115}
116
/*
 * Isolate the first line of a 0-terminated buffer.
 *
 * Returns 0, touching nothing, when the buffer holds no CR or LF yet.
 * Otherwise returns 1 with *blk_start at the beginning of the line and both
 * *blk_end and *parse_start just past the line terminator (any run of CRs
 * followed by at most one LF).
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const line = *parse_start;
    const int eol_off = strcspn(line, "\r\n");

    if (line[eol_off] == '\0') /* no CRLF found */
        return 0;

    *blk_start = line;

    const char *cursor = line + eol_off;

    for (; *cursor == '\r'; ++cursor) {
        /* swallow every CR */
    }

    if (*cursor == '\n') /* and a single LF */
        ++cursor;

    *blk_end = cursor;

    *parse_start = cursor;

    return 1;
}
140
141// negative return is the negated HTTP_ error code
142// zero return means need more data
143// positive return is the size of parsed headers
144bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error)
145{
146 assert(error);
147 *error = HTTP_STATUS_NONE;
148
149 // httpMsgParseStep() and debugging require 0-termination, unfortunately
150 buf->terminate(); // does not affect content size
151
152 // find the end of headers
153 // TODO: Remove? httpReplyParseStep() should do similar checks
154 const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());
155
156 if (hdr_len <= 0) {
157 debugs(58, 3, "HttpMsg::parse: failed to find end of headers " <<
158 "(eof: " << eof << ") in '" << buf->content() << "'");
159
160 if (eof) // iff we have seen the end, this is an error
161 *error = HTTP_INVALID_HEADER;
162
163 return false;
164 }
165
166 // TODO: move to httpReplyParseStep()
167 if (hdr_len > Config.maxReplyHeaderSize) {
168 debugs(58, 1, "HttpMsg::parse: Too large reply header (" <<
169 hdr_len << " > " << Config.maxReplyHeaderSize);
170 *error = HTTP_HEADER_TOO_LARGE;
171 return false;
172 }
173
7d5c6423 174 if (!sanityCheckStartLine(buf, error)) {
175 debugs(58,1, HERE << "first line of HTTP message is invalid");
176 *error = HTTP_INVALID_HEADER;
8596962e 177 return false;
7d5c6423 178 }
8596962e 179
666f514b 180 const int res = httpMsgParseStep(buf->content(), buf->contentSize(), eof);
8596962e 181
182 if (res < 0) { // error
183 debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers " <<
184 "in '" << buf->content() << "'");
185 *error = HTTP_INVALID_HEADER;
186 return false;
187 }
188
189 if (res == 0) {
190 debugs(58, 2, "HttpMsg::parse: strange, need more data near '" <<
191 buf->content() << "'");
192 return false; // but this should not happen due to headersEnd() above
193 }
194
195 assert(res > 0);
196 debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) " <<
197 "near '" << buf->content() << "'");
198
199 if (hdr_sz != (int)hdr_len) {
200 debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
201 hdr_sz << " != " << hdr_len);
202 hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
203 }
204
205 return true;
206}
207
59eed7dc 208/*
bf9fb8ff 209 * parseCharBuf() takes character buffer of HTTP headers (buf),
59eed7dc 210 * which may not be NULL-terminated, and fills in an HttpMsg
211 * structure. The parameter 'end' specifies the offset to
212 * the end of the reply headers. The caller may know where the
213 * end is, but is unable to NULL-terminate the buffer. This function
214 * returns true on success.
215 */
216bool
217HttpMsg::parseCharBuf(const char *buf, ssize_t end)
218{
219 MemBuf mb;
220 int success;
221 /* reset current state, because we are not used in incremental fashion */
222 reset();
223 mb.init();
224 mb.append(buf, end);
225 mb.terminate();
666f514b 226 success = httpMsgParseStep(mb.buf, mb.size, 0);
59eed7dc 227 mb.clean();
228 return success == 1;
229}
8596962e 230
231/*
232 * parses a 0-terminating buffer into HttpMsg.
233 * Returns:
234 * 1 -- success
235 * 0 -- need more data (partial parse)
236 * -1 -- parse error
237 */
238int
666f514b 239HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
8596962e 240{
241 const char *parse_start = buf;
666f514b 242 int parse_len = len;
8596962e 243 const char *blk_start, *blk_end;
244 const char **parse_end_ptr = &blk_end;
245 assert(parse_start);
246 assert(pstate < psParsed);
9ea37c79 247 int retval;
8596962e 248
249 *parse_end_ptr = parse_start;
250
9ea37c79 251 PROF_start(HttpMsg_httpMsgParseStep);
252
8596962e 253 if (pstate == psReadyToParseStartLine) {
9ea37c79 254 if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) {
255 retval = 0;
256 goto finish;
257 }
8596962e 258
9ea37c79 259 if (!parseFirstLine(blk_start, blk_end)) {
260 retval = httpMsgParseError();
261 goto finish;
262 }
8596962e 263
264 *parse_end_ptr = parse_start;
265
266 hdr_sz = *parse_end_ptr - buf;
666f514b 267 parse_len = parse_len - hdr_sz;
8596962e 268
269 ++pstate;
270 }
271
666f514b 272 /*
273 * XXX This code uses parse_start; but if we're incrementally parsing then
274 * this code might not actually be given parse_start at the right spot (just
275 * after headers.) Grr.
276 */
8596962e 277 if (pstate == psReadyToParseHeaders) {
666f514b 278 if (!httpMsgIsolateHeaders(&parse_start, parse_len, &blk_start, &blk_end)) {
9ea37c79 279 if (atEnd) {
8596962e 280 blk_start = parse_start, blk_end = blk_start + strlen(blk_start);
9ea37c79 281 } else {
282 retval = 0;
283 goto finish;
284 }
8596962e 285 }
286
a9925b40 287 if (!header.parse(blk_start, blk_end))
8596962e 288 return httpMsgParseError();
289
07947ad8 290 hdrCacheInit();
8596962e 291
292 *parse_end_ptr = parse_start;
293
294 hdr_sz = *parse_end_ptr - buf;
295
296 ++pstate;
297 }
9ea37c79 298 retval = 1;
299finish:
300 PROF_stop(HttpMsg_httpMsgParseStep);
301 return retval;
8596962e 302}
303
8596962e 304/* handy: resets and returns -1 */
305int
306HttpMsg::httpMsgParseError()
307{
308 reset();
8596962e 309 return -1;
310}
311
62e76326 312/* returns true if connection should be "persistent"
2246b732 313 * after processing this message */
314int
450e0c10 315httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr)
2246b732 316{
6f3e5833 317#if WHEN_SQUID_IS_HTTP1_1
21b92762 318
bffee5af 319 if ((http_ver.major >= 1) && (http_ver.minor >= 1)) {
62e76326 320 /*
321 * for modern versions of HTTP: persistent unless there is
322 * a "Connection: close" header.
323 */
324 return !httpHeaderHasConnDir(hdr, "close");
8596962e 325 } else
21b92762 326#else
327 {
328#endif
62e76326 329 /*
330 * Persistent connections in Netscape 3.x are allegedly broken,
331 * return false if it is a browser connection. If there is a
332 * VIA header, then we assume this is NOT a browser connection.
333 */
a9925b40 334 const char *agent = hdr->getStr(HDR_USER_AGENT);
62e76326 335
a9925b40 336 if (agent && !hdr->has(HDR_VIA)) {
8596962e 337 if (!strncasecmp(agent, "Mozilla/3.", 10))
338 return 0;
62e76326 339
8596962e 340 if (!strncasecmp(agent, "Netscape/3.", 11))
341 return 0;
2246b732 342 }
8596962e 343
344 /* for old versions of HTTP: persistent if has "keep-alive" */
345 return httpHeaderHasConnDir(hdr, "keep-alive");
346}
2246b732 347}
8596962e 348
349void HttpMsg::packInto(Packer *p, bool full_uri) const
350{
351 packFirstLineInto(p, full_uri);
a9925b40 352 header.packInto(p);
8596962e 353 packerAppend(p, "\r\n", 2);
354}
355
07947ad8 356void HttpMsg::hdrCacheInit()
357{
a9925b40 358 content_length = header.getInt(HDR_CONTENT_LENGTH);
07947ad8 359 assert(NULL == cache_control);
a9925b40 360 cache_control = header.getCc();
07947ad8 361}
3cfc19b3 362
363/*
364 * useful for debugging
365 */
366void HttpMsg::firstLineBuf(MemBuf& mb)
367{
368 Packer p;
369 packerToMemInit(&p, &mb);
370 packFirstLineInto(&p, true);
371 packerClean(&p);
372}
4a56ee8d 373
374HttpMsg *
375
6dd9f4bd 376// use HTTPMSGLOCK() instead of calling this directly
377HttpMsg::_lock()
4a56ee8d 378{
379 lock_count++;
380 return this;
381}
382
6dd9f4bd 383// use HTTPMSGUNLOCK() instead of calling this directly
4a56ee8d 384void
6dd9f4bd 385HttpMsg::_unlock()
4a56ee8d 386{
387 assert(lock_count > 0);
388 --lock_count;
389
390 if (0 == lock_count)
391 delete this;
392}
a5baffba 393
394
395void
396HttpParserInit(HttpParser *hdr, const char *buf, int bufsiz)
397{
398 hdr->state = 1;
399 hdr->buf = buf;
400 hdr->bufsiz = bufsiz;
401 hdr->req_start = hdr->req_end = -1;
402 hdr->hdr_start = hdr->hdr_end = -1;
4c29340e 403 debug(74, 5)("httpParseInit: Request buffer is %s\n", buf);
a5baffba 404}
405
52512f28 406#if MSGDODEBUG
a5baffba 407/* XXX This should eventually turn into something inlined or #define'd */
408int
409HttpParserReqSz(HttpParser *hp)
410{
411 assert(hp->state == 1);
412 assert(hp->req_start != -1);
413 assert(hp->req_end != -1);
414 return hp->req_end - hp->req_start + 1;
415}
416
417
418/*
419 * This +1 makes it 'right' but won't make any sense if
420 * there's a 0 byte header? This won't happen normally - a valid header
421 * is at -least- a blank line (\n, or \r\n.)
422 */
423int
424HttpParserHdrSz(HttpParser *hp)
425{
426 assert(hp->state == 1);
427 assert(hp->hdr_start != -1);
428 assert(hp->hdr_end != -1);
429 return hp->hdr_end - hp->hdr_start + 1;
430}
431
432const char *
433HttpParserHdrBuf(HttpParser *hp)
434{
435 assert(hp->state == 1);
436 assert(hp->hdr_start != -1);
437 assert(hp->hdr_end != -1);
438 return hp->buf + hp->hdr_start;
439}
440
441int
442HttpParserRequestLen(HttpParser *hp)
443{
444 return hp->hdr_end - hp->req_start + 1;
445}
52512f28 446#endif
a5baffba 447
84cc2635 448/*
449 * Attempt to parse the request line.
450 *
451 * This will set the values in hmsg that it determines. One may end up
452 * with a partially-parsed buffer; the return value tells you whether
453 * the values are valid or not.
454 *
455 * @return 1 if parsed correctly, 0 if more is needed, -1 if error
456 *
457 * TODO:
458 * * have it indicate "error" and "not enough" as two separate conditions!
459 * * audit this code as off-by-one errors are probably everywhere!
460 */
461int
462HttpParserParseReqLine(HttpParser *hmsg)
463{
464 int i = 0;
465 int retcode = 0;
466 int maj = -1, min = -1;
467 int last_whitespace = -1, line_end = -1;
468
4c29340e 469 debug(74, 5)("httpParserParseReqLine: parsing %s\n", hmsg->buf);
470
52512f28 471 PROF_start(HttpParserParseReqLine);
84cc2635 472 /* Find \r\n - end of URL+Version (and the request) */
192333af 473 hmsg->req_end = -1;
84cc2635 474 for (i = 0; i < hmsg->bufsiz; i++) {
475 if (hmsg->buf[i] == '\n') {
882c6885 476 hmsg->req_end = i;
84cc2635 477 break;
478 }
192333af 479 if (i < hmsg->bufsiz - 1 && hmsg->buf[i] == '\r' && hmsg->buf[i + 1] == '\n') {
882c6885 480 hmsg->req_end = i + 1;
84cc2635 481 break;
482 }
483 }
192333af 484 if (hmsg->req_end == -1) {
84cc2635 485 retcode = 0;
486 goto finish;
487 }
882c6885 488 assert(hmsg->buf[hmsg->req_end] == '\n');
192333af 489 /* Start at the beginning again */
84cc2635 490 i = 0;
491
492 /* Find first non-whitespace - beginning of method */
493 for (; i < hmsg->req_end && (isspace(hmsg->buf[i])); i++);
494 if (i >= hmsg->req_end) {
495 retcode = 0;
496 goto finish;
497 }
498 hmsg->m_start = i;
499 hmsg->req_start = i;
500
501 /* Find first whitespace - end of method */
502 for (; i < hmsg->req_end && (! isspace(hmsg->buf[i])); i++);
503 if (i >= hmsg->req_end) {
504 retcode = 0;
505 goto finish;
506 }
507 hmsg->m_end = i - 1;
508
509 /* Find first non-whitespace - beginning of URL+Version */
510 for (; i < hmsg->req_end && (isspace(hmsg->buf[i])); i++);
511 if (i >= hmsg->req_end) {
512 retcode = 0;
513 goto finish;
514 }
515 hmsg->u_start = i;
516
517 /* Find \r\n or \n - thats the end of the line. Keep track of the last whitespace! */
518 for (; i <= hmsg->req_end; i++) {
519 /* If \n - its end of line */
520 if (hmsg->buf[i] == '\n') {
521 line_end = i;
522 break;
523 }
524 /* XXX could be off-by-one wrong! */
525 if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') {
526 line_end = i;
527 break;
528 }
529 /* If its a whitespace, note it as it'll delimit our version */
530 if (hmsg->buf[i] == ' ' || hmsg->buf[i] == '\t') {
531 last_whitespace = i;
532 }
533 }
534 if (i > hmsg->req_end) {
535 retcode = 0;
536 goto finish;
537 }
538
539 /* At this point we don't need the 'i' value; so we'll recycle it for version parsing */
540
541 /*
542 * At this point: line_end points to the first eol char (\r or \n);
543 * last_whitespace points to the last whitespace char in the URL.
544 * We know we have a full buffer here!
545 */
546 if (last_whitespace == -1) {
547 maj = 0; min = 9;
548 hmsg->u_end = line_end - 1;
549 assert(hmsg->u_end >= hmsg->u_start);
550 } else {
551 /* Find the first non-whitespace after last_whitespace */
552 /* XXX why <= vs < ? I do need to really re-audit all of this ..*/
553 for (i = last_whitespace; i <= hmsg->req_end && isspace(hmsg->buf[i]); i++);
554 if (i > hmsg->req_end) {
555 retcode = 0;
556 goto finish;
557 }
558
559 /* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */
560 if (i + 5 >= hmsg->req_end || (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) {
561 maj = 0; min = 9;
562 hmsg->u_end = line_end - 1;
563 assert(hmsg->u_end >= hmsg->u_start);
564 } else {
565 /* Ok, lets try parsing! Yes, this needs refactoring! */
566 hmsg->v_start = i;
567 i += 5;
568
569 /* next should be 1 or more digits */
570 maj = 0;
571 for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) {
572 maj = maj * 10;
573 maj = maj + (hmsg->buf[i]) - '0';
574 }
575 if (i >= hmsg->req_end) {
576 retcode = 0;
577 goto finish;
578 }
579
580 /* next should be .; we -have- to have this as we have a whole line.. */
581 if (hmsg->buf[i] != '.') {
582 retcode = 0;
583 goto finish;
584 }
585 if (i + 1 >= hmsg->req_end) {
586 retcode = 0;
587 goto finish;
588 }
589
590 /* next should be one or more digits */
591 i++;
592 min = 0;
593 for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) {
594 min = min * 10;
595 min = min + (hmsg->buf[i]) - '0';
596 }
597
598 /* Find whitespace, end of version */
599 hmsg->v_end = i;
600 hmsg->u_end = last_whitespace - 1;
601 }
602 }
603
604 /*
605 * Rightio - we have all the schtuff. Return true; we've got enough.
606 */
607 retcode = 1;
aa9aaa76 608 assert(maj != -1);
609 assert(min != -1);
84cc2635 610
611finish:
612 hmsg->v_maj = maj;
613 hmsg->v_min = min;
52512f28 614 PROF_stop(HttpParserParseReqLine);
4c29340e 615 debug(74, 5) ("Parser: retval %d: from %d->%d: method %d->%d; url %d->%d; version %d->%d (%d/%d)\n",
84cc2635 616 retcode, hmsg->req_start, hmsg->req_end,
617 hmsg->m_start, hmsg->m_end,
618 hmsg->u_start, hmsg->u_end,
619 hmsg->v_start, hmsg->v_end, maj, min);
620 return retcode;
621}
622