src/HttpMsg.cc

   1
   2 /*
   3  * $Id$
   4  *
   5  * DEBUG: section 74    HTTP Message
   6  * AUTHOR: Alex Rousskov
   7  *
   8  * SQUID Web Proxy Cache          http://www.squid-cache.org/
   9  * ----------------------------------------------------------
  10  *
  11  *  Squid is the result of efforts by numerous individuals from
  12  *  the Internet community; see the CONTRIBUTORS file for full
  13  *  details.   Many organizations have provided support for Squid's
  14  *  development; see the SPONSORS file for full details.  Squid is
  15  *  Copyrighted (C) 2001 by the Regents of the University of
  16  *  California; see the COPYRIGHT file for full details.  Squid
  17  *  incorporates software developed and/or copyrighted by other
  18  *  sources; see the CREDITS file for full details.
  19  *
  20  *  This program is free software; you can redistribute it and/or modify
  21  *  it under the terms of the GNU General Public License as published by
  22  *  the Free Software Foundation; either version 2 of the License, or
  23  *  (at your option) any later version.
  24  *
  25  *  This program is distributed in the hope that it will be useful,
  26  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  27  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  28  *  GNU General Public License for more details.
  29  *
  30  *  You should have received a copy of the GNU General Public License
  31  *  along with this program; if not, write to the Free Software
  32  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  33  *
  34  */
  35
  36 #include "squid.h"
  37 #include "HttpMsg.h"
  38 #include "MemBuf.h"
  39
  40 HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner),
  41         cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE),
  42         pstate(psReadyToParseStartLine), lock_count(0)
  43 {}
  44
  45 HttpMsg::~HttpMsg()
  46 {
  47     assert(lock_count == 0);
  48     assert(!body_pipe);
  49 }
  50
  51 HttpMsgParseState &operator++ (HttpMsgParseState &aState)
  52 {
  53     int tmp = (int)aState;
  54     aState = (HttpMsgParseState)(++tmp);
  55     return aState;
  56 }
  57
  58 /* find end of headers */
  59 int
  60 httpMsgIsolateHeaders(const char **parse_start, int l, const char **blk_start, const char **blk_end)
  61 {
  62     /*
  63      * parse_start points to the first line of HTTP message *headers*,
  64      * not including the request or status lines
  65      */
  66     size_t end = headersEnd(*parse_start, l);
  67     int nnl;
  68
  69     if (end) {
  70         *blk_start = *parse_start;
  71         *blk_end = *parse_start + end - 1;
  72         /*
  73          * leave blk_end pointing to the first character after the
  74          * first newline which terminates the headers
  75          */
  76         assert(**blk_end == '\n');
  77
  78         while (*(*blk_end - 1) == '\r')
  79             (*blk_end)--;
  80
  81         assert(*(*blk_end - 1) == '\n');
  82
  83         *parse_start += end;
  84
  85         return 1;
  86     }
  87
  88     /*
  89      * If we didn't find the end of headers, and parse_start does
  90      * NOT point to a CR or NL character, then return failure
  91      */
  92     if (**parse_start != '\r' && **parse_start != '\n')
  93         return 0;               /* failure */
  94
  95     /*
  96      * If we didn't find the end of headers, and parse_start does point
  97      * to an empty line, then we have empty headers.  Skip all CR and
  98      * NL characters up to the first NL.  Leave parse_start pointing at
  99      * the first character after the first NL.
 100      */
 101     *blk_start = *parse_start;
 102
 103     *blk_end = *blk_start;
 104
 105     for (nnl = 0; nnl == 0; (*parse_start)++) {
 106         if (**parse_start == '\r')
 107             (void) 0;
 108         else if (**parse_start == '\n')
 109             nnl++;
 110         else
 111             break;
 112     }
 113
 114     return 1;
 115 }
 116
 117 /* find first CRLF */
 118 static int
 119 httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
 120 {
 121     int slen = strcspn(*parse_start, "\r\n");
 122
 123     if (!(*parse_start)[slen])  /* no CRLF found */
 124         return 0;
 125
 126     *blk_start = *parse_start;
 127
 128     *blk_end = *blk_start + slen;
 129
 130     while (**blk_end == '\r')   /* CR */
 131         (*blk_end)++;
 132
 133     if (**blk_end == '\n')      /* LF */
 134         (*blk_end)++;
 135
 136     *parse_start = *blk_end;
 137
 138     return 1;
 139 }
 140
 141 // negative return is the negated HTTP_ error code
 142 // zero return means need more data
 143 // positive return is the size of parsed headers
 144 bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error)
 145 {
 146     assert(error);
 147     *error = HTTP_STATUS_NONE;
 148
 149     // httpMsgParseStep() and debugging require 0-termination, unfortunately
 150     buf->terminate(); // does not affect content size
 151
 152     // find the end of headers
 153     const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());
 154
 155     // sanity check the start line to see if this is in fact an HTTP message
 156     if (!sanityCheckStartLine(buf, hdr_len, error)) {
 157         // NP: sanityCheck sets *error and sends debug warnings on syntax errors.
 158         // if we have seen the connection close, this is an error too
 159         if (eof && *error==HTTP_STATUS_NONE)
 160             *error = HTTP_INVALID_HEADER;
 161
 162         return false;
 163     }
 164
 165     // TODO: move to httpReplyParseStep()
 166     if (hdr_len > Config.maxReplyHeaderSize || (hdr_len <= 0 && (size_t)buf->contentSize() > Config.maxReplyHeaderSize)) {
 167         debugs(58, 1, "HttpMsg::parse: Too large reply header (" << hdr_len << " > " << Config.maxReplyHeaderSize);
 168         *error = HTTP_HEADER_TOO_LARGE;
 169         return false;
 170     }
 171
 172     if (hdr_len <= 0) {
 173         debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf->content() << "'");
 174
 175         if (eof) // iff we have seen the end, this is an error
 176             *error = HTTP_INVALID_HEADER;
 177
 178         return false;
 179     }
 180
 181     const int res = httpMsgParseStep(buf->content(), buf->contentSize(), eof);
 182
 183     if (res < 0) { // error
 184         debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf->content() << "'");
 185         *error = HTTP_INVALID_HEADER;
 186         return false;
 187     }
 188
 189     if (res == 0) {
 190         debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf->content() << "'");
 191         *error = HTTP_INVALID_HEADER;
 192         return false; // but this should not happen due to headersEnd() above
 193     }
 194
 195     assert(res > 0);
 196     debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf->content() << "'");
 197
 198     if (hdr_sz != (int)hdr_len) {
 199         debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
 200                hdr_sz << " != " << hdr_len);
 201         hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
 202     }
 203
 204     return true;
 205 }
 206
 207 /*
 208  * parseCharBuf() takes character buffer of HTTP headers (buf),
 209  * which may not be NULL-terminated, and fills in an HttpMsg
 210  * structure.  The parameter 'end' specifies the offset to
 211  * the end of the reply headers.  The caller may know where the
 212  * end is, but is unable to NULL-terminate the buffer.  This function
 213  * returns true on success.
 214  */
 215 bool
 216 HttpMsg::parseCharBuf(const char *buf, ssize_t end)
 217 {
 218     MemBuf mb;
 219     int success;
 220     /* reset current state, because we are not used in incremental fashion */
 221     reset();
 222     mb.init();
 223     mb.append(buf, end);
 224     mb.terminate();
 225     success = httpMsgParseStep(mb.buf, mb.size, 0);
 226     mb.clean();
 227     return success == 1;
 228 }
 229
 230 /*
 231  * parses a 0-terminating buffer into HttpMsg.
 232  * Returns:
 233  *      1 -- success
 234  *       0 -- need more data (partial parse)
 235  *      -1 -- parse error
 236  */
 237 int
 238 HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
 239 {
 240     const char *parse_start = buf;
 241     int parse_len = len;
 242     const char *blk_start, *blk_end;
 243     const char **parse_end_ptr = &blk_end;
 244     assert(parse_start);
 245     assert(pstate < psParsed);
 246
 247     *parse_end_ptr = parse_start;
 248
 249     PROF_start(HttpMsg_httpMsgParseStep);
 250
 251     if (pstate == psReadyToParseStartLine) {
 252         if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) {
 253             PROF_stop(HttpMsg_httpMsgParseStep);
 254             return 0;
 255         }
 256
 257         if (!parseFirstLine(blk_start, blk_end)) {
 258             PROF_stop(HttpMsg_httpMsgParseStep);
 259             return httpMsgParseError();
 260         }
 261
 262         *parse_end_ptr = parse_start;
 263
 264         hdr_sz = *parse_end_ptr - buf;
 265         parse_len = parse_len - hdr_sz;
 266
 267         ++pstate;
 268     }
 269
 270     /*
 271      * XXX This code uses parse_start; but if we're incrementally parsing then
 272      * this code might not actually be given parse_start at the right spot (just
 273      * after headers.) Grr.
 274      */
 275     if (pstate == psReadyToParseHeaders) {
 276         if (!httpMsgIsolateHeaders(&parse_start, parse_len, &blk_start, &blk_end)) {
 277             if (atEnd) {
 278                 blk_start = parse_start, blk_end = blk_start + strlen(blk_start);
 279             } else {
 280                 PROF_stop(HttpMsg_httpMsgParseStep);
 281                 return 0;
 282             }
 283         }
 284
 285         if (!header.parse(blk_start, blk_end)) {
 286             PROF_stop(HttpMsg_httpMsgParseStep);
 287             return httpMsgParseError();
 288         }
 289
 290         hdrCacheInit();
 291
 292         *parse_end_ptr = parse_start;
 293
 294         hdr_sz = *parse_end_ptr - buf;
 295
 296         ++pstate;
 297     }
 298
 299     PROF_stop(HttpMsg_httpMsgParseStep);
 300     return 1;
 301 }
 302
 303 /* handy: resets and returns -1 */
 304 int
 305 HttpMsg::httpMsgParseError()
 306 {
 307     reset();
 308     return -1;
 309 }
 310
 311 void
 312 HttpMsg::setContentLength(int64_t clen)
 313 {
 314     header.delById(HDR_CONTENT_LENGTH); // if any
 315     header.putInt64(HDR_CONTENT_LENGTH, clen);
 316     content_length = clen;
 317 }
 318
 319 bool
 320 HttpMsg::persistent() const
 321 {
 322     if (http_ver > HttpVersion(1, 0)) {
 323         /*
 324          * for modern versions of HTTP: persistent unless there is
 325          * a "Connection: close" header.
 326          */
 327         return !httpHeaderHasConnDir(&header, "close");
 328     } else {
 329         /* for old versions of HTTP: persistent if has "keep-alive" */
 330         return httpHeaderHasConnDir(&header, "keep-alive");
 331     }
 332 }
 333
 334 void HttpMsg::packInto(Packer *p, bool full_uri) const
 335 {
 336     packFirstLineInto(p, full_uri);
 337     header.packInto(p);
 338     packerAppend(p, "\r\n", 2);
 339 }
 340
 341 void HttpMsg::hdrCacheInit()
 342 {
 343     content_length = header.getInt64(HDR_CONTENT_LENGTH);
 344     assert(NULL == cache_control);
 345     cache_control = header.getCc();
 346 }
 347
 348 /*
 349  * useful for debugging
 350  */
 351 void HttpMsg::firstLineBuf(MemBuf& mb)
 352 {
 353     Packer p;
 354     packerToMemInit(&p, &mb);
 355     packFirstLineInto(&p, true);
 356     packerClean(&p);
 357 }
 358
 359 // use HTTPMSGLOCK() instead of calling this directly
 360 HttpMsg *
 361 HttpMsg::_lock()
 362 {
 363     lock_count++;
 364     return this;
 365 }
 366
 367 // use HTTPMSGUNLOCK() instead of calling this directly
 368 void
 369 HttpMsg::_unlock()
 370 {
 371     assert(lock_count > 0);
 372     --lock_count;
 373
 374     if (0 == lock_count)
 375         delete this;
 376 }
 377
 378
 379 void
 380 HttpParserInit(HttpParser *hdr, const char *buf, int bufsiz)
 381 {
 382     hdr->state = 1;
 383     hdr->request_parse_status = HTTP_STATUS_NONE;
 384     hdr->buf = buf;
 385     hdr->bufsiz = bufsiz;
 386     hdr->req_start = hdr->req_end = -1;
 387     hdr->hdr_start = hdr->hdr_end = -1;
 388     debugs(74, 5, "httpParseInit: Request buffer is " << buf);
 389     hdr->m_start = hdr->m_end = -1;
 390     hdr->u_start = hdr->u_end = -1;
 391     hdr->v_start = hdr->v_end = -1;
 392     hdr->v_maj = hdr->v_min = 0;
 393 }
 394
 395 #if MSGDODEBUG
 396 /* XXX This should eventually turn into something inlined or #define'd */
 397 int
 398 HttpParserReqSz(HttpParser *hp)
 399 {
 400     assert(hp->state == 1);
 401     assert(hp->req_start != -1);
 402     assert(hp->req_end != -1);
 403     return hp->req_end - hp->req_start + 1;
 404 }
 405
 406
 407 /*
 408  * This +1 makes it 'right' but won't make any sense if
 409  * there's a 0 byte header? This won't happen normally - a valid header
 410  * is at -least- a blank line (\n, or \r\n.)
 411  */
 412 int
 413 HttpParserHdrSz(HttpParser *hp)
 414 {
 415     assert(hp->state == 1);
 416     assert(hp->hdr_start != -1);
 417     assert(hp->hdr_end != -1);
 418     return hp->hdr_end - hp->hdr_start + 1;
 419 }
 420
 421 const char *
 422 HttpParserHdrBuf(HttpParser *hp)
 423 {
 424     assert(hp->state == 1);
 425     assert(hp->hdr_start != -1);
 426     assert(hp->hdr_end != -1);
 427     return hp->buf + hp->hdr_start;
 428 }
 429
 430 int
 431 HttpParserRequestLen(HttpParser *hp)
 432 {
 433     return hp->hdr_end - hp->req_start + 1;
 434 }
 435 #endif
 436
 437 int
 438 HttpParser::parseRequestFirstLine()
 439 {
 440     int second_word = -1; // track the suspected URI start
 441     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
 442     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
 443
 444     debugs(74, 5, HERE << "parsing possible request: " << buf);
 445
 446     // Single-pass parse: (provided we have the whole line anyways)
 447
 448     req_start = 0;
 449     if (Config.onoff.relaxed_header_parser) {
 450         if (Config.onoff.relaxed_header_parser < 0 && buf[req_start] == ' ')
 451             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 452                    "Whitespace bytes received ahead of method. " <<
 453                    "Ignored due to relaxed_header_parser.");
 454         // Be tolerant of prefix spaces (other bytes are valid method values)
 455         for (; req_start < bufsiz && buf[req_start] == ' '; req_start++);
 456     }
 457     req_end = -1;
 458     for (int i = 0; i < bufsiz; i++) {
 459         // track first and last whitespace (SP only)
 460         if (buf[i] == ' ') {
 461             last_whitespace = i;
 462             if (first_whitespace < req_start)
 463                 first_whitespace = i;
 464         }
 465
 466         // track next non-SP/non-HT byte after first_whitespace
 467         if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
 468             second_word = i;
 469         }
 470
 471         // locate line terminator
 472         if (buf[i] == '\n') {
 473             req_end = i;
 474             line_end = i - 1;
 475             break;
 476         }
 477         if (i < bufsiz - 1 && buf[i] == '\r') {
 478             if (Config.onoff.relaxed_header_parser) {
 479                 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
 480                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 481                            "Series of carriage-return bytes received prior to line terminator. " <<
 482                            "Ignored due to relaxed_header_parser.");
 483
 484                 // Be tolerant of invalid multiple \r prior to terminal \n
 485                 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
 486                     line_end = i - 1;
 487                 while (i < bufsiz - 1 && buf[i + 1] == '\r')
 488                     i++;
 489
 490                 if (buf[i + 1] == '\n') {
 491                     req_end = i + 1;
 492                     break;
 493                 }
 494             } else {
 495                 if (buf[i + 1] == '\n') {
 496                     req_end = i + 1;
 497                     line_end = i - 1;
 498                     break;
 499                 }
 500             }
 501
 502             // RFC 2616 section 5.1
 503             // "No CR or LF is allowed except in the final CRLF sequence"
 504             request_parse_status = HTTP_BAD_REQUEST;
 505             return -1;
 506         }
 507     }
 508     if (req_end == -1) {
 509         debugs(74, 5, "Parser: retval 0: from " << req_start <<
 510                "->" << req_end << ": needs more data to complete first line.");
 511         return 0;
 512     }
 513
 514     // NP: we have now seen EOL, more-data (0) cannot occur.
 515     //     From here on any failure is -1, success is 1
 516
 517
 518     // Input Validation:
 519
 520     // Process what we now know about the line structure into field offsets
 521     // generating HTTP status for any aborts as we go.
 522
 523     // First non-whitespace = beginning of method
 524     if (req_start > line_end) {
 525         request_parse_status = HTTP_BAD_REQUEST;
 526         return -1;
 527     }
 528     m_start = req_start;
 529
 530     // First whitespace = end of method
 531     if (first_whitespace > line_end || first_whitespace < req_start) {
 532         request_parse_status = HTTP_BAD_REQUEST; // no method
 533         return -1;
 534     }
 535     m_end = first_whitespace - 1;
 536     if (m_end < m_start) {
 537         request_parse_status = HTTP_BAD_REQUEST; // missing URI?
 538         return -1;
 539     }
 540
 541     // First non-whitespace after first SP = beginning of URL+Version
 542     if (second_word > line_end || second_word < req_start) {
 543         request_parse_status = HTTP_BAD_REQUEST; // missing URI
 544         return -1;
 545     }
 546     u_start = second_word;
 547
 548     // RFC 1945: SP and version following URI are optional, marking version 0.9
 549     // we identify this by the last whitespace being earlier than URI start
 550     if (last_whitespace < second_word && last_whitespace >= req_start) {
 551         v_maj = 0;
 552         v_min = 9;
 553         u_end = line_end;
 554         request_parse_status = HTTP_OK; // HTTP/0.9
 555         return 1;
 556     } else {
 557         // otherwise last whitespace is somewhere after end of URI.
 558         u_end = last_whitespace;
 559         // crop any trailing whitespace in the area we think of as URI
 560         for (; u_end >= u_start && xisspace(buf[u_end]); u_end--);
 561     }
 562     if (u_end < u_start) {
 563         request_parse_status = HTTP_BAD_REQUEST; // missing URI
 564         return -1;
 565     }
 566
 567     // Last whitespace SP = before start of protocol/version
 568     if (last_whitespace >= line_end) {
 569         request_parse_status = HTTP_BAD_REQUEST; // missing version
 570         return -1;
 571     }
 572     v_start = last_whitespace + 1;
 573     v_end = line_end;
 574
 575     // We only accept HTTP protocol requests right now.
 576     // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
 577     if ((v_end - v_start +1) < 5 || strncasecmp(&buf[v_start], "HTTP/", 5) != 0) {
 578 #if USE_HTTP_VIOLATIONS
 579         // being lax; old parser accepted strange versions
 580         // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
 581         v_maj = 0;
 582         v_min = 9;
 583         u_end = line_end;
 584         request_parse_status = HTTP_OK; // treat as HTTP/0.9
 585         return 1;
 586 #else
 587         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; // protocol not supported / implemented.
 588         return -1;
 589 #endif
 590     }
 591
 592     int i = v_start + sizeof("HTTP/") -1;
 593
 594     /* next should be 1 or more digits */
 595     if (!isdigit(buf[i])) {
 596         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 597         return -1;
 598     }
 599     int maj = 0;
 600     for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; i++) {
 601         maj = maj * 10;
 602         maj = maj + (buf[i]) - '0';
 603     }
 604     // catch too-big values or missing remainders
 605     if (maj >= 65536 || i > line_end) {
 606         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 607         return -1;
 608     }
 609     v_maj = maj;
 610
 611     /* next should be .; we -have- to have this as we have a whole line.. */
 612     if (buf[i] != '.') {
 613         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 614         return -1;
 615     }
 616     // catch missing minor part
 617     if (++i > line_end) {
 618         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 619         return -1;
 620     }
 621
 622     /* next should be one or more digits */
 623     if (!isdigit(buf[i])) {
 624         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 625         return -1;
 626     }
 627     int min = 0;
 628     for (; i <= line_end && (isdigit(buf[i])) && min < 65536; i++) {
 629         min = min * 10;
 630         min = min + (buf[i]) - '0';
 631     }
 632     // catch too-big values or trailing garbage
 633     if (min >= 65536 || i < line_end) {
 634         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 635         return -1;
 636     }
 637     v_min = min;
 638
 639     /*
 640      * Rightio - we have all the schtuff. Return true; we've got enough.
 641      */
 642     request_parse_status = HTTP_OK;
 643     return 1;
 644 }
 645
 646 int
 647 HttpParserParseReqLine(HttpParser *hmsg)
 648 {
 649     PROF_start(HttpParserParseReqLine);
 650     int retcode = hmsg->parseRequestFirstLine();
 651     debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
 652            "->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
 653            hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
 654            "; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << hmsg->v_maj <<
 655            "/" << hmsg->v_min << ")");
 656     PROF_stop(HttpParserParseReqLine);
 657     return retcode;
 658 }