[thirdparty/squid.git] / src / HttpMsg.cc


/*
 * $Id$
 *
 * DEBUG: section 74    HTTP Message
 * AUTHOR: Alex Rousskov
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "HttpMsg.h"
#include "MemBuf.h"

/**
 * Builds an empty message. The header is constructed for the given owner,
 * the cached header values start out zeroed/NULL, parsing starts at the
 * start line, and the lock count is zero.
 */
HttpMsg::HttpMsg(http_hdr_owner_type owner):
        header(owner),
        cache_control(NULL),
        hdr_sz(0),
        content_length(0),
        protocol(PROTO_NONE),
        pstate(psReadyToParseStartLine),
        lock_count(0)
{
}

/*
 * Destruction only checks invariants: the message must no longer be locked
 * and must no longer reference a body pipe.
 */
HttpMsg::~HttpMsg()
{
    assert(lock_count == 0);    // every _lock() must have been matched by _unlock()
    assert(!body_pipe);         // body_pipe has to be released before destruction
}

/*
 * Pre-increment for the parse state: advances aState to the enumerator whose
 * integer value is one higher and returns the updated state.
 * No range check is performed.
 */
HttpMsgParseState &operator++ (HttpMsgParseState &aState)
{
    const int following = static_cast<int>(aState) + 1;
    aState = static_cast<HttpMsgParseState>(following);
    return aState;
}

/**
 * Find the end of the header section.
 *
 * On entry *parse_start points to the first line of HTTP message *headers*,
 * not including the request or status line; l is the byte count passed on
 * to headersEnd().
 *
 * \retval 1 the header section was isolated: *blk_start / *blk_end delimit
 *           the header fields (possibly an empty range) and *parse_start has
 *           been advanced past what was consumed
 * \retval 0 headersEnd() found no end of headers and the data does not
 *           start with CR or LF; nothing was modified
 */
int
httpMsgIsolateHeaders(const char **parse_start, int l, const char **blk_start, const char **blk_end)
{
    const size_t hdrBytes = headersEnd(*parse_start, l);

    if (hdrBytes == 0) {
        /* No terminator found: only an empty line is still acceptable. */
        const char first = **parse_start;

        if (first != '\r' && first != '\n')
            return 0;       /* failure */

        /* Empty headers: report an empty block ... */
        *blk_start = *parse_start;
        *blk_end = *blk_start;

        /*
         * ... and step over CR characters up to and including the first NL,
         * so that *parse_start ends up right behind that NL. Any other
         * character stops the scan without being consumed.
         */
        bool sawNewline = false;

        while (!sawNewline) {
            if (**parse_start == '\n')
                sawNewline = true;
            else if (**parse_start != '\r')
                break;

            ++(*parse_start);
        }

        return 1;
    }

    *blk_start = *parse_start;

    /* last byte of the terminator reported by headersEnd() */
    *blk_end = *parse_start + hdrBytes - 1;
    assert(**blk_end == '\n');

    /*
     * Back up over the CR(s) of the terminating empty line so that *blk_end
     * sits right behind the NL that ends the last header line.
     */
    while ((*blk_end)[-1] == '\r')
        --(*blk_end);

    assert((*blk_end)[-1] == '\n');

    *parse_start += hdrBytes;

    return 1;
}

/*
 * Isolate the start line of a 0-terminated message.
 *
 * Looks for the first CR or LF. If there is none, returns 0 and leaves all
 * three pointers untouched. Otherwise *blk_start is the old *parse_start,
 * *blk_end is placed behind the line terminator (any run of CRs followed by
 * at most one LF), *parse_start is set to the same position, and 1 is
 * returned.
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const line = *parse_start;
    const int eolOffset = strcspn(line, "\r\n");

    if (line[eolOffset] == '\0')    /* no CRLF found */
        return 0;

    const char *cursor = line + eolOffset;

    for (; *cursor == '\r'; ++cursor);  /* CR */

    if (*cursor == '\n')                /* LF */
        ++cursor;

    *blk_start = line;
    *blk_end = cursor;
    *parse_start = cursor;

    return 1;
}

/**
 * Parses the start line and the headers of the message accumulated in buf.
 *
 * \param buf    message bytes received so far; it gets 0-terminated here,
 *               which does not affect its content size
 * \param eof    true when no more data will arrive; a missing end of headers
 *               is then an error instead of "not yet"
 * \param error  must not be NULL; reset to HTTP_STATUS_NONE on entry and set
 *               to an HTTP_ status code when the message is rejected
 *
 * \retval true   start line and headers were parsed; hdr_sz is their size
 * \retval false  nothing usable was parsed. When the end of headers has not
 *                been seen yet and eof is false, *error is left at
 *                HTTP_STATUS_NONE by this function; in the other failure
 *                cases *error carries the reason.
 */
bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error)
{
    assert(error);
    *error = HTTP_STATUS_NONE;

    // httpMsgParseStep() and debugging require 0-termination, unfortunately
    buf->terminate(); // does not affect content size

    // find the end of headers; 0 means "not found"
    const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());

    // sanity check the start line to see if this is in fact an HTTP message
    if (!sanityCheckStartLine(buf, hdr_len, error)) {
        // NP: sanityCheck sets *error and sends debug warnings.
        return false;
    }

    // TODO: move to httpReplyParseStep()
    // Reject both complete headers that are too large and incomplete ones
    // that have already outgrown the limit.
    if (hdr_len > Config.maxReplyHeaderSize || (hdr_len == 0 && (size_t)buf->contentSize() > Config.maxReplyHeaderSize)) {
        debugs(58, 1, "HttpMsg::parse: Too large reply header (" << hdr_len << " > " << Config.maxReplyHeaderSize << ")");
        *error = HTTP_HEADER_TOO_LARGE;
        return false;
    }

    if (hdr_len == 0) {
        debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf->content() << "'");

        if (eof) // iff we have seen the end, this is an error
            *error = HTTP_INVALID_HEADER;

        return false;
    }

    const int res = httpMsgParseStep(buf->content(), buf->contentSize(), eof);

    if (res < 0) { // error
        debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf->content() << "'");
        *error = HTTP_INVALID_HEADER;
        return false;
    }

    if (res == 0) {
        debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf->content() << "'");
        *error = HTTP_INVALID_HEADER;
        return false; // but this should not happen due to headersEnd() above
    }

    assert(res > 0);
    debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf->content() << "'");

    // httpMsgParseStep() computed its own hdr_sz; headersEnd() wins on mismatch
    if (hdr_sz != (int)hdr_len) {
        debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
               hdr_sz << " != " << hdr_len);
        hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
    }

    return true;
}

/*
 * parseCharBuf() takes character buffer of HTTP headers (buf),
 * which may not be NULL-terminated, and fills in an HttpMsg
 * structure.  The parameter 'end' specifies the offset to
 * the end of the reply headers.  The caller may know where the
 * end is, but is unable to NULL-terminate the buffer.  This function
 * returns true on success.
 */
bool
HttpMsg::parseCharBuf(const char *buf, ssize_t end)
{
    MemBuf mb;
    int success;
    /* reset current state, because we are not used in incremental fashion */
    reset();
    mb.init();
    mb.append(buf, end);
    mb.terminate();
    success = httpMsgParseStep(mb.buf, mb.size, 0);
    mb.clean();
    return success == 1;
}

/*
 * Parses a 0-terminated buffer into this HttpMsg, continuing from the
 * current parse state (start line first, then headers).
 *
 * buf/len describe the data; atEnd non-zero means that no more data will
 * follow, so an unterminated header section is parsed up to the 0 byte.
 *
 * Returns:
 *      1 -- success
 *      0 -- need more data (partial parse)
 *     -1 -- parse error (the message has been reset)
 */
int
HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
{
    const char *cursor = buf;           /* next byte that has not been consumed */
    int remaining = len;                /* bytes left behind the start line */
    const char *sectionBegin = NULL;    /* bounds of the piece being parsed */
    const char *sectionEnd = buf;

    assert(cursor);
    assert(pstate < psParsed);

    PROF_start(HttpMsg_httpMsgParseStep);

    if (pstate == psReadyToParseStartLine) {
        if (!httpMsgIsolateStart(&cursor, &sectionBegin, &sectionEnd)) {
            PROF_stop(HttpMsg_httpMsgParseStep);
            return 0;
        }

        if (!parseFirstLine(sectionBegin, sectionEnd)) {
            PROF_stop(HttpMsg_httpMsgParseStep);
            return httpMsgParseError();
        }

        /* so far only the start line counts towards the header size */
        hdr_sz = cursor - buf;
        remaining -= hdr_sz;

        ++pstate;
    }

    /*
     * XXX This code uses the cursor; but if we're incrementally parsing then
     * this code might not actually be given the cursor at the right spot
     * (just after the start line.) Grr.
     */
    if (pstate == psReadyToParseHeaders) {
        if (!httpMsgIsolateHeaders(&cursor, remaining, &sectionBegin, &sectionEnd)) {
            if (!atEnd) {
                PROF_stop(HttpMsg_httpMsgParseStep);
                return 0;
            }

            /* no more data will come: take everything up to the 0 byte */
            sectionBegin = cursor;
            sectionEnd = sectionBegin + strlen(sectionBegin);
        }

        if (!header.parse(sectionBegin, sectionEnd)) {
            PROF_stop(HttpMsg_httpMsgParseStep);
            return httpMsgParseError();
        }

        hdrCacheInit();

        hdr_sz = cursor - buf;

        ++pstate;
    }

    PROF_stop(HttpMsg_httpMsgParseStep);
    return 1;
}

/* Shortcut for parse failures: calls reset() and hands back the error code -1. */
int
HttpMsg::httpMsgParseError()
{
    const int parseFailure = -1;

    reset();
    return parseFailure;
}

void
HttpMsg::setContentLength(int64_t clen)
{
    header.delById(HDR_CONTENT_LENGTH); // if any
    header.putInt64(HDR_CONTENT_LENGTH, clen);
    content_length = clen;
}

/**
 * Returns true (non-zero) if the connection should be "persistent"
 * after processing this message.
 *
 * \param http_ver  protocol version of the message; only consulted when
 *                  WHEN_SQUID_IS_HTTP1_1 is enabled
 * \param hdr       headers of the message
 */
int
httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr)
{
#if WHEN_SQUID_IS_HTTP1_1

    /*
     * for modern versions of HTTP (1.1 and later, which includes the x.0
     * releases of any later major version): persistent unless there is
     * a "Connection: close" header.
     */
    if (http_ver.major > 1 || (http_ver.major == 1 && http_ver.minor >= 1))
        return !httpHeaderHasConnDir(hdr, "close");

#endif

    /*
     * Persistent connections in Netscape 3.x are allegedly broken,
     * return false if it is a browser connection.  If there is a
     * VIA header, then we assume this is NOT a browser connection.
     */
    const char *agent = hdr->getStr(HDR_USER_AGENT);

    if (agent && !hdr->has(HDR_VIA)) {
        if (!strncasecmp(agent, "Mozilla/3.", 10))
            return 0;

        if (!strncasecmp(agent, "Netscape/3.", 11))
            return 0;
    }

    /* for old versions of HTTP: persistent if has "keep-alive" */
    return httpHeaderHasConnDir(hdr, "keep-alive");
}

/*
 * Writes the message head into the packer: first line (full_uri is passed
 * through to packFirstLineInto()), then the header fields, then the CRLF
 * of the empty line that ends the header section.
 */
void HttpMsg::packInto(Packer *p, bool full_uri) const
{
    static const char crlf[] = "\r\n";

    packFirstLineInto(p, full_uri);
    header.packInto(p);
    packerAppend(p, crlf, 2);
}

/*
 * Refreshes the values cached from the parsed header: the Content-Length
 * number and the Cache-Control object returned by header.getCc().
 * Expects that no Cache-Control object is cached yet.
 */
void HttpMsg::hdrCacheInit()
{
    content_length = header.getInt64(HDR_CONTENT_LENGTH);
    assert(NULL == cache_control);  // a previously cached object must have been cleared first
    cache_control = header.getCc();
}

/*
 * Useful for debugging: packs the first line of the message into mb,
 * asking packFirstLineInto() for the full URI form.
 */
void HttpMsg::firstLineBuf(MemBuf& mb)
{
    Packer packer;

    packerToMemInit(&packer, &mb);
    packFirstLineInto(&packer, true);
    packerClean(&packer);
}

// Adds one lock to this message and returns the message itself.
// use HTTPMSGLOCK() instead of calling this directly
HttpMsg *
HttpMsg::_lock()
{
    ++lock_count;
    return this;
}

// Removes one lock; the message deletes itself when the last lock is gone,
// so the object must not be touched after this call unless another lock is held.
// use HTTPMSGUNLOCK() instead of calling this directly
void
HttpMsg::_unlock()
{
    assert(lock_count > 0);

    if (--lock_count == 0)
        delete this;
}


/**
 * Prepares a parser for a fresh request held in buf.
 *
 * \param hdr     parser to initialise
 * \param buf     request bytes; also streamed to the debug log here
 * \param bufsiz  number of bytes available in buf
 *
 * Every offset is set to -1 ("not found yet") and the version to 0.0, so
 * that HttpParserParseReqLine() never reports leftover or indeterminate
 * values when it bails out before reaching the corresponding token.
 */
void
HttpParserInit(HttpParser *hdr, const char *buf, int bufsiz)
{
    hdr->state = 1;
    hdr->buf = buf;
    hdr->bufsiz = bufsiz;
    hdr->req_start = hdr->req_end = -1;
    hdr->hdr_start = hdr->hdr_end = -1;
    /* method, URL and version token offsets, plus the parsed version */
    hdr->m_start = hdr->m_end = -1;
    hdr->u_start = hdr->u_end = -1;
    hdr->v_start = hdr->v_end = -1;
    hdr->v_maj = hdr->v_min = 0;
    debugs(74, 5, "httpParseInit: Request buffer is " << buf);
}

#if MSGDODEBUG
/*
 * Size of the request line in bytes, counting both req_start and req_end.
 * Only valid once both offsets have been found.
 * XXX This should eventually turn into something inlined or #define'd
 */
int
HttpParserReqSz(HttpParser *hp)
{
    assert(hp->state == 1);
    assert(hp->req_start != -1);
    assert(hp->req_end != -1);

    const int span = hp->req_end - hp->req_start;
    return span + 1;
}


/*
 * Size of the header section in bytes, counting both hdr_start and hdr_end.
 *
 * This +1 makes it 'right' but won't make any sense if
 * there's a 0 byte header? This won't happen normally - a valid header
 * is at -least- a blank line (\n, or \r\n.)
 */
int
HttpParserHdrSz(HttpParser *hp)
{
    assert(hp->state == 1);
    assert(hp->hdr_start != -1);
    assert(hp->hdr_end != -1);

    const int span = hp->hdr_end - hp->hdr_start;
    return span + 1;
}

/*
 * Pointer to the first byte of the header section inside the parser's
 * buffer. Only valid once hdr_start and hdr_end have been found.
 */
const char *
HttpParserHdrBuf(HttpParser *hp)
{
    assert(hp->state == 1);
    assert(hp->hdr_start != -1);
    assert(hp->hdr_end != -1);

    return &hp->buf[hp->hdr_start];
}

/*
 * Number of bytes from req_start up to and including hdr_end.
 * Unlike its siblings this performs no checks on the parser state.
 */
int
HttpParserRequestLen(HttpParser *hp)
{
    const int span = hp->hdr_end - hp->req_start;
    return span + 1;
}
#endif

/**
 * Attempt to parse the request line.
 *
 * Scans hmsg->buf (at most hmsg->bufsiz bytes) for the first line and
 * records inclusive byte offsets of what it finds in hmsg:
 *   req_start/req_end - first method byte / the '\n' ending the line
 *   m_start/m_end     - method token
 *   u_start/u_end     - URL
 *   v_start/v_end     - version token, only set when "HTTP/" was recognised
 *   v_maj/v_min       - always written at exit; 0.9 when no version is given
 *
 * This will set the values in hmsg that it determines. One may end up
 * with a partially-parsed buffer; the return value tells you whether
 * the values are valid or not.
 *
 * \retval  1 if parsed correctly
 * \retval  0 if more is needed
 * \retval -1 if error
 *
 * TODO:
 *   * have it indicate "error" and "not enough" as two separate conditions!
 *   * audit this code as off-by-one errors are probably everywhere!
 */
int
HttpParserParseReqLine(HttpParser *hmsg)
{
    int i = 0;
    int retcode = 0;
    unsigned int maj = 0, min = 0;
    int last_whitespace = -1, line_end = -1;

    debugs(74, 5, "httpParserParseReqLine: parsing " << hmsg->buf);

    PROF_start(HttpParserParseReqLine);
    /* Find \r\n - end of URL+Version (and the request) */
    /* req_end becomes the index of the first '\n' in the buffer */
    hmsg->req_end = -1;
    for (i = 0; i < hmsg->bufsiz; i++) {
        if (hmsg->buf[i] == '\n') {
            hmsg->req_end = i;
            break;
        }
        if (i < hmsg->bufsiz - 1 && hmsg->buf[i] == '\r' && hmsg->buf[i + 1] == '\n') {
            hmsg->req_end = i + 1;
            break;
        }
    }
    if (hmsg->req_end == -1) {
        /* no complete line in the buffer yet */
        retcode = 0;
        goto finish;
    }
    assert(hmsg->buf[hmsg->req_end] == '\n');
    /* Start at the beginning again */
    i = 0;

    /* Find first non-whitespace - beginning of method */
    /*
     * NOTE(review): a line made up of whitespace only (for instance a
     * leading blank line) ends up here as "more is needed" although the
     * line is complete - confirm that callers cope with that.
     */
    for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
    if (i >= hmsg->req_end) {
        retcode = 0;
        goto finish;
    }
    hmsg->m_start = i;
    hmsg->req_start = i;

    /* Find first whitespace - end of method */
    for (; i < hmsg->req_end && (! xisspace(hmsg->buf[i])); i++);
    if (i >= hmsg->req_end) {
        retcode = 0;
        goto finish;
    }
    hmsg->m_end = i - 1;

    /* Find first non-whitespace - beginning of URL+Version */
    for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
    if (i >= hmsg->req_end) {
        retcode = 0;
        goto finish;
    }
    hmsg->u_start = i;

    /* Find \r\n or \n - thats the end of the line. Keep track of the last whitespace! */
    for (; i <= hmsg->req_end; i++) {
        /* If \n - its end of line */
        if (hmsg->buf[i] == '\n') {
            line_end = i;
            break;
        }
        /* XXX could be off-by-one wrong! */
        if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') {
            line_end = i;
            break;
        }
        /* If its a whitespace, note it as it'll delimit our version */
        if (hmsg->buf[i] == ' ' || hmsg->buf[i] == '\t') {
            last_whitespace = i;
        }
    }
    /*
     * NOTE(review): buf[req_end] is '\n' (asserted above), so the loop
     * always breaks with i <= req_end and this branch looks unreachable.
     */
    if (i > hmsg->req_end) {
        retcode = 0;
        goto finish;
    }

    /* At this point we don't need the 'i' value; so we'll recycle it for version parsing */

    /*
     * At this point: line_end points to the first eol char (\r or \n);
     * last_whitespace points to the last whitespace char in the URL.
     * We know we have a full buffer here!
     */
    if (last_whitespace == -1) {
        /* no space or tab after the URL started: no version token, so HTTP/0.9 */
        maj = 0;
        min = 9;
        hmsg->u_end = line_end - 1;
        assert(hmsg->u_end >= hmsg->u_start);
    } else {
        /* Find the first non-whitespace after last_whitespace */
        /* XXX why <= vs < ? I do need to really re-audit all of this ..*/
        /*
         * NOTE(review): with trailing whitespace before the line end this
         * scan runs over the CR/LF as well (assuming xisspace() accepts
         * them) and reports "more is needed" for a complete line - verify.
         */
        for (i = last_whitespace; i <= hmsg->req_end && xisspace(hmsg->buf[i]); i++);
        if (i > hmsg->req_end) {
            retcode = 0;
            goto finish;
        }

        /* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */
        if (i + 5 >= hmsg->req_end || (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) {
            maj = 0;
            min = 9;
            hmsg->u_end = line_end - 1;
            assert(hmsg->u_end >= hmsg->u_start);
        } else {
            /* Ok, lets try parsing! Yes, this needs refactoring! */
            hmsg->v_start = i;
            i += 5;

            /* next should be 1 or more digits */
            /* (the loop itself also accepts zero digits, leaving maj at 0) */
            maj = 0;
            for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && maj < 65536; i++) {
                maj = maj * 10;
                maj = maj + (hmsg->buf[i]) - '0';
            }
            /* a major version above 65535 is the only case reported as an error here */
            if (maj >= 65536) {
                retcode = -1;
                goto finish;
            }
            if (i >= hmsg->req_end) {
                retcode = 0;
                goto finish;
            }

            /* next should be .; we -have- to have this as we have a whole line.. */
            /*
             * NOTE(review): the line is complete at this point, yet a
             * missing '.' or missing minor digits yields 0 ("more is
             * needed") rather than -1 - see the TODO in the function header.
             */
            if (hmsg->buf[i] != '.') {
                retcode = 0;
                goto finish;
            }
            if (i + 1 >= hmsg->req_end) {
                retcode = 0;
                goto finish;
            }

            /* next should be one or more digits */
            i++;
            min = 0;
            for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && min < 65536; i++) {
                min = min * 10;
                min = min + (hmsg->buf[i]) - '0';
            }

            if (min >= 65536) {
                retcode = -1;
                goto finish;
            }

            /* Find whitespace, end of version */
            /* v_end is the index just behind the last minor digit; what follows it is not examined */
            hmsg->v_end = i;
            hmsg->u_end = last_whitespace - 1;
        }
    }

    /*
     * Rightio - we have all the schtuff. Return true; we've got enough.
     */
    retcode = 1;

finish:
    /*
     * Common exit for every path: publish the version and log all offsets.
     * On the early exits some of the logged offsets were not assigned by
     * this call, so the log shows whatever hmsg held before.
     */
    hmsg->v_maj = maj;
    hmsg->v_min = min;
    PROF_stop(HttpParserParseReqLine);
    debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
           "->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
           hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
           "; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << maj <<
           "/" << min << ")");

    return retcode;
}
Commit	Line	Data
2246b732	1
2246b732	2	/*
262a0e14	3	* $Id$
2246b732	4	*
	5	* DEBUG: section 74 HTTP Message
	6	* AUTHOR: Alex Rousskov
	7	*
2b6662ba	8	* SQUID Web Proxy Cache http://www.squid-cache.org/
e25c139f	9	* ----------------------------------------------------------
2246b732	10	*
2b6662ba	11	* Squid is the result of efforts by numerous individuals from
	12	* the Internet community; see the CONTRIBUTORS file for full
	13	* details. Many organizations have provided support for Squid's
	14	* development; see the SPONSORS file for full details. Squid is
	15	* Copyrighted (C) 2001 by the Regents of the University of
	16	* California; see the COPYRIGHT file for full details. Squid
	17	* incorporates software developed and/or copyrighted by other
	18	* sources; see the CREDITS file for full details.
2246b732	19	*
	20	* This program is free software; you can redistribute it and/or modify
	21	* it under the terms of the GNU General Public License as published by
	22	* the Free Software Foundation; either version 2 of the License, or
	23	* (at your option) any later version.
26ac0430	24	*
2246b732	25	* This program is distributed in the hope that it will be useful,
	26	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	27	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	28	* GNU General Public License for more details.
26ac0430	29	*
2246b732	30	* You should have received a copy of the GNU General Public License
2246b732	31	* along with this program; if not, write to the Free Software
cbdec147	32	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
e25c139f	33	*
2246b732	34	*/
	35
	36	#include "squid.h"
8596962e	37	#include "HttpMsg.h"
0eb49b6d	38	#include "MemBuf.h"
8596962e	39
	40	HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner),
	41	cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE),
4a56ee8d	42	pstate(psReadyToParseStartLine), lock_count(0)
8596962e	43	{}
8596962e	44
4a56ee8d	45	HttpMsg::~HttpMsg()
	46	{
	47	assert(lock_count == 0);
5f8252d2	48	assert(!body_pipe);
4a56ee8d	49	}
4a56ee8d	50
8596962e	51	HttpMsgParseState &operator++ (HttpMsgParseState &aState)
	52	{
	53	int tmp = (int)aState;
	54	aState = (HttpMsgParseState)(++tmp);
	55	return aState;
	56	}
	57
2246b732	58	/* find end of headers */
2246b732	59	int
666f514b	60	httpMsgIsolateHeaders(const char parse_start, int l, const char blk_start, const char **blk_end)
2246b732	61	{
bdb1a5d5	62	/*
	63	* parse_start points to the first line of HTTP message headers,
	64	* not including the request or status lines
	65	*/
bdb1a5d5	66	size_t end = headersEnd(*parse_start, l);
bdb1a5d5	67	int nnl;
62e76326	68
2246b732	69	if (end) {
62e76326	70	blk_start = parse_start;
	71	blk_end = parse_start + end - 1;
	72	/*
	73	* leave blk_end pointing to the first character after the
	74	* first newline which terminates the headers
	75	*/
	76	assert(**blk_end == '\n');
	77
	78	while ((blk_end - 1) == '\r')
	79	(*blk_end)--;
	80
	81	assert((blk_end - 1) == '\n');
	82
	83	*parse_start += end;
	84
	85	return 1;
2246b732	86	}
62e76326	87
bdb1a5d5	88	/*
	89	* If we didn't find the end of headers, and parse_start does
	90	* NOT point to a CR or NL character, then return failure
	91	*/
	92	if (parse_start != '\r' && parse_start != '\n')
62e76326	93	return 0; /* failure */
62e76326	94
bdb1a5d5	95	/*
	96	* If we didn't find the end of headers, and parse_start does point
	97	* to an empty line, then we have empty headers. Skip all CR and
	98	* NL characters up to the first NL. Leave parse_start pointing at
	99	* the first character after the first NL.
	100	*/
	101	blk_start = parse_start;
62e76326	102
bdb1a5d5	103	blk_end = blk_start;
62e76326	104
a4295415	105	for (nnl = 0; nnl == 0; (*parse_start)++) {
62e76326	106	if (**parse_start == '\r')
	107	(void) 0;
	108	else if (**parse_start == '\n')
	109	nnl++;
	110	else
	111	break;
2246b732	112	}
62e76326	113
bdb1a5d5	114	return 1;
2246b732	115	}
2246b732	116
8596962e	117	/* find first CRLF */
	118	static int
	119	httpMsgIsolateStart(const char parse_start, const char blk_start, const char **blk_end)
	120	{
	121	int slen = strcspn(*parse_start, "\r\n");
	122
	123	if (!(parse_start)[slen]) / no CRLF found */
	124	return 0;
	125
	126	blk_start = parse_start;
	127
	128	blk_end = blk_start + slen;
	129
	130	while (*blk_end == '\r') / CR */
	131	(*blk_end)++;
	132
	133	if (*blk_end == '\n') / LF */
	134	(*blk_end)++;
	135
	136	parse_start = blk_end;
	137
	138	return 1;
	139	}
	140
	141	// negative return is the negated HTTP_ error code
	142	// zero return means need more data
	143	// positive return is the size of parsed headers
	144	bool HttpMsg::parse(MemBuf buf, bool eof, http_status error)
	145	{
	146	assert(error);
	147	*error = HTTP_STATUS_NONE;
	148
	149	// httpMsgParseStep() and debugging require 0-termination, unfortunately
	150	buf->terminate(); // does not affect content size
	151
	152	// find the end of headers
8596962e	153	const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());
8596962e	154
96ee497f AJ	155	// sanity check the start line to see if this is in fact an HTTP message
96ee497f AJ	156	if (!sanityCheckStartLine(buf, hdr_len, error)) {
0246f6b8	157	// NP: sanityCheck sets *error and sends debug warnings.
96ee497f AJ	158	return false;
	159	}
	160
c81e4de5	161	// TODO: move to httpReplyParseStep()
96ee497f AJ	162	if (hdr_len > Config.maxReplyHeaderSize \|\| (hdr_len <= 0 && (size_t)buf->contentSize() > Config.maxReplyHeaderSize)) {
96ee497f AJ	163	debugs(58, 1, "HttpMsg::parse: Too large reply header (" << hdr_len << " > " << Config.maxReplyHeaderSize);
c81e4de5	164	*error = HTTP_HEADER_TOO_LARGE;
	165	return false;
	166	}
	167
8596962e	168	if (hdr_len <= 0) {
96ee497f	169	debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf->content() << "'");
8596962e	170
	171	if (eof) // iff we have seen the end, this is an error
	172	*error = HTTP_INVALID_HEADER;
	173
	174	return false;
	175	}
	176
666f514b	177	const int res = httpMsgParseStep(buf->content(), buf->contentSize(), eof);
8596962e	178
8596962e	179	if (res < 0) { // error
96ee497f	180	debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf->content() << "'");
8596962e	181	*error = HTTP_INVALID_HEADER;
	182	return false;
	183	}
	184
	185	if (res == 0) {
96ee497f	186	debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf->content() << "'");
39f44165	187	*error = HTTP_INVALID_HEADER;
8596962e	188	return false; // but this should not happen due to headersEnd() above
	189	}
	190
	191	assert(res > 0);
96ee497f	192	debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf->content() << "'");
8596962e	193
	194	if (hdr_sz != (int)hdr_len) {
	195	debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
	196	hdr_sz << " != " << hdr_len);
	197	hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
	198	}
	199
	200	return true;
	201	}
	202
59eed7dc	203	/*
bf9fb8ff	204	* parseCharBuf() takes character buffer of HTTP headers (buf),
59eed7dc	205	* which may not be NULL-terminated, and fills in an HttpMsg
	206	* structure. The parameter 'end' specifies the offset to
	207	* the end of the reply headers. The caller may know where the
	208	* end is, but is unable to NULL-terminate the buffer. This function
	209	* returns true on success.
	210	*/
	211	bool
	212	HttpMsg::parseCharBuf(const char *buf, ssize_t end)
	213	{
	214	MemBuf mb;
	215	int success;
	216	/* reset current state, because we are not used in incremental fashion */
	217	reset();
	218	mb.init();
	219	mb.append(buf, end);
	220	mb.terminate();
666f514b	221	success = httpMsgParseStep(mb.buf, mb.size, 0);
59eed7dc	222	mb.clean();
	223	return success == 1;
	224	}
8596962e	225
	226	/*
	227	* parses a 0-terminating buffer into HttpMsg.
	228	* Returns:
	229	* 1 -- success
	230	* 0 -- need more data (partial parse)
	231	* -1 -- parse error
	232	*/
	233	int
666f514b	234	HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
8596962e	235	{
8596962e	236	const char *parse_start = buf;
666f514b	237	int parse_len = len;
8596962e	238	const char blk_start, blk_end;
	239	const char **parse_end_ptr = &blk_end;
	240	assert(parse_start);
	241	assert(pstate < psParsed);
8596962e	242
	243	*parse_end_ptr = parse_start;
	244
9ea37c79	245	PROF_start(HttpMsg_httpMsgParseStep);
9ea37c79	246
8596962e	247	if (pstate == psReadyToParseStartLine) {
9ea37c79	248	if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) {
137e94fd AJ	249	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	250	return 0;
26ac0430	251	}
8596962e	252
9ea37c79	253	if (!parseFirstLine(blk_start, blk_end)) {
137e94fd AJ	254	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	255	return httpMsgParseError();
26ac0430	256	}
8596962e	257
	258	*parse_end_ptr = parse_start;
	259
	260	hdr_sz = *parse_end_ptr - buf;
26ac0430	261	parse_len = parse_len - hdr_sz;
8596962e	262
	263	++pstate;
	264	}
	265
666f514b	266	/*
	267	* XXX This code uses parse_start; but if we're incrementally parsing then
	268	* this code might not actually be given parse_start at the right spot (just
	269	* after headers.) Grr.
	270	*/
8596962e	271	if (pstate == psReadyToParseHeaders) {
666f514b	272	if (!httpMsgIsolateHeaders(&parse_start, parse_len, &blk_start, &blk_end)) {
9ea37c79	273	if (atEnd) {
8596962e	274	blk_start = parse_start, blk_end = blk_start + strlen(blk_start);
26ac0430	275	} else {
137e94fd AJ	276	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	277	return 0;
9ea37c79	278	}
8596962e	279	}
8596962e	280
137e94fd AJ	281	if (!header.parse(blk_start, blk_end)) {
137e94fd AJ	282	PROF_stop(HttpMsg_httpMsgParseStep);
8596962e	283	return httpMsgParseError();
137e94fd	284	}
8596962e	285
07947ad8	286	hdrCacheInit();
8596962e	287
	288	*parse_end_ptr = parse_start;
	289
	290	hdr_sz = *parse_end_ptr - buf;
	291
	292	++pstate;
	293	}
137e94fd	294
9ea37c79	295	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd	296	return 1;
8596962e	297	}
8596962e	298
8596962e	299	/* handy: resets and returns -1 */
	300	int
	301	HttpMsg::httpMsgParseError()
	302	{
	303	reset();
8596962e	304	return -1;
	305	}
	306
3ff65596 AR	307	void
	308	HttpMsg::setContentLength(int64_t clen)
	309	{
	310	header.delById(HDR_CONTENT_LENGTH); // if any
	311	header.putInt64(HDR_CONTENT_LENGTH, clen);
	312	content_length = clen;
	313	}
	314
62e76326	315	/* returns true if connection should be "persistent"
2246b732	316	* after processing this message */
2246b732	317	int
450e0c10	318	httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr)
2246b732	319	{
6f3e5833	320	#if WHEN_SQUID_IS_HTTP1_1
21b92762	321
bffee5af	322	if ((http_ver.major >= 1) && (http_ver.minor >= 1)) {
62e76326	323	/*
	324	* for modern versions of HTTP: persistent unless there is
	325	* a "Connection: close" header.
	326	*/
	327	return !httpHeaderHasConnDir(hdr, "close");
8596962e	328	} else
21b92762	329	#else
	330	{
	331	#endif
62e76326	332	/*
	333	* Persistent connections in Netscape 3.x are allegedly broken,
	334	* return false if it is a browser connection. If there is a
	335	* VIA header, then we assume this is NOT a browser connection.
	336	*/
a9925b40	337	const char *agent = hdr->getStr(HDR_USER_AGENT);
62e76326	338
a9925b40	339	if (agent && !hdr->has(HDR_VIA)) {
8596962e	340	if (!strncasecmp(agent, "Mozilla/3.", 10))
8596962e	341	return 0;
62e76326	342
8596962e	343	if (!strncasecmp(agent, "Netscape/3.", 11))
8596962e	344	return 0;
2246b732	345	}
8596962e	346
	347	/* for old versions of HTTP: persistent if has "keep-alive" */
	348	return httpHeaderHasConnDir(hdr, "keep-alive");
	349	}
2246b732	350	}
8596962e	351
	352	void HttpMsg::packInto(Packer *p, bool full_uri) const
	353	{
	354	packFirstLineInto(p, full_uri);
a9925b40	355	header.packInto(p);
8596962e	356	packerAppend(p, "\r\n", 2);
	357	}
	358
07947ad8	359	void HttpMsg::hdrCacheInit()
07947ad8	360	{
47f6e231	361	content_length = header.getInt64(HDR_CONTENT_LENGTH);
07947ad8	362	assert(NULL == cache_control);
a9925b40	363	cache_control = header.getCc();
07947ad8	364	}
3cfc19b3	365
	366	/*
	367	* useful for debugging
	368	*/
	369	void HttpMsg::firstLineBuf(MemBuf& mb)
	370	{
	371	Packer p;
	372	packerToMemInit(&p, &mb);
	373	packFirstLineInto(&p, true);
	374	packerClean(&p);
	375	}
4a56ee8d	376
6dd9f4bd	377	// use HTTPMSGLOCK() instead of calling this directly
96ee497f	378	HttpMsg *
6dd9f4bd	379	HttpMsg::_lock()
4a56ee8d	380	{
	381	lock_count++;
	382	return this;
	383	}
	384
6dd9f4bd	385	// use HTTPMSGUNLOCK() instead of calling this directly
4a56ee8d	386	void
6dd9f4bd	387	HttpMsg::_unlock()
4a56ee8d	388	{
	389	assert(lock_count > 0);
	390	--lock_count;
	391
	392	if (0 == lock_count)
	393	delete this;
	394	}
a5baffba	395
	396
	397	void
	398	HttpParserInit(HttpParser hdr, const char buf, int bufsiz)
	399	{
26ac0430 AJ	400	hdr->state = 1;
	401	hdr->buf = buf;
	402	hdr->bufsiz = bufsiz;
	403	hdr->req_start = hdr->req_end = -1;
	404	hdr->hdr_start = hdr->hdr_end = -1;
	405	debugs(74, 5, "httpParseInit: Request buffer is " << buf);
a5baffba	406	}
a5baffba	407
52512f28	408	#if MSGDODEBUG
a5baffba	409	/* XXX This should eventually turn into something inlined or #define'd */
	410	int
	411	HttpParserReqSz(HttpParser *hp)
	412	{
26ac0430 AJ	413	assert(hp->state == 1);
	414	assert(hp->req_start != -1);
	415	assert(hp->req_end != -1);
	416	return hp->req_end - hp->req_start + 1;
a5baffba	417	}
	418
	419
26ac0430	420	/*
a5baffba	421	* This +1 makes it 'right' but won't make any sense if
	422	* there's a 0 byte header? This won't happen normally - a valid header
	423	* is at -least- a blank line (\n, or \r\n.)
	424	*/
	425	int
	426	HttpParserHdrSz(HttpParser *hp)
	427	{
26ac0430 AJ	428	assert(hp->state == 1);
	429	assert(hp->hdr_start != -1);
	430	assert(hp->hdr_end != -1);
	431	return hp->hdr_end - hp->hdr_start + 1;
a5baffba	432	}
	433
	434	const char *
	435	HttpParserHdrBuf(HttpParser *hp)
	436	{
26ac0430 AJ	437	assert(hp->state == 1);
	438	assert(hp->hdr_start != -1);
	439	assert(hp->hdr_end != -1);
	440	return hp->buf + hp->hdr_start;
a5baffba	441	}
	442
	443	int
	444	HttpParserRequestLen(HttpParser *hp)
	445	{
26ac0430	446	return hp->hdr_end - hp->req_start + 1;
a5baffba	447	}
52512f28	448	#endif
a5baffba	449
daa61305	450	/**
84cc2635	451	* Attempt to parse the request line.
84cc2635	452	*
26ac0430	453	* This will set the values in hmsg that it determines. One may end up
84cc2635	454	* with a partially-parsed buffer; the return value tells you whether
	455	* the values are valid or not.
	456	*
daa61305 AJ	457	* \retval 1 if parsed correctly
	458	* \retval 0 if more is needed
	459	* \retval -1 if error
84cc2635	460	*
	461	* TODO:
	462	* * have it indicate "error" and "not enough" as two separate conditions!
	463	* * audit this code as off-by-one errors are probably everywhere!
	464	*/
	465	int
	466	HttpParserParseReqLine(HttpParser *hmsg)
	467	{
26ac0430 AJ	468	int i = 0;
26ac0430 AJ	469	int retcode = 0;
6f96622b	470	unsigned int maj = 0, min = 0;
26ac0430 AJ	471	int last_whitespace = -1, line_end = -1;
	472
	473	debugs(74, 5, "httpParserParseReqLine: parsing " << hmsg->buf);
	474
	475	PROF_start(HttpParserParseReqLine);
	476	/* Find \r\n - end of URL+Version (and the request) */
	477	hmsg->req_end = -1;
	478	for (i = 0; i < hmsg->bufsiz; i++) {
	479	if (hmsg->buf[i] == '\n') {
	480	hmsg->req_end = i;
	481	break;
	482	}
	483	if (i < hmsg->bufsiz - 1 && hmsg->buf[i] == '\r' && hmsg->buf[i + 1] == '\n') {
	484	hmsg->req_end = i + 1;
	485	break;
	486	}
	487	}
	488	if (hmsg->req_end == -1) {
	489	retcode = 0;
	490	goto finish;
	491	}
	492	assert(hmsg->buf[hmsg->req_end] == '\n');
	493	/* Start at the beginning again */
	494	i = 0;
	495
	496	/* Find first non-whitespace - beginning of method */
	497	for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
	498	if (i >= hmsg->req_end) {
	499	retcode = 0;
	500	goto finish;
	501	}
	502	hmsg->m_start = i;
	503	hmsg->req_start = i;
	504
	505	/* Find first whitespace - end of method */
	506	for (; i < hmsg->req_end && (! xisspace(hmsg->buf[i])); i++);
	507	if (i >= hmsg->req_end) {
	508	retcode = 0;
	509	goto finish;
	510	}
	511	hmsg->m_end = i - 1;
	512
	513	/* Find first non-whitespace - beginning of URL+Version */
	514	for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
	515	if (i >= hmsg->req_end) {
	516	retcode = 0;
	517	goto finish;
	518	}
	519	hmsg->u_start = i;
	520
	521	/* Find \r\n or \n - thats the end of the line. Keep track of the last whitespace! */
	522	for (; i <= hmsg->req_end; i++) {
	523	/* If \n - its end of line */
	524	if (hmsg->buf[i] == '\n') {
	525	line_end = i;
	526	break;
	527	}
	528	/* XXX could be off-by-one wrong! */
	529	if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') {
	530	line_end = i;
	531	break;
	532	}
	533	/* If its a whitespace, note it as it'll delimit our version */
	534	if (hmsg->buf[i] == ' ' \|\| hmsg->buf[i] == '\t') {
535	last_whitespace = i;
536	}
537	}
538	if (i > hmsg->req_end) {
539	retcode = 0;
540	goto finish;
541	}
542
543	/* At this point we don't need the 'i' value; so we'll recycle it for version parsing */
544
545	/*
546	* At this point: line_end points to the first eol char (\r or \n);
547	* last_whitespace points to the last whitespace char in the URL.
548	* We know we have a full buffer here!
549	*/
550	if (last_whitespace == -1) {
551	maj = 0;
552	min = 9;
553	hmsg->u_end = line_end - 1;
554	assert(hmsg->u_end >= hmsg->u_start);
555	} else {
556	/* Find the first non-whitespace after last_whitespace */
557	/* XXX why <= vs < ? I do need to really re-audit all of this ..*/
558	for (i = last_whitespace; i <= hmsg->req_end && xisspace(hmsg->buf[i]); i++);
559	if (i > hmsg->req_end) {
560	retcode = 0;
561	goto finish;
562	}
563
564	/* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */
565	if (i + 5 >= hmsg->req_end \|\| (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) {
566	maj = 0;
567	min = 9;
568	hmsg->u_end = line_end - 1;
569	assert(hmsg->u_end >= hmsg->u_start);
570	} else {
571	/* Ok, lets try parsing! Yes, this needs refactoring! */
572	hmsg->v_start = i;
573	i += 5;
574
575	/* next should be 1 or more digits */
576	maj = 0;
6f96622b	577	for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && maj < 65536; i++) {
26ac0430 AJ	578	maj = maj * 10;
	579	maj = maj + (hmsg->buf[i]) - '0';
	580	}
6f96622b AJ	581	if (maj >= 65536) {
	582	retcode = -1;
	583	goto finish;
	584	}
26ac0430 AJ	585	if (i >= hmsg->req_end) {
	586	retcode = 0;
	587	goto finish;
	588	}
	589
	590	/* next should be .; we -have- to have this as we have a whole line.. */
	591	if (hmsg->buf[i] != '.') {
	592	retcode = 0;
	593	goto finish;
	594	}
	595	if (i + 1 >= hmsg->req_end) {
	596	retcode = 0;
	597	goto finish;
	598	}
	599
	600	/* next should be one or more digits */
	601	i++;
	602	min = 0;
6f96622b	603	for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && min < 65536; i++) {
26ac0430 AJ	604	min = min * 10;
	605	min = min + (hmsg->buf[i]) - '0';
	606	}
	607
6f96622b AJ	608	if (min >= 65536) {
	609	retcode = -1;
	610	goto finish;
	611	}
	612
26ac0430 AJ	613	/* Find whitespace, end of version */
	614	hmsg->v_end = i;
	615	hmsg->u_end = last_whitespace - 1;
	616	}
	617	}
	618
	619	/*
	620	* Rightio - we have all the schtuff. Return true; we've got enough.
	621	*/
	622	retcode = 1;
84cc2635	623
84cc2635	624	finish:
26ac0430 AJ	625	hmsg->v_maj = maj;
	626	hmsg->v_min = min;
	627	PROF_stop(HttpParserParseReqLine);
	628	debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
	629	"->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
	630	hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
	631	"; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << maj <<
	632	"/" << min << ")");
	633
	634	return retcode;
84cc2635	635	}
84cc2635	636