[thirdparty/squid.git] / src / HttpMsg.cc


/*
 * $Id: HttpMsg.cc,v 1.17 2005/09/15 20:19:41 wessels Exp $
 *
 * DEBUG: section 74    HTTP Message
 * AUTHOR: Alex Rousskov
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "HttpMsg.h"
#include "HttpRequest.h"
#include "HttpReply.h"

/*
 * Builds an empty message: the header is constructed for the given
 * owner, no Cache-Control is cached, sizes are zero, the protocol is
 * PROTO_NONE and the parser is positioned at the start line.
 */
HttpMsg::HttpMsg(http_hdr_owner_type owner):
        header(owner),
        cache_control(NULL),
        hdr_sz(0),
        content_length(0),
        protocol(PROTO_NONE),
        pstate(psReadyToParseStartLine)
{
}


/*
 * Pre-increment for the parser state enum: moves aState to the
 * enumerator with the next integer value and returns the same object.
 * No range check is performed.
 */
HttpMsgParseState &operator++ (HttpMsgParseState &aState)
{
    aState = static_cast<HttpMsgParseState>(static_cast<int>(aState) + 1);
    return aState;
}


/* find end of headers */
int
httpMsgIsolateHeaders(const char **parse_start, const char **blk_start, const char **blk_end)
{
    /*
     * parse_start points to the first line of HTTP message *headers*,
     * not including the request or status lines
     */
    size_t l = strlen(*parse_start);
    size_t end = headersEnd(*parse_start, l);
    int nnl;

    if (end) {
        *blk_start = *parse_start;
        *blk_end = *parse_start + end - 1;
        /*
         * leave blk_end pointing to the first character after the
         * first newline which terminates the headers
         */
        assert(**blk_end == '\n');

        while (*(*blk_end - 1) == '\r')
            (*blk_end)--;

        assert(*(*blk_end - 1) == '\n');

        *parse_start += end;

        return 1;
    }

    /*
     * If we didn't find the end of headers, and parse_start does
     * NOT point to a CR or NL character, then return failure
     */
    if (**parse_start != '\r' && **parse_start != '\n')
        return 0;		/* failure */

    /*
     * If we didn't find the end of headers, and parse_start does point
     * to an empty line, then we have empty headers.  Skip all CR and
     * NL characters up to the first NL.  Leave parse_start pointing at
     * the first character after the first NL.
     */
    *blk_start = *parse_start;

    *blk_end = *blk_start;

    for (nnl = 0; nnl == 0; (*parse_start)++) {
        if (**parse_start == '\r')
            (void) 0;
        else if (**parse_start == '\n')
            nnl++;
        else
            break;
    }

    return 1;
}

/*
 * Find the first line terminator and isolate the start line.
 *
 * *parse_start must point to 0-terminated text.  When the text contains
 * no CR or LF at all, returns 0 and leaves every pointer untouched.
 *
 * Otherwise returns 1 with:
 *   *blk_start   -- the original *parse_start
 *   *blk_end     -- just past the terminator, which is any run of CRs
 *                   followed by at most one LF (so the terminator bytes
 *                   are inside [*blk_start, *blk_end))
 *   *parse_start -- same position as *blk_end
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const line = *parse_start;

    /* strcspn() returns size_t; keep it unsigned rather than narrowing to int */
    const size_t lineLen = strcspn(line, "\r\n");

    if (line[lineLen] == '\0')  /* no CRLF found */
        return 0;

    const char *next = line + lineLen;

    while (*next == '\r')       /* CR */
        ++next;

    if (*next == '\n')          /* LF */
        ++next;

    *blk_start = line;
    *blk_end = next;
    *parse_start = next;

    return 1;
}

// Parses the start line and headers found in buf into this message.
//
// Returns true when httpMsgParseStep() reports success; hdr_sz is then
// equal to the header length reported by headersEnd().
// Returns false otherwise, with *error telling the caller why:
//   HTTP_STATUS_NONE      -- the end of headers has not been seen yet and
//                            eof is false (more data is needed), or the
//                            parse step unexpectedly asked for more data
//   HTTP_INVALID_HEADER   -- eof was reached without an end of headers,
//                            or the isolated headers could not be parsed
//   HTTP_HEADER_TOO_LARGE -- headers exceed Config.maxReplyHeaderSize
// When sanityCheckStartLine() rejects the buffer, *error is whatever that
// call left in it.
bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error)
{
    assert(error);
    *error = HTTP_STATUS_NONE;

    // httpMsgParseStep() and debugging require 0-termination, unfortunately
    buf->terminate(); // does not affect content size

    // find the end of headers
    // TODO: Remove? httpReplyParseStep() should do similar checks
    const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());

    // hdr_len is unsigned, so "not found" is exactly zero
    if (hdr_len == 0) {
        debugs(58, 3, "HttpMsg::parse: failed to find end of headers " <<
               "(eof: " << eof << ") in '" << buf->content() << "'");

        if (eof) // iff we have seen the end, this is an error
            *error = HTTP_INVALID_HEADER;

        return false;
    }

    // TODO: move to httpReplyParseStep()
    if (hdr_len > Config.maxReplyHeaderSize) {
        debugs(58, 1, "HttpMsg::parse: Too large reply header (" <<
               hdr_len << " > " << Config.maxReplyHeaderSize << ")");
        *error = HTTP_HEADER_TOO_LARGE;
        return false;
    }

    if (!sanityCheckStartLine(buf, error))	// redundant; could be removed
        return false;

    const int res = httpMsgParseStep(buf->content(), eof);

    if (res < 0) { // error
        debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers " <<
               "in '" << buf->content() << "'");
        *error = HTTP_INVALID_HEADER;
        return false;
    }

    if (res == 0) {
        debugs(58, 2, "HttpMsg::parse: strange, need more data near '" <<
               buf->content() << "'");
        return false; // but this should not happen due to headersEnd() above
    }

    assert(res > 0);
    debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) " <<
           "near '" << buf->content() << "'");

    // the parse step and headersEnd() should agree on the header size;
    // when they do not, log it and trust headersEnd()
    if (hdr_sz != (int)hdr_len) {
        debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
               hdr_sz << " != " << hdr_len);
        hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
    }

    return true;
}


/*
 * Parses a 0-terminated buffer into this HttpMsg, resuming from the
 * current parser state (pstate).  hdr_sz is updated to the number of
 * bytes consumed from buf after each completed stage.
 * Returns:
 *      1 -- success
 *      0 -- need more data (partial parse)
 *     -1 -- parse error
 */
int
HttpMsg::httpMsgParseStep(const char *buf, int atEnd)
{
    assert(buf);
    assert(pstate < psParsed);

    const char *cursor = buf;       /* next unparsed character */
    const char *blkBegin = NULL;    /* start of the block being parsed */
    const char *blkEnd = buf;       /* end of the block being parsed */

    if (pstate == psReadyToParseStartLine) {
        if (!httpMsgIsolateStart(&cursor, &blkBegin, &blkEnd))
            return 0;

        if (!parseFirstLine(blkBegin, blkEnd))
            return httpMsgParseError();

        hdr_sz = cursor - buf;

        ++pstate;
    }

    if (pstate == psReadyToParseHeaders) {
        if (!httpMsgIsolateHeaders(&cursor, &blkBegin, &blkEnd)) {
            if (!atEnd)
                return 0;

            /* no more input will arrive: treat all remaining text as headers */
            blkBegin = cursor;
            blkEnd = blkBegin + strlen(blkBegin);
        }

        if (!httpHeaderParse(&header, blkBegin, blkEnd))
            return httpMsgParseError();

        hdrCacheInit();

        hdr_sz = cursor - buf;

        ++pstate;
    }

    return 1;
}


/* handy: resets and returns -1 */
int
HttpMsg::httpMsgParseError()
{
    reset();
    /* indicate an error */

    if (HttpReply *rep = dynamic_cast<HttpReply*>(this))
        rep->sline.status = HTTP_INVALID_HEADER;

    return -1;
}


/*
 * Returns true (non-zero) if the connection should be "persistent"
 * after processing this message.
 *
 * http_ver is consulted only when WHEN_SQUID_IS_NOT_HTTP1_1 is enabled;
 * otherwise every message is judged by the rules for old HTTP versions:
 * a "keep-alive" connection directive is required, and the broken
 * Netscape 3.x agents listed below are refused when no Via header is
 * present.
 */
int
httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr)
{
#if WHEN_SQUID_IS_NOT_HTTP1_1

    if ((http_ver.major >= 1) && (http_ver.minor >= 1)) {
        /*
         * for modern versions of HTTP: persistent unless there is
         * a "Connection: close" header.
         */
        return !httpHeaderHasConnDir(hdr, "close");
    }

#endif

    /*
     * Persistent connections in Netscape 3.x are allegedly broken,
     * return false if it is a browser connection.  If there is a
     * VIA header, then we assume this is NOT a browser connection.
     */
    const char *agent = httpHeaderGetStr(hdr, HDR_USER_AGENT);

    if (agent && !httpHeaderHas(hdr, HDR_VIA)) {
        if (!strncasecmp(agent, "Mozilla/3.", 10))
            return 0;

        if (!strncasecmp(agent, "Netscape/3.", 11))
            return 0;
    }

    /* for old versions of HTTP: persistent if has "keep-alive" */
    return httpHeaderHasConnDir(hdr, "keep-alive");
}

/*
 * Packs the message head into p: first line (via packFirstLineInto,
 * which receives full_uri), then the headers, then the empty line that
 * ends the header block.
 */
void HttpMsg::packInto(Packer *p, bool full_uri) const
{
    static const char crlf[] = "\r\n";

    packFirstLineInto(p, full_uri);
    httpHeaderPackInto(&header, p);
    packerAppend(p, crlf, 2);
}

/*
 * Caches values derived from the parsed headers: the Content-Length
 * value and the Cache-Control object.  Must be called while no
 * Cache-Control object is cached (asserted).
 */
void HttpMsg::hdrCacheInit()
{
    content_length = httpHeaderGetInt(&header, HDR_CONTENT_LENGTH);

    /* a previously cached object must have been released by now */
    assert(cache_control == NULL);
    cache_control = httpHeaderGetCc(&header);
}
Commit	Line	Data
2246b732	1
2246b732	2	/*
429f7150	3	* $Id: HttpMsg.cc,v 1.17 2005/09/15 20:19:41 wessels Exp $
2246b732	4	*
	5	* DEBUG: section 74 HTTP Message
	6	* AUTHOR: Alex Rousskov
	7	*
2b6662ba	8	* SQUID Web Proxy Cache http://www.squid-cache.org/
e25c139f	9	* ----------------------------------------------------------
2246b732	10	*
2b6662ba	11	* Squid is the result of efforts by numerous individuals from
	12	* the Internet community; see the CONTRIBUTORS file for full
	13	* details. Many organizations have provided support for Squid's
	14	* development; see the SPONSORS file for full details. Squid is
	15	* Copyrighted (C) 2001 by the Regents of the University of
	16	* California; see the COPYRIGHT file for full details. Squid
	17	* incorporates software developed and/or copyrighted by other
	18	* sources; see the CREDITS file for full details.
2246b732	19	*
	20	* This program is free software; you can redistribute it and/or modify
	21	* it under the terms of the GNU General Public License as published by
	22	* the Free Software Foundation; either version 2 of the License, or
	23	* (at your option) any later version.
	24	*
	25	* This program is distributed in the hope that it will be useful,
	26	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	27	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	28	* GNU General Public License for more details.
	29	*
	30	* You should have received a copy of the GNU General Public License
	31	* along with this program; if not, write to the Free Software
cbdec147	32	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
e25c139f	33	*
2246b732	34	*/
	35
	36	#include "squid.h"
8596962e	37	#include "HttpMsg.h"
	38	#include "HttpRequest.h"
	39	#include "HttpReply.h"
	40
	41	HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner),
	42	cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE),
	43	pstate(psReadyToParseStartLine)
	44	{}
	45
	46
	47	HttpMsgParseState &operator++ (HttpMsgParseState &aState)
	48	{
	49	int tmp = (int)aState;
	50	aState = (HttpMsgParseState)(++tmp);
	51	return aState;
	52	}
	53
2246b732	54
	55	/* find end of headers */
	56	int
	57	httpMsgIsolateHeaders(const char parse_start, const char blk_start, const char **blk_end)
	58	{
bdb1a5d5	59	/*
	60	* parse_start points to the first line of HTTP message headers,
	61	* not including the request or status lines
	62	*/
	63	size_t l = strlen(*parse_start);
	64	size_t end = headersEnd(*parse_start, l);
	65	int nnl;
62e76326	66
2246b732	67	if (end) {
62e76326	68	blk_start = parse_start;
	69	blk_end = parse_start + end - 1;
	70	/*
	71	* leave blk_end pointing to the first character after the
	72	* first newline which terminates the headers
	73	*/
	74	assert(**blk_end == '\n');
	75
	76	while ((blk_end - 1) == '\r')
	77	(*blk_end)--;
	78
	79	assert((blk_end - 1) == '\n');
	80
	81	*parse_start += end;
	82
	83	return 1;
2246b732	84	}
62e76326	85
bdb1a5d5	86	/*
	87	* If we didn't find the end of headers, and parse_start does
	88	* NOT point to a CR or NL character, then return failure
	89	*/
	90	if (parse_start != '\r' && parse_start != '\n')
62e76326	91	return 0; /* failure */
62e76326	92
bdb1a5d5	93	/*
	94	* If we didn't find the end of headers, and parse_start does point
	95	* to an empty line, then we have empty headers. Skip all CR and
	96	* NL characters up to the first NL. Leave parse_start pointing at
	97	* the first character after the first NL.
	98	*/
	99	blk_start = parse_start;
62e76326	100
bdb1a5d5	101	blk_end = blk_start;
62e76326	102
a4295415	103	for (nnl = 0; nnl == 0; (*parse_start)++) {
62e76326	104	if (**parse_start == '\r')
	105	(void) 0;
	106	else if (**parse_start == '\n')
	107	nnl++;
	108	else
	109	break;
2246b732	110	}
62e76326	111
bdb1a5d5	112	return 1;
2246b732	113	}
2246b732	114
8596962e	115	/* find first CRLF */
	116	static int
	117	httpMsgIsolateStart(const char parse_start, const char blk_start, const char **blk_end)
	118	{
	119	int slen = strcspn(*parse_start, "\r\n");
	120
	121	if (!(parse_start)[slen]) / no CRLF found */
	122	return 0;
	123
	124	blk_start = parse_start;
	125
	126	blk_end = blk_start + slen;
	127
	128	while (*blk_end == '\r') / CR */
	129	(*blk_end)++;
	130
	131	if (*blk_end == '\n') / LF */
	132	(*blk_end)++;
	133
	134	parse_start = blk_end;
	135
	136	return 1;
	137	}
	138
	139	// negative return is the negated HTTP_ error code
	140	// zero return means need more data
	141	// positive return is the size of parsed headers
	142	bool HttpMsg::parse(MemBuf buf, bool eof, http_status error)
	143	{
	144	assert(error);
	145	*error = HTTP_STATUS_NONE;
	146
	147	// httpMsgParseStep() and debugging require 0-termination, unfortunately
	148	buf->terminate(); // does not affect content size
	149
	150	// find the end of headers
	151	// TODO: Remove? httpReplyParseStep() should do similar checks
	152	const size_t hdr_len = headersEnd(buf->content(), buf->contentSize());
	153
	154	if (hdr_len <= 0) {
	155	debugs(58, 3, "HttpMsg::parse: failed to find end of headers " <<
	156	"(eof: " << eof << ") in '" << buf->content() << "'");
	157
	158	if (eof) // iff we have seen the end, this is an error
	159	*error = HTTP_INVALID_HEADER;
	160
	161	return false;
	162	}
	163
	164	// TODO: move to httpReplyParseStep()
	165	if (hdr_len > Config.maxReplyHeaderSize) {
	166	debugs(58, 1, "HttpMsg::parse: Too large reply header (" <<
	167	hdr_len << " > " << Config.maxReplyHeaderSize);
	168	*error = HTTP_HEADER_TOO_LARGE;
	169	return false;
	170	}
	171
	172	if (!sanityCheckStartLine(buf, error)) // redundant; could be remvoed
	173	return false;
	174
	175	const int res = httpMsgParseStep(buf->content(), eof);
	176
	177	if (res < 0) { // error
	178	debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers " <<
179	"in '" << buf->content() << "'");
180	*error = HTTP_INVALID_HEADER;
181	return false;
182	}
183
184	if (res == 0) {
185	debugs(58, 2, "HttpMsg::parse: strange, need more data near '" <<
186	buf->content() << "'");
187	return false; // but this should not happen due to headersEnd() above
188	}
189
190	assert(res > 0);
191	debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) " <<
192	"near '" << buf->content() << "'");
193
194	if (hdr_sz != (int)hdr_len) {
195	debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
196	hdr_sz << " != " << hdr_len);
197	hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
198	}
199
200	return true;
201	}
202
203
204
205	/*
206	* parses a 0-terminating buffer into HttpMsg.
207	* Returns:
208	* 1 -- success
209	* 0 -- need more data (partial parse)
210	* -1 -- parse error
211	*/
212	int
213	HttpMsg::httpMsgParseStep(const char *buf, int atEnd)
214	{
215	const char *parse_start = buf;
216	const char blk_start, blk_end;
217	const char **parse_end_ptr = &blk_end;
218	assert(parse_start);
219	assert(pstate < psParsed);
8596962e	220
	221	*parse_end_ptr = parse_start;
	222
	223	if (pstate == psReadyToParseStartLine) {
	224	if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end))
	225	return 0;
	226
429f7150	227	if (!parseFirstLine(blk_start, blk_end))
429f7150	228	return httpMsgParseError();
8596962e	229
	230	*parse_end_ptr = parse_start;
	231
	232	hdr_sz = *parse_end_ptr - buf;
	233
	234	++pstate;
	235	}
	236
	237	if (pstate == psReadyToParseHeaders) {
	238	if (!httpMsgIsolateHeaders(&parse_start, &blk_start, &blk_end)) {
	239	if (atEnd)
	240	blk_start = parse_start, blk_end = blk_start + strlen(blk_start);
	241	else
	242	return 0;
	243	}
	244
	245	if (!httpHeaderParse(&header, blk_start, blk_end))
	246	return httpMsgParseError();
	247
07947ad8	248	hdrCacheInit();
8596962e	249
	250	*parse_end_ptr = parse_start;
	251
	252	hdr_sz = *parse_end_ptr - buf;
	253
	254	++pstate;
	255	}
	256
	257	return 1;
	258	}
	259
	260
	261	/* handy: resets and returns -1 */
	262	int
	263	HttpMsg::httpMsgParseError()
	264	{
	265	reset();
	266	/* indicate an error */
	267
	268	if (HttpReply rep = dynamic_cast<HttpReply>(this))
	269	rep->sline.status = HTTP_INVALID_HEADER;
	270
	271	return -1;
	272	}
	273
	274
	275
62e76326	276	/* returns true if connection should be "persistent"
2246b732	277	* after processing this message */
2246b732	278	int
450e0c10	279	httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr)
2246b732	280	{
21b92762	281	#if WHEN_SQUID_IS_NOT_HTTP1_1
21b92762	282
bffee5af	283	if ((http_ver.major >= 1) && (http_ver.minor >= 1)) {
62e76326	284	/*
	285	* for modern versions of HTTP: persistent unless there is
	286	* a "Connection: close" header.
	287	*/
	288	return !httpHeaderHasConnDir(hdr, "close");
8596962e	289	} else
21b92762	290	#else
	291	{
	292	#endif
62e76326	293	/*
	294	* Persistent connections in Netscape 3.x are allegedly broken,
	295	* return false if it is a browser connection. If there is a
	296	* VIA header, then we assume this is NOT a browser connection.
	297	*/
	298	const char *agent = httpHeaderGetStr(hdr, HDR_USER_AGENT);
	299
8596962e	300	if (agent && !httpHeaderHas(hdr, HDR_VIA)) {
	301	if (!strncasecmp(agent, "Mozilla/3.", 10))
	302	return 0;
62e76326	303
8596962e	304	if (!strncasecmp(agent, "Netscape/3.", 11))
8596962e	305	return 0;
2246b732	306	}
8596962e	307
	308	/* for old versions of HTTP: persistent if has "keep-alive" */
	309	return httpHeaderHasConnDir(hdr, "keep-alive");
	310	}
2246b732	311	}
8596962e	312
	313	void HttpMsg::packInto(Packer *p, bool full_uri) const
	314	{
	315	packFirstLineInto(p, full_uri);
	316	httpHeaderPackInto(&header, p);
	317	packerAppend(p, "\r\n", 2);
	318	}
	319
07947ad8	320	void HttpMsg::hdrCacheInit()
	321	{
	322	content_length = httpHeaderGetInt(&header, HDR_CONTENT_LENGTH);
	323	assert(NULL == cache_control);
	324	cache_control = httpHeaderGetCc(&header);
	325	}