[thirdparty/squid.git] / src / HttpMsg.cc

/*
 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 74    HTTP Message */

#include "squid.h"
#include "Debug.h"
#include "http/one/Parser.h"
#include "HttpHeaderTools.h"
#include "HttpMsg.h"
#include "MemBuf.h"
#include "mime_header.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"

/*
 * Creates an empty message. The header member is constructed with the
 * given owner and parsing starts at the start line.
 */
HttpMsg::HttpMsg(http_hdr_owner_type owner):
    http_ver(Http::ProtocolVersion()),
    header(owner),
    cache_control(NULL), // stays unset until hdrCacheInit() assigns it
    hdr_sz(0),
    content_length(0),
    pstate(psReadyToParseStartLine), // first state handled by httpMsgParseStep()
    sources(0)
{}

/* A message must not be destroyed while body_pipe is still set. */
HttpMsg::~HttpMsg()
{
    assert(!body_pipe);
}

/* Prefix increment: replaces aState with the enumerator value that follows it. */
HttpMsgParseState &operator++ (HttpMsgParseState &aState)
{
    aState = static_cast<HttpMsgParseState>(static_cast<int>(aState) + 1);
    return aState;
}

/*
 * Isolates the header block that starts at *parse_start.
 *
 * *parse_start must address the first header line of the message, i.e.
 * the text that follows the request or status line; l is the number of
 * bytes handed to headersEnd().
 *
 * Returns 1 after storing the block boundaries in *blk_start / *blk_end
 * and advancing *parse_start past the block. Returns 0, touching nothing,
 * when no end of headers was found and the input does not begin with an
 * empty line.
 */
static int
httpMsgIsolateHeaders(const char **parse_start, int l, const char **blk_start, const char **blk_end)
{
    const char *const hdrBegin = *parse_start;
    const size_t hdrLen = headersEnd(hdrBegin, l);

    if (hdrLen > 0) {
        *blk_start = hdrBegin;

        /* the byte that closes the header section has to be a newline */
        *blk_end = hdrBegin + hdrLen - 1;
        assert(**blk_end == '\n');

        /*
         * step back over carriage returns so that blk_end ends up just
         * after the newline which terminates the last header line
         */
        while ((*blk_end)[-1] == '\r')
            --(*blk_end);

        assert((*blk_end)[-1] == '\n');

        *parse_start += hdrLen;
        return 1;
    }

    /* no end of headers: only an input that opens with CR or NL is acceptable */
    const char firstByte = *hdrBegin;

    if (!(firstByte == '\r' || firstByte == '\n'))
        return 0;       /* failure */

    /* an empty line right away means an empty header block */
    *blk_start = hdrBegin;
    *blk_end = hdrBegin;

    /*
     * consume any run of CRs plus the first NL (if present) so that
     * parse_start is left on the character after that NL
     */
    const char *cursor = hdrBegin;

    while (*cursor == '\r')
        ++cursor;

    if (*cursor == '\n')
        ++cursor;

    *parse_start = cursor;
    return 1;
}

/*
 * Isolates the first line of the 0-terminated text at *parse_start.
 *
 * Returns 0, changing nothing, when the text contains neither CR nor LF.
 * Otherwise returns 1 with *blk_start on the first character of the line
 * and both *blk_end and *parse_start just past the line terminator: any
 * run of CRs followed by at most one LF.
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const lineBegin = *parse_start;
    const int lineLen = strcspn(lineBegin, "\r\n");
    const char *cursor = lineBegin + lineLen;

    /* reaching the terminating 0 first means no CRLF found */
    if (*cursor == '\0')
        return 0;

    /* swallow the CR run ... */
    for (; *cursor == '\r'; ++cursor) {
    }

    /* ... and a single LF after it */
    if (*cursor == '\n')
        ++cursor;

    *blk_start = lineBegin;
    *blk_end = cursor;
    *parse_start = cursor;

    return 1;
}

/*
 * Parses the start line and the header block of the message held in buf.
 *
 * buf   - message bytes; sz of them are available. The buffer is also
 *         streamed into debug output as a C string.
 *         NOTE(review): that presumes buf is 0-terminated - confirm callers.
 * eof   - true when no further data will arrive, which turns "incomplete"
 *         into an error.
 * error - must not be NULL. Reset to Http::scNone on entry and set to the
 *         failure status whenever the input is rejected.
 *
 * Returns true when the headers were parsed; hdr_sz then equals the header
 * length reported by headersEnd(). Returns false otherwise: *error is left
 * at Http::scNone if more data may still complete the message, and names
 * the failure in every other case.
 */
bool
HttpMsg::parse(const char *buf, const size_t sz, bool eof, Http::StatusCode *error)
{
    assert(error);
    *error = Http::scNone;

    // find the end of headers; zero means it was not found in buf
    const size_t hdr_len = headersEnd(buf, sz);

    // sanity check the start line to see if this is in fact an HTTP message
    if (!sanityCheckStartLine(buf, hdr_len, error)) {
        // NP: sanityCheck sets *error and sends debug warnings on syntax errors.
        // if we have seen the connection close, this is an error too
        if (eof && *error == Http::scNone)
            *error = Http::scInvalidHeader;

        return false;
    }

    if (hdr_len > Config.maxReplyHeaderSize || (hdr_len == 0 && sz > Config.maxReplyHeaderSize)) {
        // report the size that actually tripped the limit: the header length
        // when it is known, otherwise the amount of data buffered so far
        const size_t offending = hdr_len ? hdr_len : sz;
        debugs(58, DBG_IMPORTANT, "HttpMsg::parse: Too large reply header (" << offending << " > " << Config.maxReplyHeaderSize << ")");
        *error = Http::scHeaderTooLarge;
        return false;
    }

    if (hdr_len == 0) {
        debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf << "'");

        if (eof) // iff we have seen the end, this is an error
            *error = Http::scInvalidHeader;

        return false;
    }

    const int res = httpMsgParseStep(buf, sz, eof);

    if (res < 0) { // error
        debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf << "'");
        *error = Http::scInvalidHeader;
        return false;
    }

    if (res == 0) {
        debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf << "'");
        *error = Http::scInvalidHeader;
        return false; // but this should not happen due to headersEnd() above
    }

    assert(res > 0);
    debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf << "'");

    // the step parser and headersEnd() should agree on the header size;
    // when they do not, headersEnd() wins
    if (hdr_sz != static_cast<int>(hdr_len)) {
        debugs(58, DBG_IMPORTANT, "internal HttpMsg::parse vs. headersEnd error: " <<
               hdr_sz << " != " << hdr_len);
        hdr_sz = static_cast<int>(hdr_len); // because old http.cc code used hdr_len
    }

    return true;
}

/*
 * parseCharBuf() fills in this HttpMsg from a character buffer of HTTP
 * headers (buf) that is not required to be 0-terminated. 'end' is the
 * offset of the end of the headers within buf: the caller knows where
 * they stop but cannot terminate the buffer itself, so the bytes are
 * copied into a private, terminated MemBuf before being parsed.
 * Returns true on success.
 */
bool
HttpMsg::parseCharBuf(const char *buf, ssize_t end)
{
    MemBuf mb;

    /* this entry point is never used incrementally, so drop any earlier state */
    reset();

    mb.init();
    mb.append(buf, end);
    mb.terminate();

    /* only a return value of 1 from the step parser counts as success */
    const bool parsedOk = (httpMsgParseStep(mb.buf, mb.size, 0) == 1);

    mb.clean();
    return parsedOk;
}

/*
 * Parses a 0-terminated buffer into this HttpMsg, resuming from pstate.
 * The start line and the header block are handled as two stages; each
 * completed stage updates hdr_sz and advances pstate.
 * Returns:
 *       1 -- success
 *       0 -- need more data (partial parse)
 *      -1 -- parse error (the message has been reset)
 */
int
HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
{
    const char *cursor = buf;       /* next byte that has not been parsed */
    int remaining = len;            /* bytes left for the header stage */
    const char *blk_start = NULL;
    const char *blk_end = NULL;

    assert(cursor);
    assert(pstate < psParsed);

    PROF_start(HttpMsg_httpMsgParseStep);

    if (pstate == psReadyToParseStartLine) {
        const bool haveLine = httpMsgIsolateStart(&cursor, &blk_start, &blk_end);

        if (!haveLine || !parseFirstLine(blk_start, blk_end)) {
            PROF_stop(HttpMsg_httpMsgParseStep);
            /* an incomplete line needs more data; an unparsable one is an error */
            return haveLine ? httpMsgParseError() : 0;
        }

        /* everything before the cursor belongs to the start line */
        hdr_sz = cursor - buf;
        remaining -= hdr_sz;

        ++pstate;
    }

    /*
     * XXX This code uses the cursor; but if we're incrementally parsing then
     * this code might not actually be given a cursor at the right spot (just
     * after headers.) Grr.
     */
    if (pstate == psReadyToParseHeaders) {
        if (!httpMsgIsolateHeaders(&cursor, remaining, &blk_start, &blk_end)) {
            if (!atEnd) {
                PROF_stop(HttpMsg_httpMsgParseStep);
                return 0;
            }

            /* no more input is coming: treat the rest of the buffer as the header block */
            blk_start = cursor;
            blk_end = blk_start + strlen(blk_start);
        }

        if (!header.parse(blk_start, blk_end - blk_start)) {
            PROF_stop(HttpMsg_httpMsgParseStep);
            return httpMsgParseError();
        }

        hdrCacheInit();

        hdr_sz = cursor - buf;

        ++pstate;
    }

    PROF_stop(HttpMsg_httpMsgParseStep);
    return 1;
}

/*
 * Parses the header block collected by an HTTP/1 parser into this message.
 * Returns false, leaving pstate at psError, when the block cannot be
 * parsed. Otherwise sets pstate to psParsed, refreshes the cached header
 * values and returns true.
 */
bool
HttpMsg::parseHeader(Http1::Parser &hp)
{
    // An HTTP/1 message carries "zero or more header fields";
    // an empty block needs no parsing at all.
    if (hp.headerBlockSize()) {
        // XXX: c_str() reallocates. performance regression.
        const bool parsedOk = header.parse(hp.mimeHeader().c_str(), hp.headerBlockSize());

        if (!parsedOk) {
            pstate = psError;
            return false;
        }
    }

    pstate = psParsed;
    hdrCacheInit();
    return true;
}

/*
 * handy: resets this message via reset() and returns -1, the value
 * httpMsgParseStep() hands back to report a parse error
 */
int
HttpMsg::httpMsgParseError()
{
    reset();
    return -1;
}

/*
 * Replaces any existing Content-Length header field with clen and
 * records the same value in content_length.
 */
void
HttpMsg::setContentLength(int64_t clen)
{
    header.delById(Http::HdrType::CONTENT_LENGTH); // if any
    header.putInt64(Http::HdrType::CONTENT_LENGTH, clen);
    content_length = clen; // keep the cached value in step with the header
}

/*
 * Tells whether this message allows the connection to persist, judging
 * by the protocol version and the "close" / "keep-alive" connection
 * directives found in the header.
 */
bool
HttpMsg::persistent() const
{
    const bool newerThanHttp10 = http_ver > Http::ProtocolVersion(1,0);

    /* old versions of HTTP: persistent only if "keep-alive" is present */
    if (!newerThanHttp10)
        return httpHeaderHasConnDir(&header, "keep-alive");

    /* modern versions of HTTP: persistent unless "Connection: close" is present */
    return !httpHeaderHasConnDir(&header, "close");
}

/*
 * Packs the whole message head into p: the first line (full_uri is
 * forwarded to packFirstLineInto()), the header fields, and a final CRLF.
 */
void HttpMsg::packInto(Packable *p, bool full_uri) const
{
    packFirstLineInto(p, full_uri);
    header.packInto(p);
    p->append("\r\n", 2); // CRLF appended after the header fields
}

/*
 * Refreshes the values cached from the header: content_length and
 * cache_control. cache_control must still be unset when this runs.
 */
void HttpMsg::hdrCacheInit()
{
    content_length = header.getInt64(Http::HdrType::CONTENT_LENGTH);
    assert(NULL == cache_control);
    cache_control = header.getCc();
}

/*
 * useful for debugging: packs this message's first line into mb via
 * packFirstLineInto(), with its second argument set to true
 */
void HttpMsg::firstLineBuf(MemBuf& mb)
{
    packFirstLineInto(&mb, true);
}
Commit	Line	Data
2246b732	1	/*
ef57eb7b	2	* Copyright (C) 1996-2016 The Squid Software Foundation and contributors
e25c139f	3	*
bbc27441 AJ	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
2246b732	7	*/
2246b732	8
bbc27441 AJ	9	/* DEBUG: section 74 HTTP Message */
bbc27441 AJ	10
582c2af2 FC	11	#include "squid.h"
582c2af2 FC	12	#include "Debug.h"
af2980f3	13	#include "http/one/Parser.h"
a5bac1d2	14	#include "HttpHeaderTools.h"
8596962e	15	#include "HttpMsg.h"
0eb49b6d	16	#include "MemBuf.h"
b6149797	17	#include "mime_header.h"
582c2af2	18	#include "profiler/Profiler.h"
4d5904f7	19	#include "SquidConfig.h"
8596962e	20
ce867f0a	21	HttpMsg::HttpMsg(http_hdr_owner_type owner):
f53969cc SM	22	http_ver(Http::ProtocolVersion()),
	23	header(owner),
	24	cache_control(NULL),
	25	hdr_sz(0),
	26	content_length(0),
88df846b CT	27	pstate(psReadyToParseStartLine),
88df846b CT	28	sources(0)
8596962e	29	{}
8596962e	30
4a56ee8d	31	HttpMsg::~HttpMsg()
4a56ee8d	32	{
5f8252d2	33	assert(!body_pipe);
4a56ee8d	34	}
4a56ee8d	35
8596962e	36	HttpMsgParseState &operator++ (HttpMsgParseState &aState)
	37	{
	38	int tmp = (int)aState;
	39	aState = (HttpMsgParseState)(++tmp);
	40	return aState;
	41	}
	42
2246b732	43	/* find end of headers */
784619e6	44	static int
666f514b	45	httpMsgIsolateHeaders(const char parse_start, int l, const char blk_start, const char **blk_end)
2246b732	46	{
bdb1a5d5	47	/*
	48	* parse_start points to the first line of HTTP message headers,
	49	* not including the request or status lines
	50	*/
bdb1a5d5	51	size_t end = headersEnd(*parse_start, l);
bdb1a5d5	52	int nnl;
62e76326	53
2246b732	54	if (end) {
62e76326	55	blk_start = parse_start;
	56	blk_end = parse_start + end - 1;
	57	/*
	58	* leave blk_end pointing to the first character after the
	59	* first newline which terminates the headers
	60	*/
	61	assert(**blk_end == '\n');
	62
	63	while ((blk_end - 1) == '\r')
5e263176	64	--(*blk_end);
62e76326	65
	66	assert((blk_end - 1) == '\n');
	67
	68	*parse_start += end;
	69
	70	return 1;
2246b732	71	}
62e76326	72
bdb1a5d5	73	/*
	74	* If we didn't find the end of headers, and parse_start does
	75	* NOT point to a CR or NL character, then return failure
	76	*/
	77	if (parse_start != '\r' && parse_start != '\n')
f53969cc	78	return 0; /* failure */
62e76326	79
bdb1a5d5	80	/*
	81	* If we didn't find the end of headers, and parse_start does point
	82	* to an empty line, then we have empty headers. Skip all CR and
	83	* NL characters up to the first NL. Leave parse_start pointing at
	84	* the first character after the first NL.
	85	*/
	86	blk_start = parse_start;
62e76326	87
bdb1a5d5	88	blk_end = blk_start;
62e76326	89
95dc7ff4	90	for (nnl = 0; nnl == 0; ++(*parse_start)) {
62e76326	91	if (**parse_start == '\r')
	92	(void) 0;
	93	else if (**parse_start == '\n')
95dc7ff4	94	++nnl;
62e76326	95	else
62e76326	96	break;
2246b732	97	}
62e76326	98
bdb1a5d5	99	return 1;
2246b732	100	}
2246b732	101
8596962e	102	/* find first CRLF */
	103	static int
	104	httpMsgIsolateStart(const char parse_start, const char blk_start, const char **blk_end)
	105	{
	106	int slen = strcspn(*parse_start, "\r\n");
	107
	108	if (!(parse_start)[slen]) / no CRLF found */
	109	return 0;
	110
	111	blk_start = parse_start;
	112
	113	blk_end = blk_start + slen;
	114
	115	while (*blk_end == '\r') / CR */
95dc7ff4	116	++(*blk_end);
8596962e	117
8596962e	118	if (*blk_end == '\n') / LF */
95dc7ff4	119	++(*blk_end);
8596962e	120
	121	parse_start = blk_end;
	122
	123	return 1;
	124	}
	125
955394ce	126	// negative return is the negated Http::StatusCode error code
8596962e	127	// zero return means need more data
8596962e	128	// positive return is the size of parsed headers
955394ce	129	bool
84ae6223	130	HttpMsg::parse(const char buf, const size_t sz, bool eof, Http::StatusCode error)
8596962e	131	{
8596962e	132	assert(error);
955394ce	133	*error = Http::scNone;
8596962e	134
8596962e	135	// find the end of headers
84ae6223	136	const size_t hdr_len = headersEnd(buf, sz);
8596962e	137
96ee497f AJ	138	// sanity check the start line to see if this is in fact an HTTP message
96ee497f AJ	139	if (!sanityCheckStartLine(buf, hdr_len, error)) {
281832c6 AJ	140	// NP: sanityCheck sets *error and sends debug warnings on syntax errors.
281832c6 AJ	141	// if we have seen the connection close, this is an error too
955394ce AJ	142	if (eof && *error == Http::scNone)
955394ce AJ	143	*error = Http::scInvalidHeader;
281832c6	144
96ee497f AJ	145	return false;
	146	}
	147
84ae6223	148	if (hdr_len > Config.maxReplyHeaderSize \|\| (hdr_len <= 0 && sz > Config.maxReplyHeaderSize)) {
e0236918	149	debugs(58, DBG_IMPORTANT, "HttpMsg::parse: Too large reply header (" << hdr_len << " > " << Config.maxReplyHeaderSize);
955394ce	150	*error = Http::scHeaderTooLarge;
c81e4de5	151	return false;
	152	}
	153
8596962e	154	if (hdr_len <= 0) {
84ae6223	155	debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof << ") in '" << buf << "'");
8596962e	156
8596962e	157	if (eof) // iff we have seen the end, this is an error
955394ce	158	*error = Http::scInvalidHeader;
8596962e	159
	160	return false;
	161	}
	162
84ae6223	163	const int res = httpMsgParseStep(buf, sz, eof);
8596962e	164
8596962e	165	if (res < 0) { // error
84ae6223	166	debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf << "'");
955394ce	167	*error = Http::scInvalidHeader;
8596962e	168	return false;
	169	}
	170
	171	if (res == 0) {
84ae6223	172	debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf << "'");
955394ce	173	*error = Http::scInvalidHeader;
8596962e	174	return false; // but this should not happen due to headersEnd() above
	175	}
	176
	177	assert(res > 0);
84ae6223	178	debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) near '" << buf << "'");
8596962e	179
8596962e	180	if (hdr_sz != (int)hdr_len) {
e0236918	181	debugs(58, DBG_IMPORTANT, "internal HttpMsg::parse vs. headersEnd error: " <<
8596962e	182	hdr_sz << " != " << hdr_len);
	183	hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len
	184	}
	185
	186	return true;
	187	}
	188
59eed7dc	189	/*
bf9fb8ff	190	* parseCharBuf() takes character buffer of HTTP headers (buf),
59eed7dc	191	* which may not be NULL-terminated, and fills in an HttpMsg
	192	* structure. The parameter 'end' specifies the offset to
	193	* the end of the reply headers. The caller may know where the
	194	* end is, but is unable to NULL-terminate the buffer. This function
	195	* returns true on success.
	196	*/
	197	bool
	198	HttpMsg::parseCharBuf(const char *buf, ssize_t end)
	199	{
	200	MemBuf mb;
	201	int success;
	202	/* reset current state, because we are not used in incremental fashion */
	203	reset();
	204	mb.init();
	205	mb.append(buf, end);
	206	mb.terminate();
666f514b	207	success = httpMsgParseStep(mb.buf, mb.size, 0);
59eed7dc	208	mb.clean();
	209	return success == 1;
	210	}
8596962e	211
	212	/*
	213	* parses a 0-terminating buffer into HttpMsg.
	214	* Returns:
	215	* 1 -- success
	216	* 0 -- need more data (partial parse)
	217	* -1 -- parse error
	218	*/
	219	int
666f514b	220	HttpMsg::httpMsgParseStep(const char *buf, int len, int atEnd)
8596962e	221	{
8596962e	222	const char *parse_start = buf;
666f514b	223	int parse_len = len;
8596962e	224	const char blk_start, blk_end;
	225	const char **parse_end_ptr = &blk_end;
	226	assert(parse_start);
	227	assert(pstate < psParsed);
8596962e	228
	229	*parse_end_ptr = parse_start;
	230
9ea37c79	231	PROF_start(HttpMsg_httpMsgParseStep);
9ea37c79	232
8596962e	233	if (pstate == psReadyToParseStartLine) {
9ea37c79	234	if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) {
137e94fd AJ	235	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	236	return 0;
26ac0430	237	}
8596962e	238
9ea37c79	239	if (!parseFirstLine(blk_start, blk_end)) {
137e94fd AJ	240	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	241	return httpMsgParseError();
26ac0430	242	}
8596962e	243
	244	*parse_end_ptr = parse_start;
	245
	246	hdr_sz = *parse_end_ptr - buf;
26ac0430	247	parse_len = parse_len - hdr_sz;
8596962e	248
	249	++pstate;
	250	}
	251
666f514b	252	/*
	253	* XXX This code uses parse_start; but if we're incrementally parsing then
	254	* this code might not actually be given parse_start at the right spot (just
	255	* after headers.) Grr.
	256	*/
8596962e	257	if (pstate == psReadyToParseHeaders) {
666f514b	258	if (!httpMsgIsolateHeaders(&parse_start, parse_len, &blk_start, &blk_end)) {
9ea37c79	259	if (atEnd) {
784619e6 AJ	260	blk_start = parse_start;
784619e6 AJ	261	blk_end = blk_start + strlen(blk_start);
26ac0430	262	} else {
137e94fd AJ	263	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd AJ	264	return 0;
9ea37c79	265	}
8596962e	266	}
8596962e	267
784619e6	268	if (!header.parse(blk_start, blk_end-blk_start)) {
137e94fd	269	PROF_stop(HttpMsg_httpMsgParseStep);
8596962e	270	return httpMsgParseError();
137e94fd	271	}
8596962e	272
07947ad8	273	hdrCacheInit();
8596962e	274
	275	*parse_end_ptr = parse_start;
	276
	277	hdr_sz = *parse_end_ptr - buf;
	278
	279	++pstate;
	280	}
137e94fd	281
9ea37c79	282	PROF_stop(HttpMsg_httpMsgParseStep);
137e94fd	283	return 1;
8596962e	284	}
8596962e	285
af2980f3 AJ	286	bool
	287	HttpMsg::parseHeader(Http1::Parser &hp)
	288	{
	289	// HTTP/1 message contains "zero or more header fields"
	290	// zero does not need parsing
af2980f3	291	// XXX: c_str() reallocates. performance regression.
563afef6 AR	292	if (hp.headerBlockSize() && !header.parse(hp.mimeHeader().c_str(), hp.headerBlockSize())) {
	293	pstate = psError;
	294	return false;
af2980f3 AJ	295	}
af2980f3 AJ	296
563afef6 AR	297	pstate = psParsed;
	298	hdrCacheInit();
	299	return true;
af2980f3 AJ	300	}
af2980f3 AJ	301
8596962e	302	/* handy: resets and returns -1 */
	303	int
	304	HttpMsg::httpMsgParseError()
	305	{
	306	reset();
8596962e	307	return -1;
	308	}
	309
3ff65596 AR	310	void
	311	HttpMsg::setContentLength(int64_t clen)
	312	{
789217a2 FC	313	header.delById(Http::HdrType::CONTENT_LENGTH); // if any
789217a2 FC	314	header.putInt64(Http::HdrType::CONTENT_LENGTH, clen);
3ff65596 AR	315	content_length = clen;
	316	}
	317
4a1acc56 AJ	318	bool
4a1acc56 AJ	319	HttpMsg::persistent() const
2246b732	320	{
2592bc70	321	if (http_ver > Http::ProtocolVersion(1,0)) {
62e76326	322	/*
	323	* for modern versions of HTTP: persistent unless there is
	324	* a "Connection: close" header.
	325	*/
ef84c0fb	326	return !httpHeaderHasConnDir(&header, "close");
3872be7c	327	} else {
3872be7c	328	/* for old versions of HTTP: persistent if has "keep-alive" */
ef84c0fb	329	return httpHeaderHasConnDir(&header, "keep-alive");
3872be7c	330	}
2246b732	331	}
8596962e	332
17802cf1	333	void HttpMsg::packInto(Packable *p, bool full_uri) const
8596962e	334	{
8596962e	335	packFirstLineInto(p, full_uri);
a9925b40	336	header.packInto(p);
785b508d	337	p->append("\r\n", 2);
8596962e	338	}
8596962e	339
07947ad8	340	void HttpMsg::hdrCacheInit()
07947ad8	341	{
789217a2	342	content_length = header.getInt64(Http::HdrType::CONTENT_LENGTH);
07947ad8	343	assert(NULL == cache_control);
a9925b40	344	cache_control = header.getCc();
07947ad8	345	}
3cfc19b3	346
	347	/*
	348	* useful for debugging
	349	*/
	350	void HttpMsg::firstLineBuf(MemBuf& mb)
	351	{
10201568	352	packFirstLineInto(&mb, true);
3cfc19b3	353	}
f53969cc	354