[thirdparty/squid.git] / src / ChunkedCodingParser.cc

/*
 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

#include "squid.h"
#include "base/TextException.h"
#include "ChunkedCodingParser.h"
#include "Debug.h"
#include "MemBuf.h"
#include "Parsing.h"

// Parsing steps. Each ps* constant points at the member function that
// implements the step; parse() runs the current step through theStep.
// psMessageEnd is the terminal step: parse() reports success once theStep
// equals it.
ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;

/// Creates a parser in its initial state by delegating to reset().
ChunkedCodingParser::ChunkedCodingParser()
{
    reset();
}

/// Returns the parser to its initial state: the next step is chunk-size
/// parsing, no chunk is in progress, no buffers are attached, the
/// use-original-body value is unset (-1), and quoting state is cleared.
void ChunkedCodingParser::reset()
{
    // state machine position
    theStep = psChunkSize;

    // current chunk accounting
    theLeftBodySize = 0;
    theChunkSize = 0;

    // per-parse() flags and buffers
    doNeedMoreData = false;
    theOut = NULL;
    theIn = NULL;

    // extension-related state
    useOriginBody = -1;
    inSlashed = false;
    inQuoted = false;
}

/// Runs parsing steps over rawData, writing decoded content to
/// parsedContent, until a step asks for more input, the output has no
/// room for chunk body data, or the final step is reached.
/// Both buffers must be non-null.
/// \returns true once the whole chunked message has been parsed
bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
{
    Must(rawData && parsedContent);
    theIn = rawData;
    theOut = parsedContent;

    // Recomputed on every call: a previous call may have stopped only
    // because of needsMoreSpace(), and mayContinue() must then let us
    // resume producing output.
    doNeedMoreData = !theIn->hasContent();

    for (;;) {
        if (!mayContinue())
            break;
        (this->*theStep)();
    }

    const bool reachedEnd = (theStep == psMessageEnd);
    return reachedEnd;
}

/// Returns the doNeedMoreData flag, which parse() and the step handlers
/// set when the buffered input is not enough to make progress.
bool ChunkedCodingParser::needsMoreData() const
{
    return doNeedMoreData;
}

/// Whether parsing is blocked on output room: true only while in the
/// chunk-body step and the output buffer reports no potential space.
/// Requires an attached output buffer.
bool ChunkedCodingParser::needsMoreSpace() const
{
    assert(theOut);

    // only the chunk body step writes to the output buffer
    if (theStep != psChunkBody)
        return false;

    return !theOut->hasPotentialSpace();
}

/// Whether another parsing step can run right now: input is available,
/// output room is not the limiting factor, and the message is not done.
bool ChunkedCodingParser::mayContinue() const
{
    if (needsMoreData())
        return false;

    if (needsMoreSpace())
        return false;

    return theStep != psMessageEnd;
}

/// Parses the hexadecimal chunk size at the start of the input.
/// Waits for more data until a non-hex octet follows the digits.
/// A non-zero size moves on to skipping the chunk extensions; a zero size
/// (last-chunk) consumes the digits and moves on to parsing them.
/// Throws on an unparsable or negative size.
void ChunkedCodingParser::parseChunkSize()
{
    Must(theChunkSize <= 0); // Should(), really

    // find the end of the hex digit run; if it reaches the end of the
    // buffered content, the size may still be incomplete
    const char *digitsEnd = theIn->content();
    for (; digitsEnd < theIn->space() && xisxdigit(*digitsEnd); ++digitsEnd) {
        // just scanning
    }

    if (digitsEnd >= theIn->space()) {
        doNeedMoreData = true;
        return;
    }

    int64_t parsedSize = -1;
    if (!StringToInt64(theIn->content(), parsedSize, &digitsEnd, 16))
        throw TexcHere("corrupted chunk size");

    if (parsedSize < 0)
        throw TexcHere("negative chunk size");

    theLeftBodySize = parsedSize;
    theChunkSize = theLeftBodySize;
    debugs(94,7, "found chunk: " << theChunkSize);

    // chunk extensions are parsed only for the last-chunk
    if (theChunkSize) {
        theStep = psUnusedChunkExtension;
        return;
    }

    theIn->consume(digitsEnd - theIn->content());
    theStep = psLastChunkExtension;
}

/// Skips everything up to and including the line terminator that ends
/// the chunk-size line. When the terminator is not buffered yet, all
/// buffered input is discarded and more is requested; quoting state is
/// kept in inQuoted/inSlashed so the search resumes correctly.
void ChunkedCodingParser::parseUnusedChunkExtension()
{
    size_t eolBeg = 0;
    size_t eolEnd = 0;

    if (!findCrlf(eolBeg, eolEnd, inQuoted, inSlashed)) {
        // nothing here is of interest; drop it and wait for the rest
        theIn->consume(theIn->contentSize());
        doNeedMoreData = true;
        return;
    }

    // line finished: quoting state must not leak into the next line
    inSlashed = false;
    inQuoted = false;
    theIn->consume(eolEnd);

    if (theChunkSize)
        theStep = psChunkBody;
    else
        theStep = psTrailer;
}

/// Moves chunk body bytes from the input to the output buffer. The amount
/// moved is limited by the bytes left in the chunk, the buffered input,
/// and the output's potential space. Switches to the chunk-end step once
/// the whole body has been moved.
void ChunkedCodingParser::parseChunkBody()
{
    Must(theLeftBodySize > 0); // Should, really

    // body bytes currently buffered in the input
    const size_t inputShare = min(theLeftBodySize, static_cast<uint64_t>(theIn->contentSize()));
    // ... of which this many fit into the output
    const size_t copySize = min(inputShare, static_cast<size_t>(theOut->potentialSpaceSize()));

    // the input does not hold the rest of the chunk;
    // independently of that, the output may also be out of room
    doNeedMoreData = inputShare < theLeftBodySize;

    theOut->append(theIn->content(), copySize);
    theIn->consume(copySize);
    theLeftBodySize -= copySize;

    if (theLeftBodySize != 0) {
        // an unfinished body must be explained by one of the two limits
        Must(needsMoreData() || needsMoreSpace());
        return;
    }

    theStep = psChunkEnd;
}

/// Consumes the line terminator that must immediately follow a chunk
/// body and returns to the chunk-size step for the next chunk.
/// Requests more data when no line terminator is buffered yet.
/// Throws if any octets precede the terminator.
void ChunkedCodingParser::parseChunkEnd()
{
    Must(theLeftBodySize == 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    // the terminator has to start right where the chunk body ended
    if (crlfBeg != 0)
        throw TexcHere("found data between chunk end and CRLF");

    theIn->consume(crlfEnd);
    theChunkSize = 0; // done with the current chunk
    theStep = psChunkSize;
}

/// Processes trailer lines one by one for as long as parsing may
/// continue. Only valid after the last (zero-size) chunk.
void ChunkedCodingParser::parseTrailer()
{
    Must(theChunkSize == 0); // Should(), really

    for (;;) {
        if (!mayContinue())
            break;
        parseTrailerHeader();
    }
}

/// Consumes one trailer line. An empty line (terminator at offset zero)
/// ends the message; a non-empty line is stored only when
/// TRAILERS_ARE_SUPPORTED is enabled. Requests more data when no line
/// terminator is buffered yet.
void ChunkedCodingParser::parseTrailerHeader()
{
    size_t lineLen = 0;  // offset at which the line terminator starts
    size_t nextLine = 0; // offset just past the line terminator

    if (!findCrlf(lineLen, nextLine)) {
        doNeedMoreData = true;
        return;
    }

    const bool emptyLine = (lineLen == 0);

#if TRAILERS_ARE_SUPPORTED
    if (!emptyLine)
        theTrailer.append(theIn->content(), nextLine);
#endif

    theIn->consume(nextLine);

    if (emptyLine)
        theStep = psMessageEnd;
}

/// Handler for the terminal psMessageEnd step. mayContinue() is false once
/// that step is reached, so parse() never runs this handler; running it
/// anyway trips the Must() below.
void ChunkedCodingParser::parseMessageEnd()
{
    // termination step, should not be called
    Must(false); // Should(), really
}

/// Finds the next CRLF in the buffered input, starting with no quoting
/// state and discarding whatever state the search ends in.
/// \returns whether a line terminator was found; on success crlfBeg and
/// crlfEnd are set to its start and one-past-end offsets
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
{
    // throw-away state: this overload always searches from scratch
    bool scratchQuoted = false;
    bool scratchSlashed = false;

    const bool found = findCrlf(crlfBeg, crlfEnd, scratchQuoted, scratchSlashed);
    return found;
}

/// Finds the next line terminator in the buffered input: a bare LF, or a
/// run of one or more CRs followed by LF. Terminators inside a
/// double-quoted string are ignored, and inside such a string a backslash
/// hides the octet after it.
/// The quoted and slashed parameters carry that quoting state in and out,
/// which makes the search incremental: it can resume on new data.
/// \returns whether a terminator was found; on success crlfBeg is the
/// offset of its first octet and crlfEnd the offset just past the LF
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
{
    // XXX: This code was copied, with permission, from another software.
    // There is a similar and probably better code inside httpHeaderParse
    // but it seems difficult to isolate due to parsing-unrelated bloat.
    // Such isolation should probably be done before this class is used
    // for handling of traffic "more external" than ICAP.

    const char *const data = theIn->content();
    const size_t length = theIn->contentSize();

    // offset of the first CR of the CR run being tracked; negative if none
    ssize_t firstCr = -1;

    for (size_t pos = 0; pos < length; ++pos) {
        // the octet after a backslash is skipped unexamined
        if (slashed) {
            slashed = false;
            continue;
        }

        const char ch = data[pos];

        // inside a quoted string only backslash and the closing quote matter
        if (quoted) {
            if (ch == '\\')
                slashed = true;
            else if (ch == '"')
                quoted = false;

            continue;
        }

        // an opening quote also interrupts any CR run seen so far
        if (ch == '"') {
            quoted = true;
            firstCr = -1;
            continue;
        }

        if (ch == '\n') {
            // the terminator starts at the tracked CR run, if there is one
            if (firstCr < 0)
                crlfBeg = pos;
            else
                crlfBeg = firstCr;

            crlfEnd = pos + 1;
            return true;
        }

        if (ch == '\r') {
            // remember only the first CR of a run
            if (firstCr < 0)
                firstCr = pos;
        } else {
            // any other octet breaks the CR run
            firstCr = -1;
        }
    }

    return false;
}

// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
void ChunkedCodingParser::parseLastChunkExtension()
{
    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    const char *const startExt = theIn->content();
    const char *const endExt = theIn->content() + crlfBeg;

    // chunk-extension starts at startExt and ends with LF at endEx
    for (const char *p = startExt; p < endExt;) {

        while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'

        if (*p++ != ';') // each ext name=value pair is preceded with ';'
            break;

        while (*p == ' ' || *p == '\t') ++p; // skip spaces before name

        if (p >= endExt)
            break; // malformed extension: ';' without ext name=value pair

        const int extSize = endExt - p;
        // TODO: we need debugData() stream manipulator to dump data
        debugs(94,7, "Found chunk extension; size=" << extSize);

        // TODO: support implied *LWS around '='
        if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
            (void)StringToInt64(p+18, useOriginBody, &p, 10);
            debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
            break; // remove to support more than just use-original-body
        } else {
            debugs(94, 5, HERE << "skipping unknown chunk extension");
            // TODO: support quoted-string chunk-ext-val
            while (p < endExt && *p != ';') ++p; // skip until the next ';'
        }
    }

    theIn->consume(crlfEnd);
    theStep = theChunkSize ? psChunkBody : psTrailer;
}
Commit	Line	Data
bbc27441	1	/*
bde978a6	2	* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
bbc27441 AJ	3	*
	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
	7	*/
	8
582c2af2	9	#include "squid.h"
3d93a84d	10	#include "base/TextException.h"
774c051c	11	#include "ChunkedCodingParser.h"
602d9612	12	#include "Debug.h"
774c051c	13	#include "MemBuf.h"
602d9612	14	#include "Parsing.h"
774c051c	15
5c550f5f AR	16	ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
	17	ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
	18	ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
774c051c	19	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
	20	ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
	21	ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
	22	ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
	23
	24	ChunkedCodingParser::ChunkedCodingParser()
	25	{
	26	reset();
	27	}
	28
	29	void ChunkedCodingParser::reset()
	30	{
5c550f5f	31	theStep = psChunkSize;
774c051c	32	theChunkSize = theLeftBodySize = 0;
774c051c	33	doNeedMoreData = false;
774c051c	34	theIn = theOut = NULL;
83c51da9	35	useOriginBody = -1;
5c550f5f	36	inQuoted = inSlashed = false;
774c051c	37	}
	38
	39	bool ChunkedCodingParser::parse(MemBuf rawData, MemBuf parsedContent)
	40	{
	41	Must(rawData && parsedContent);
	42	theIn = rawData;
	43	theOut = parsedContent;
	44
	45	// we must reset this all the time so that mayContinue() lets us
	46	// output more content if we stopped due to needsMoreSpace() before
	47	doNeedMoreData = !theIn->hasContent();
	48
	49	while (mayContinue()) {
	50	(this->*theStep)();
	51	}
	52
	53	return theStep == psMessageEnd;
	54	}
	55
	56	bool ChunkedCodingParser::needsMoreData() const
	57	{
	58	return doNeedMoreData;
	59	}
	60
	61	bool ChunkedCodingParser::needsMoreSpace() const
	62	{
	63	assert(theOut);
	64	return theStep == psChunkBody && !theOut->hasPotentialSpace();
	65	}
	66
	67	bool ChunkedCodingParser::mayContinue() const
	68	{
	69	return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
	70	}
	71
5c550f5f	72	void ChunkedCodingParser::parseChunkSize()
774c051c	73	{
	74	Must(theChunkSize <= 0); // Should(), really
	75
5c550f5f AR	76	const char *p = theIn->content();
	77	while (p < theIn->space() && xisxdigit(*p)) ++p;
	78	if (p >= theIn->space()) {
	79	doNeedMoreData = true;
	80	return;
	81	}
83c51da9	82
5c550f5f AR	83	int64_t size = -1;
	84	if (StringToInt64(theIn->content(), size, &p, 16)) {
	85	if (size < 0)
	86	throw TexcHere("negative chunk size");
	87
	88	theChunkSize = theLeftBodySize = size;
	89	debugs(94,7, "found chunk: " << theChunkSize);
	90	// parse chunk extensions only in the last-chunk
	91	if (theChunkSize)
	92	theStep = psUnusedChunkExtension;
	93	else {
	94	theIn->consume(p - theIn->content());
	95	theStep = psLastChunkExtension;
774c051c	96	}
5c550f5f	97	} else
774c051c	98	throw TexcHere("corrupted chunk size");
5c550f5f	99	}
774c051c	100
5c550f5f AR	101	void ChunkedCodingParser::parseUnusedChunkExtension()
	102	{
	103	size_t crlfBeg = 0;
	104	size_t crlfEnd = 0;
	105	if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
	106	inQuoted = inSlashed = false;
	107	theIn->consume(crlfEnd);
	108	theStep = theChunkSize ? psChunkBody : psTrailer;
	109	} else {
	110	theIn->consume(theIn->contentSize());
	111	doNeedMoreData = true;
	112	}
774c051c	113	}
	114
	115	void ChunkedCodingParser::parseChunkBody()
	116	{
	117	Must(theLeftBodySize > 0); // Should, really
	118
d85c3078 AJ	119	const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
d85c3078 AJ	120	const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
774c051c	121
	122	doNeedMoreData = availSize < theLeftBodySize;
	123	// and we may also need more space
	124
	125	theOut->append(theIn->content(), safeSize);
	126	theIn->consume(safeSize);
	127	theLeftBodySize -= safeSize;
	128
	129	if (theLeftBodySize == 0)
	130	theStep = psChunkEnd;
	131	else
	132	Must(needsMoreData() \|\| needsMoreSpace());
	133	}
	134
	135	void ChunkedCodingParser::parseChunkEnd()
	136	{
	137	Must(theLeftBodySize == 0); // Should(), really
	138
	139	size_t crlfBeg = 0;
	140	size_t crlfEnd = 0;
	141
	142	if (findCrlf(crlfBeg, crlfEnd)) {
	143	if (crlfBeg != 0) {
5e956603	144	throw TexcHere("found data between chunk end and CRLF");
774c051c	145	return;
	146	}
	147
	148	theIn->consume(crlfEnd);
	149	theChunkSize = 0; // done with the current chunk
5c550f5f	150	theStep = psChunkSize;
774c051c	151	return;
	152	}
	153
	154	doNeedMoreData = true;
	155	}
	156
	157	void ChunkedCodingParser::parseTrailer()
	158	{
	159	Must(theChunkSize == 0); // Should(), really
	160
	161	while (mayContinue())
	162	parseTrailerHeader();
	163	}
	164
	165	void ChunkedCodingParser::parseTrailerHeader()
	166	{
	167	size_t crlfBeg = 0;
	168	size_t crlfEnd = 0;
	169
	170	if (findCrlf(crlfBeg, crlfEnd)) {
774c051c	171
d7e8bdf7 AR	172	#if TRAILERS_ARE_SUPPORTED
	173	if (crlfBeg > 0)
	174	theTrailer.append(theIn->content(), crlfEnd);
d8b258a9	175	#endif
774c051c	176
	177	theIn->consume(crlfEnd);
	178
	179	if (crlfBeg == 0)
	180	theStep = psMessageEnd;
	181
	182	return;
	183	}
	184
	185	doNeedMoreData = true;
	186	}
	187
	188	void ChunkedCodingParser::parseMessageEnd()
	189	{
	190	// termination step, should not be called
	191	Must(false); // Should(), really
	192	}
	193
5c550f5f	194	/// Finds next CRLF. Does not store parsing state.
774c051c	195	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
5c550f5f AR	196	{
	197	bool quoted = false;
	198	bool slashed = false;
	199	return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
	200	}
	201
	202	/// Finds next CRLF. Parsing state stored in quoted and slashed
	203	/// parameters. Incremental: can resume when more data is available.
	204	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
774c051c	205	{
	206	// XXX: This code was copied, with permission, from another software.
	207	// There is a similar and probably better code inside httpHeaderParse
	208	// but it seems difficult to isolate due to parsing-unrelated bloat.
	209	// Such isolation should probably be done before this class is used
	210	// for handling of traffic "more external" than ICAP.
	211
	212	const char *buf = theIn->content();
	213	size_t size = theIn->contentSize();
	214
	215	ssize_t crOff = -1;
774c051c	216
	217	for (size_t i = 0; i < size; ++i) {
	218	if (slashed) {
	219	slashed = false;
	220	continue;
	221	}
	222
	223	const char c = buf[i];
	224
	225	// handle quoted strings
	226	if (quoted) {
	227	if (c == '\\')
	228	slashed = true;
e1381638 AJ	229	else if (c == '"')
e1381638 AJ	230	quoted = false;
774c051c	231
774c051c	232	continue;
e1381638 AJ	233	} else if (c == '"') {
	234	quoted = true;
	235	crOff = -1;
	236	continue;
	237	}
774c051c	238
	239	if (crOff < 0) { // looking for the first CR or LF
	240
	241	if (c == '\n') {
	242	crlfBeg = i;
	243	crlfEnd = ++i;
	244	return true;
	245	}
	246
	247	if (c == '\r')
	248	crOff = i;
	249	} else { // skipping CRs, looking for the first LF
	250
	251	if (c == '\n') {
	252	crlfBeg = crOff;
	253	crlfEnd = ++i;
	254	return true;
	255	}
	256
	257	if (c != '\r')
	258	crOff = -1;
	259	}
	260	}
	261
	262	return false;
	263	}
	264
83c51da9	265	// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
5c550f5f	266	void ChunkedCodingParser::parseLastChunkExtension()
83c51da9	267	{
5c550f5f AR	268	size_t crlfBeg = 0;
	269	size_t crlfEnd = 0;
	270
	271	if (!findCrlf(crlfBeg, crlfEnd)) {
	272	doNeedMoreData = true;
	273	return;
	274	}
	275
	276	const char *const startExt = theIn->content();
	277	const char *const endExt = theIn->content() + crlfBeg;
	278
83c51da9 CT	279	// chunk-extension starts at startExt and ends with LF at endEx
	280	for (const char *p = startExt; p < endExt;) {
	281
	282	while (p == ' ' \|\| p == '\t') ++p; // skip spaces before ';'
7ddcfbab	283
83c51da9	284	if (*p++ != ';') // each ext name=value pair is preceded with ';'
5c550f5f	285	break;
7ddcfbab	286
83c51da9	287	while (p == ' ' \|\| p == '\t') ++p; // skip spaces before name
7ddcfbab	288
83c51da9	289	if (p >= endExt)
5c550f5f	290	break; // malformed extension: ';' without ext name=value pair
83c51da9 CT	291
	292	const int extSize = endExt - p;
	293	// TODO: we need debugData() stream manipulator to dump data
	294	debugs(94,7, "Found chunk extension; size=" << extSize);
	295
	296	// TODO: support implied *LWS around '='
	297	if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
	298	(void)StringToInt64(p+18, useOriginBody, &p, 10);
	299	debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
5c550f5f	300	break; // remove to support more than just use-original-body
83c51da9 CT	301	} else {
	302	debugs(94, 5, HERE << "skipping unknown chunk extension");
	303	// TODO: support quoted-string chunk-ext-val
	304	while (p < endExt && *p != ';') ++p; // skip until the next ';'
	305	}
	306	}
5c550f5f AR	307
	308	theIn->consume(crlfEnd);
	309	theStep = theChunkSize ? psChunkBody : psTrailer;
83c51da9	310	}
f53969cc	311