[thirdparty/squid.git] / src / ChunkedCodingParser.cc

#include "squid.h"
#include "base/TextException.h"
#include "ChunkedCodingParser.h"
#include "Debug.h"
#include "MemBuf.h"
#include "Parsing.h"

// Parsing steps. Each static Step holds a pointer to the member function that
// handles one parser state; parse() repeatedly invokes the current one through
// (this->*theStep)() and steps advance the state by reassigning theStep.
ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
// Terminal state: parse() returns true once theStep reaches it.
ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;

/// Constructs a parser and puts it into its starting state via reset().
ChunkedCodingParser::ChunkedCodingParser()
{
    reset();
}

/// Returns the parser to its starting state: expecting a chunk-size line,
/// with no chunk in progress, no buffers attached, and no extension state.
void ChunkedCodingParser::reset()
{
    // buffers are (re)attached by the next parse() call
    theIn = NULL;
    theOut = NULL;

    // the first thing we expect to see is a chunk size
    theStep = psChunkSize;
    theLeftBodySize = 0;
    theChunkSize = 0;
    doNeedMoreData = false;

    // chunk-extension related state
    useOriginBody = -1;
    inSlashed = false;
    inQuoted = false;
}

/// Runs parsing steps over rawData, appending decoded chunk bodies to
/// parsedContent, until a step asks for more input, the output has no
/// potential space left while in a chunk body, or the message end is reached.
/// \param rawData        input buffer with chunked-encoded bytes; must not be nil
/// \param parsedContent  output buffer for decoded body bytes; must not be nil
/// \returns true if and only if the parser has reached the message-end step
bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
{
    Must(rawData && parsedContent);
    theOut = parsedContent;
    theIn = rawData;

    // Recomputed on every call: a previous call may have stopped only because
    // of needsMoreSpace(), in which case mayContinue() must let us resume and
    // output more content now.
    doNeedMoreData = !theIn->hasContent();

    while (mayContinue()) {
        const Step currentStep = theStep;
        (this->*currentStep)();
    }

    const bool reachedEnd = (theStep == psMessageEnd);
    return reachedEnd;
}

/// Reports the doNeedMoreData flag: parse() seeds it from the input buffer
/// being empty, and parsing steps set it when they cannot finish with the
/// bytes currently available.
bool ChunkedCodingParser::needsMoreData() const
{
    return doNeedMoreData;
}

/// Whether parsing is stalled on the output buffer: true only while in the
/// chunk-body step and the output buffer reports no potential space.
/// Requires an output buffer to have been attached by parse().
bool ChunkedCodingParser::needsMoreSpace() const
{
    assert(theOut);

    // only the chunk body step writes to the output buffer
    if (theStep != psChunkBody)
        return false;

    return !theOut->hasPotentialSpace();
}

/// Whether another parsing step can run right now: we are not waiting for
/// input, not waiting for output space, and have not reached the message end.
bool ChunkedCodingParser::mayContinue() const
{
    if (needsMoreData())
        return false;

    if (needsMoreSpace())
        return false;

    return theStep != psMessageEnd;
}

/// Parses the hexadecimal chunk size at the start of the input buffer.
/// Asks for more data if the buffered bytes are all hex digits (the size may
/// be incomplete). Throws on an unparsable or negative size. A non-zero size
/// moves to psUnusedChunkExtension without consuming anything here; a zero
/// size (last-chunk) consumes the size digits and moves to
/// psLastChunkExtension.
void ChunkedCodingParser::parseChunkSize()
{
    Must(theChunkSize <= 0); // Should(), really

    // Walk over the leading hex digits. If we run off the end of the
    // buffered content, the size token may continue in the next read.
    const char *cursor = theIn->content();
    for (; cursor < theIn->space(); ++cursor) {
        if (!xisxdigit(*cursor))
            break;
    }

    if (cursor >= theIn->space()) {
        doNeedMoreData = true;
        return;
    }

    int64_t parsedSize = -1;
    if (!StringToInt64(theIn->content(), parsedSize, &cursor, 16))
        throw TexcHere("corrupted chunk size");

    if (parsedSize < 0)
        throw TexcHere("negative chunk size");

    theLeftBodySize = parsedSize;
    theChunkSize = theLeftBodySize;
    debugs(94,7, "found chunk: " << theChunkSize);

    // parse chunk extensions only in the last-chunk
    if (theChunkSize) {
        theStep = psUnusedChunkExtension;
        return;
    }

    // last-chunk: drop the size digits so that the extension parser
    // starts right after them
    theIn->consume(cursor - theIn->content());
    theStep = psLastChunkExtension;
}

/// Skips everything up to and including the line terminator that ends the
/// chunk-size line of a chunk whose extensions we ignore. Works
/// incrementally: when no terminator is buffered yet, all buffered bytes are
/// discarded and the quoted-string scan state is kept in inQuoted/inSlashed
/// for the next call.
void ChunkedCodingParser::parseUnusedChunkExtension()
{
    size_t eolBeg = 0;
    size_t eolEnd = 0;

    if (!findCrlf(eolBeg, eolEnd, inQuoted, inSlashed)) {
        // nothing of interest in what we have; throw it away and wait
        theIn->consume(theIn->contentSize());
        doNeedMoreData = true;
        return;
    }

    // line complete: the scan state must not leak into the next line
    inQuoted = inSlashed = false;
    theIn->consume(eolEnd);

    if (theChunkSize)
        theStep = psChunkBody;
    else
        theStep = psTrailer;
}

/// Moves as much of the current chunk body as possible from the input buffer
/// to the output buffer, limited by the bytes left in the chunk, the bytes
/// buffered, and the potential space in the output. Advances to psChunkEnd
/// once the whole chunk body has been copied.
void ChunkedCodingParser::parseChunkBody()
{
    Must(theLeftBodySize > 0); // Should, really

    // how much of this chunk is already buffered
    const size_t bufferedBody = min(theLeftBodySize, static_cast<uint64_t>(theIn->contentSize()));
    // how much of that the output can accept
    const size_t copySize = min(bufferedBody, static_cast<size_t>(theOut->potentialSpaceSize()));

    // the input holds less than the rest of the chunk
    // (and we may also need more space)
    doNeedMoreData = bufferedBody < theLeftBodySize;

    theOut->append(theIn->content(), copySize);
    theIn->consume(copySize);
    theLeftBodySize -= copySize;

    if (theLeftBodySize != 0) {
        // an unfinished chunk must be explained by one of the two stalls
        Must(needsMoreData() || needsMoreSpace());
        return;
    }

    theStep = psChunkEnd;
}

/// Consumes the line terminator that must immediately follow a chunk body
/// and returns to the chunk-size step for the next chunk.
/// Asks for more data if no terminator is buffered yet.
/// Throws if any bytes precede the terminator.
void ChunkedCodingParser::parseChunkEnd()
{
    Must(theLeftBodySize == 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    // the terminator has to start right at the beginning of the input
    if (crlfBeg != 0)
        throw TexcHere("found data between chunk end and CRLF");

    theIn->consume(crlfEnd);
    theChunkSize = 0; // done with the current chunk
    theStep = psChunkSize;
}

/// Parses trailer lines one at a time for as long as parsing may continue,
/// that is until more input is needed or the message end has been reached.
void ChunkedCodingParser::parseTrailer()
{
    Must(theChunkSize == 0); // Should(), really

    for (;;) {
        if (!mayContinue())
            return;

        parseTrailerHeader();
    }
}

/// Consumes a single trailer line. An empty line (terminator at offset zero)
/// ends the message. Asks for more data if no complete line is buffered.
void ChunkedCodingParser::parseTrailerHeader()
{
    size_t eolBeg = 0;
    size_t eolEnd = 0;

    if (!findCrlf(eolBeg, eolEnd)) {
        doNeedMoreData = true;
        return;
    }

    // a line with nothing before its terminator marks the end of the trailer
    const bool blankLine = (eolBeg == 0);

#if TRAILERS_ARE_SUPPORTED
    if (!blankLine)
        theTrailer.append(theIn->content(), eolEnd);
#endif

    theIn->consume(eolEnd);

    if (blankLine)
        theStep = psMessageEnd;
}

/// Handler for the terminal psMessageEnd step. parse() and parseTrailer()
/// stop iterating once mayContinue() sees theStep == psMessageEnd, so this
/// is not expected to run; Must(false) flags any such call as a bug.
void ChunkedCodingParser::parseMessageEnd()
{
    // termination step, should not be called
    Must(false); // Should(), really
}

/// Finds the next line terminator in the input buffer, starting the scan
/// outside of any quoted string. The scan state is local to this call and is
/// discarded afterwards, so this overload cannot resume a previous scan.
/// \returns true if a terminator was found; crlfBeg/crlfEnd are then set
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
{
    // throw-away scan state
    bool scratchQuoted = false;
    bool scratchSlashed = false;

    const bool found = findCrlf(crlfBeg, crlfEnd, scratchQuoted, scratchSlashed);
    return found;
}

/// Finds the next line terminator (LF, optionally preceded by a run of CRs)
/// in the input buffer, ignoring CR and LF that appear inside double-quoted
/// strings. Inside a quoted string, a backslash hides the next character.
/// Incremental: quoted and slashed carry the scan state in and out, so the
/// caller can resume with new data after a failed search.
/// \param crlfBeg  on success, offset of the first CR of the run directly
///                 preceding the LF, or of the LF itself if there is no CR
/// \param crlfEnd  on success, offset just past the LF
/// \param quoted   in/out: whether the scan is inside a quoted string
/// \param slashed  in/out: whether the next character is backslash-escaped
/// \returns true if a terminator was found
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
{
    // XXX: This code was copied, with permission, from another software.
    // There is a similar and probably better code inside httpHeaderParse
    // but it seems difficult to isolate due to parsing-unrelated bloat.
    // Such isolation should probably be done before this class is used
    // for handling of traffic "more external" than ICAP.

    const char *data = theIn->content();
    const size_t length = theIn->contentSize();

    // offset of the first CR in the current run of CRs; negative if none
    ssize_t firstCr = -1;

    for (size_t pos = 0; pos < length; ++pos) {
        // the previous character was a backslash: skip this one blindly
        if (slashed) {
            slashed = false;
            continue;
        }

        const char ch = data[pos];

        // inside a quoted string only backslash and the closing quote matter
        if (quoted) {
            if (ch == '\\')
                slashed = true;
            else if (ch == '"')
                quoted = false;

            continue;
        }

        // an opening quote also breaks any pending run of CRs
        if (ch == '"') {
            quoted = true;
            firstCr = -1;
            continue;
        }

        if (ch == '\n') {
            // the terminator starts at the pending CR run, if there is one
            crlfBeg = (firstCr < 0) ? pos : static_cast<size_t>(firstCr);
            crlfEnd = pos + 1;
            return true;
        }

        if (ch == '\r') {
            // remember where the run starts; further CRs extend it
            if (firstCr < 0)
                firstCr = pos;
        } else {
            // any other character cancels a pending run of CRs
            firstCr = -1;
        }
    }

    return false;
}

// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
void ChunkedCodingParser::parseLastChunkExtension()
{
    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    const char *const startExt = theIn->content();
    const char *const endExt = theIn->content() + crlfBeg;

    // chunk-extension starts at startExt and ends with LF at endEx
    for (const char *p = startExt; p < endExt;) {

        while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'

        if (*p++ != ';') // each ext name=value pair is preceded with ';'
            break;

        while (*p == ' ' || *p == '\t') ++p; // skip spaces before name

        if (p >= endExt)
            break; // malformed extension: ';' without ext name=value pair

        const int extSize = endExt - p;
        // TODO: we need debugData() stream manipulator to dump data
        debugs(94,7, "Found chunk extension; size=" << extSize);

        // TODO: support implied *LWS around '='
        if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
            (void)StringToInt64(p+18, useOriginBody, &p, 10);
            debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
            break; // remove to support more than just use-original-body
        } else {
            debugs(94, 5, HERE << "skipping unknown chunk extension");
            // TODO: support quoted-string chunk-ext-val
            while (p < endExt && *p != ';') ++p; // skip until the next ';'
        }
    }

    theIn->consume(crlfEnd);
    theStep = theChunkSize ? psChunkBody : psTrailer;
}
Commit	Line	Data
582c2af2	1	#include "squid.h"
3d93a84d	2	#include "base/TextException.h"
774c051c	3	#include "ChunkedCodingParser.h"
602d9612	4	#include "Debug.h"
774c051c	5	#include "MemBuf.h"
602d9612	6	#include "Parsing.h"
774c051c	7
5c550f5f AR	8	ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
	9	ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
	10	ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
774c051c	11	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
	12	ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
	13	ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
	14	ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
	15
	16	ChunkedCodingParser::ChunkedCodingParser()
	17	{
	18	reset();
	19	}
	20
	21	void ChunkedCodingParser::reset()
	22	{
5c550f5f	23	theStep = psChunkSize;
774c051c	24	theChunkSize = theLeftBodySize = 0;
774c051c	25	doNeedMoreData = false;
774c051c	26	theIn = theOut = NULL;
83c51da9	27	useOriginBody = -1;
5c550f5f	28	inQuoted = inSlashed = false;
774c051c	29	}
	30
	31	bool ChunkedCodingParser::parse(MemBuf rawData, MemBuf parsedContent)
	32	{
	33	Must(rawData && parsedContent);
	34	theIn = rawData;
	35	theOut = parsedContent;
	36
	37	// we must reset this all the time so that mayContinue() lets us
	38	// output more content if we stopped due to needsMoreSpace() before
	39	doNeedMoreData = !theIn->hasContent();
	40
	41	while (mayContinue()) {
	42	(this->*theStep)();
	43	}
	44
	45	return theStep == psMessageEnd;
	46	}
	47
	48	bool ChunkedCodingParser::needsMoreData() const
	49	{
	50	return doNeedMoreData;
	51	}
	52
	53	bool ChunkedCodingParser::needsMoreSpace() const
	54	{
	55	assert(theOut);
	56	return theStep == psChunkBody && !theOut->hasPotentialSpace();
	57	}
	58
	59	bool ChunkedCodingParser::mayContinue() const
	60	{
	61	return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
	62	}
	63
5c550f5f	64	void ChunkedCodingParser::parseChunkSize()
774c051c	65	{
	66	Must(theChunkSize <= 0); // Should(), really
	67
5c550f5f AR	68	const char *p = theIn->content();
	69	while (p < theIn->space() && xisxdigit(*p)) ++p;
	70	if (p >= theIn->space()) {
	71	doNeedMoreData = true;
	72	return;
	73	}
83c51da9	74
5c550f5f AR	75	int64_t size = -1;
	76	if (StringToInt64(theIn->content(), size, &p, 16)) {
	77	if (size < 0)
	78	throw TexcHere("negative chunk size");
	79
	80	theChunkSize = theLeftBodySize = size;
	81	debugs(94,7, "found chunk: " << theChunkSize);
	82	// parse chunk extensions only in the last-chunk
	83	if (theChunkSize)
	84	theStep = psUnusedChunkExtension;
	85	else {
	86	theIn->consume(p - theIn->content());
	87	theStep = psLastChunkExtension;
774c051c	88	}
5c550f5f	89	} else
774c051c	90	throw TexcHere("corrupted chunk size");
5c550f5f	91	}
774c051c	92
5c550f5f AR	93	void ChunkedCodingParser::parseUnusedChunkExtension()
	94	{
	95	size_t crlfBeg = 0;
	96	size_t crlfEnd = 0;
	97	if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
	98	inQuoted = inSlashed = false;
	99	theIn->consume(crlfEnd);
	100	theStep = theChunkSize ? psChunkBody : psTrailer;
	101	} else {
	102	theIn->consume(theIn->contentSize());
	103	doNeedMoreData = true;
	104	}
774c051c	105	}
	106
	107	void ChunkedCodingParser::parseChunkBody()
	108	{
	109	Must(theLeftBodySize > 0); // Should, really
	110
d85c3078 AJ	111	const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
d85c3078 AJ	112	const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
774c051c	113
	114	doNeedMoreData = availSize < theLeftBodySize;
	115	// and we may also need more space
	116
	117	theOut->append(theIn->content(), safeSize);
	118	theIn->consume(safeSize);
	119	theLeftBodySize -= safeSize;
	120
	121	if (theLeftBodySize == 0)
	122	theStep = psChunkEnd;
	123	else
	124	Must(needsMoreData() \|\| needsMoreSpace());
	125	}
	126
	127	void ChunkedCodingParser::parseChunkEnd()
	128	{
	129	Must(theLeftBodySize == 0); // Should(), really
	130
	131	size_t crlfBeg = 0;
	132	size_t crlfEnd = 0;
	133
	134	if (findCrlf(crlfBeg, crlfEnd)) {
	135	if (crlfBeg != 0) {
5e956603	136	throw TexcHere("found data between chunk end and CRLF");
774c051c	137	return;
	138	}
	139
	140	theIn->consume(crlfEnd);
	141	theChunkSize = 0; // done with the current chunk
5c550f5f	142	theStep = psChunkSize;
774c051c	143	return;
	144	}
	145
	146	doNeedMoreData = true;
	147	}
	148
	149	void ChunkedCodingParser::parseTrailer()
	150	{
	151	Must(theChunkSize == 0); // Should(), really
	152
	153	while (mayContinue())
	154	parseTrailerHeader();
	155	}
	156
	157	void ChunkedCodingParser::parseTrailerHeader()
	158	{
	159	size_t crlfBeg = 0;
	160	size_t crlfEnd = 0;
	161
	162	if (findCrlf(crlfBeg, crlfEnd)) {
774c051c	163
d7e8bdf7 AR	164	#if TRAILERS_ARE_SUPPORTED
	165	if (crlfBeg > 0)
	166	theTrailer.append(theIn->content(), crlfEnd);
d8b258a9	167	#endif
774c051c	168
	169	theIn->consume(crlfEnd);
	170
	171	if (crlfBeg == 0)
	172	theStep = psMessageEnd;
	173
	174	return;
	175	}
	176
	177	doNeedMoreData = true;
	178	}
	179
	180	void ChunkedCodingParser::parseMessageEnd()
	181	{
	182	// termination step, should not be called
	183	Must(false); // Should(), really
	184	}
	185
5c550f5f	186	/// Finds next CRLF. Does not store parsing state.
774c051c	187	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
5c550f5f AR	188	{
	189	bool quoted = false;
	190	bool slashed = false;
	191	return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
	192	}
	193
	194	/// Finds next CRLF. Parsing state stored in quoted and slashed
	195	/// parameters. Incremental: can resume when more data is available.
	196	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
774c051c	197	{
	198	// XXX: This code was copied, with permission, from another software.
	199	// There is a similar and probably better code inside httpHeaderParse
	200	// but it seems difficult to isolate due to parsing-unrelated bloat.
	201	// Such isolation should probably be done before this class is used
	202	// for handling of traffic "more external" than ICAP.
	203
	204	const char *buf = theIn->content();
	205	size_t size = theIn->contentSize();
	206
	207	ssize_t crOff = -1;
774c051c	208
	209	for (size_t i = 0; i < size; ++i) {
	210	if (slashed) {
	211	slashed = false;
	212	continue;
	213	}
	214
	215	const char c = buf[i];
	216
	217	// handle quoted strings
	218	if (quoted) {
	219	if (c == '\\')
	220	slashed = true;
e1381638 AJ	221	else if (c == '"')
e1381638 AJ	222	quoted = false;
774c051c	223
774c051c	224	continue;
e1381638 AJ	225	} else if (c == '"') {
	226	quoted = true;
	227	crOff = -1;
	228	continue;
	229	}
774c051c	230
	231	if (crOff < 0) { // looking for the first CR or LF
	232
	233	if (c == '\n') {
	234	crlfBeg = i;
	235	crlfEnd = ++i;
	236	return true;
	237	}
	238
	239	if (c == '\r')
	240	crOff = i;
	241	} else { // skipping CRs, looking for the first LF
	242
	243	if (c == '\n') {
	244	crlfBeg = crOff;
	245	crlfEnd = ++i;
	246	return true;
	247	}
	248
	249	if (c != '\r')
	250	crOff = -1;
	251	}
	252	}
	253
	254	return false;
	255	}
	256
83c51da9	257	// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
5c550f5f	258	void ChunkedCodingParser::parseLastChunkExtension()
83c51da9	259	{
5c550f5f AR	260	size_t crlfBeg = 0;
	261	size_t crlfEnd = 0;
	262
	263	if (!findCrlf(crlfBeg, crlfEnd)) {
	264	doNeedMoreData = true;
	265	return;
	266	}
	267
	268	const char *const startExt = theIn->content();
	269	const char *const endExt = theIn->content() + crlfBeg;
	270
83c51da9 CT	271	// chunk-extension starts at startExt and ends with LF at endEx
	272	for (const char *p = startExt; p < endExt;) {
	273
	274	while (p == ' ' \|\| p == '\t') ++p; // skip spaces before ';'
7ddcfbab	275
83c51da9	276	if (*p++ != ';') // each ext name=value pair is preceded with ';'
5c550f5f	277	break;
7ddcfbab	278
83c51da9	279	while (p == ' ' \|\| p == '\t') ++p; // skip spaces before name
7ddcfbab	280
83c51da9	281	if (p >= endExt)
5c550f5f	282	break; // malformed extension: ';' without ext name=value pair
83c51da9 CT	283
	284	const int extSize = endExt - p;
	285	// TODO: we need debugData() stream manipulator to dump data
	286	debugs(94,7, "Found chunk extension; size=" << extSize);
	287
	288	// TODO: support implied *LWS around '='
	289	if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
	290	(void)StringToInt64(p+18, useOriginBody, &p, 10);
	291	debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
5c550f5f	292	break; // remove to support more than just use-original-body
83c51da9 CT	293	} else {
	294	debugs(94, 5, HERE << "skipping unknown chunk extension");
	295	// TODO: support quoted-string chunk-ext-val
	296	while (p < endExt && *p != ';') ++p; // skip until the next ';'
	297	}
	298	}
5c550f5f AR	299
	300	theIn->consume(crlfEnd);
	301	theStep = theChunkSize ? psChunkBody : psTrailer;
83c51da9	302	}