[thirdparty/squid.git] / src / ChunkedCodingParser.cc

#include "squid.h"
#include "Parsing.h"
#include "TextException.h"
#include "ChunkedCodingParser.h"
#include "MemBuf.h"

// Parsing steps. Each one is a pointer to the member function that implements
// that step; parse() repeatedly invokes whichever step theStep points to.
// psMessageEnd marks the terminal state and is compared against, not invoked.
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;

// Constructs a parser ready to read the first chunk; all member
// initialization is delegated to reset().
ChunkedCodingParser::ChunkedCodingParser()
{
    reset();
}

// Puts the parser back into its initial state: no buffers attached, no
// chunk in progress, and the next step is to read a chunk-size line.
void ChunkedCodingParser::reset()
{
    // detach from any buffers supplied to a previous parse() call
    theOut = NULL;
    theIn = NULL;

    // no chunk is being processed
    theLeftBodySize = 0;
    theChunkSize = 0;

    doNeedMoreData = false;
    theStep = psChunkBeg;
}

// Decodes as much chunked data from rawData into parsedContent as possible.
// Both buffers are remembered in theIn/theOut for use by the step methods.
// Returns true once the end of the chunked message has been reached and
// false if parsing stopped earlier (see needsMoreData()/needsMoreSpace()).
// Step methods may throw on malformed input.
bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
{
    Must(rawData && parsedContent);
    theOut = parsedContent;
    theIn = rawData;

    // Recomputed on every call: a previous call may have stopped because of
    // needsMoreSpace(), and mayContinue() must let us resume producing
    // output now instead of remembering a stale "need more data" verdict.
    doNeedMoreData = !rawData->hasContent();

    // run the current step until something blocks progress or we are done
    for (;;) {
        if (!mayContinue())
            break;

        (this->*theStep)();
    }

    const bool reachedMessageEnd = (theStep == psMessageEnd);
    return reachedMessageEnd;
}

// Reports whether parsing stopped because the input buffer did not hold
// enough bytes to complete the current step (the doNeedMoreData flag).
bool ChunkedCodingParser::needsMoreData() const
{
    return doNeedMoreData;
}

// Reports whether the parser is in the middle of a chunk body and the output
// buffer has no potential space left. Only the chunk-body step writes to the
// output buffer, so no other step can be blocked on space.
// Requires an output buffer to be attached (asserted).
bool ChunkedCodingParser::needsMoreSpace() const
{
    assert(theOut);

    if (theStep != psChunkBody)
        return false; // this step does not produce output

    return !theOut->hasPotentialSpace();
}

// Tells whether another parsing step can be executed right now: there must
// be input to read, room for output (when copying a chunk body), and the
// message end must not have been reached yet.
bool ChunkedCodingParser::mayContinue() const
{
    if (needsMoreData())
        return false; // starved of input

    if (needsMoreSpace())
        return false; // output buffer is full

    return theStep != psMessageEnd;
}

// Parses the chunk-size line that starts every chunk.
//
// If a complete line is not buffered yet, sets doNeedMoreData and leaves the
// input untouched. Otherwise reads a hexadecimal size from the start of the
// input, consumes the whole line (including its terminator), records the
// size in theChunkSize/theLeftBodySize, and moves on to the chunk body, or
// to the trailer when the size is zero (last chunk).
//
// Throws (via TexcHere) when the size cannot be parsed or is negative.
void ChunkedCodingParser::parseChunkBeg()
{
    Must(theChunkSize <= 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        // the chunk-size line is still incomplete
        doNeedMoreData = true;
        return;
    }

    debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);
    int64_t size = -1;
    // Out-parameter for StringToInt64(); the value is not used here.
    // NOTE(review): presumably it receives the end of the parsed number,
    // which means anything between the number and the line end (e.g., chunk
    // extensions) is skipped without inspection -- confirm against
    // StringToInt64().
    const char *p = 0;

    if (!StringToInt64(theIn->content(), size, &p, 16))
        throw TexcHere("corrupted chunk size");

    if (size < 0)
        throw TexcHere("negative chunk size");

    theIn->consume(crlfEnd);
    theChunkSize = theLeftBodySize = size;
    debugs(94,7, "found chunk: " << theChunkSize);

    // a zero-sized chunk is the last one; only trailers may follow it
    theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
}

// Copies chunk body bytes from the input buffer to the output buffer.
//
// The amount copied is limited by three things: what is left of the current
// chunk, what is buffered in the input, and what the output can still
// accept. When the whole chunk body has been copied, the parser advances to
// the chunk-end step; otherwise it must be blocked on input or on output
// space.
void ChunkedCodingParser::parseChunkBody()
{
    Must(theLeftBodySize > 0); // Should, really

    // body bytes of this chunk that are already in the input buffer
    const size_t bufferedBody = min(theLeftBodySize, static_cast<uint64_t>(theIn->contentSize()));
    // ... of which this many also fit into the output buffer
    const size_t copySize = min(bufferedBody, static_cast<size_t>(theOut->potentialSpaceSize()));

    // The input is short if it does not hold the rest of the chunk, even
    // when the output buffer is what limits us right now.
    doNeedMoreData = bufferedBody < theLeftBodySize;

    theOut->append(theIn->content(), copySize);
    theIn->consume(copySize);
    theLeftBodySize -= copySize;

    if (theLeftBodySize != 0) {
        // an unfinished chunk must have a reason for being unfinished
        Must(needsMoreData() || needsMoreSpace());
        return;
    }

    theStep = psChunkEnd;
}

// Parses the line terminator that must immediately follow a chunk body.
//
// If no line terminator is buffered yet, sets doNeedMoreData. If one is
// found at the very start of the input, consumes it, marks the current chunk
// as finished, and goes back to expecting a chunk-size line.
//
// Throws (via TexcHere) if anything precedes the line terminator.
void ChunkedCodingParser::parseChunkEnd()
{
    Must(theLeftBodySize == 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    // the terminator must start right where the chunk body ended
    if (crlfBeg != 0)
        throw TexcHere("found data between chunk end and CRLF");

    theIn->consume(crlfEnd);
    theChunkSize = 0; // done with the current chunk
    theStep = psChunkBeg;
}

// Handles the trailer section that follows the last (zero-sized) chunk by
// processing one trailer line at a time until parseTrailerHeader() either
// runs out of input or reaches the empty line that ends the message.
void ChunkedCodingParser::parseTrailer()
{
    Must(theChunkSize == 0); // Should(), really

    // mayContinue() becomes false when more data is needed or when
    // parseTrailerHeader() switches the step to psMessageEnd
    while (mayContinue())
        parseTrailerHeader();
}

// Processes a single line of the trailer section.
//
// If a complete line is not buffered yet, sets doNeedMoreData. Otherwise the
// line is consumed; its content is not stored anywhere. An empty line (a
// line terminator at offset zero) ends the trailer and the whole message.
void ChunkedCodingParser::parseTrailerHeader()
{
    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        doNeedMoreData = true;
        return;
    }

    // Trailer headers are skipped, not kept. Keeping them would mean copying
    // the line before it is consumed, for example:
    //     theTrailer.append(theIn->content(), crlfEnd);
    theIn->consume(crlfEnd);

    if (crlfBeg == 0)
        theStep = psMessageEnd; // empty line: end of trailer and message
}

// Placeholder for the terminal step. It exists so that psMessageEnd has a
// distinct member function to point to; mayContinue() stops the parse() loop
// before this step could be invoked, so reaching it fails the Must() check.
void ChunkedCodingParser::parseMessageEnd()
{
    // termination step, should not be called
    Must(false); // Should(), really
}

// finds next CRLF
//
// Scans the buffered input (theIn) from its beginning for the first line
// terminator that is not inside a double-quoted string. A terminator is
// either a bare LF or a run of one or more CRs immediately followed by LF.
//
// On success, returns true and sets
//   crlfBeg to the offset of the first terminator character (the first CR
//           of the run, or the LF itself when there is no CR), and
//   crlfEnd to the offset just past the LF.
// Returns false, leaving both outputs unmodified, if the buffered input
// contains no such terminator.
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
{
    // XXX: This code was copied, with permission, from another software.
    // There is a similar and probably better code inside httpHeaderParse
    // but it seems difficult to isolate due to parsing-unrelated bloat.
    // Such isolation should probably be done before this class is used
    // for handling of traffic "more external" than ICAP.

    const char *buf = theIn->content();
    size_t size = theIn->contentSize();

    ssize_t crOff = -1;   // offset of the first CR in the current CR run; -1 if none
    bool quoted = false;  // inside a double-quoted string
    bool slashed = false; // previous character was a backslash inside a quoted string

    for (size_t i = 0; i < size; ++i) {
        // a backslash-escaped character is skipped without being examined
        if (slashed) {
            slashed = false;
            continue;
        }

        const char c = buf[i];

        // handle quoted strings
        if (quoted) {
            // inside quotes, only a backslash and the closing quote matter;
            // CR and LF characters are ignored here
            if (c == '\\')
                slashed = true;
            else if (c == '"')
                quoted = false;

            continue;
        } else if (c == '"') {
            // opening quote; it also cancels any CR run seen just before it
            quoted = true;
            crOff = -1;
            continue;
        }

        if (crOff < 0) { // looking for the first CR or LF

            if (c == '\n') {
                // bare LF: the terminator is this single character
                crlfBeg = i;
                crlfEnd = ++i;
                return true;
            }

            if (c == '\r')
                crOff = i; // remember where the CR run starts
        } else { // skipping CRs, looking for the first LF

            if (c == '\n') {
                // CR run followed by LF: terminator spans from the first CR
                crlfBeg = crOff;
                crlfEnd = ++i;
                return true;
            }

            // anything but another CR means the CR run was not a terminator
            if (c != '\r')
                crOff = -1;
        }
    }

    return false;
}
Commit	Line	Data
774c051c	1	#include "squid.h"
	2	#include "Parsing.h"
	3	#include "TextException.h"
	4	#include "ChunkedCodingParser.h"
	5	#include "MemBuf.h"
	6
	7	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
	8	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
	9	ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
	10	ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
	11	ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
	12
	13	ChunkedCodingParser::ChunkedCodingParser()
	14	{
	15	reset();
	16	}
	17
	18	void ChunkedCodingParser::reset()
	19	{
	20	theStep = psChunkBeg;
	21	theChunkSize = theLeftBodySize = 0;
	22	doNeedMoreData = false;
774c051c	23	theIn = theOut = NULL;
	24	}
	25
	26	bool ChunkedCodingParser::parse(MemBuf rawData, MemBuf parsedContent)
	27	{
	28	Must(rawData && parsedContent);
	29	theIn = rawData;
	30	theOut = parsedContent;
	31
	32	// we must reset this all the time so that mayContinue() lets us
	33	// output more content if we stopped due to needsMoreSpace() before
	34	doNeedMoreData = !theIn->hasContent();
	35
	36	while (mayContinue()) {
	37	(this->*theStep)();
	38	}
	39
	40	return theStep == psMessageEnd;
	41	}
	42
	43	bool ChunkedCodingParser::needsMoreData() const
	44	{
	45	return doNeedMoreData;
	46	}
	47
	48	bool ChunkedCodingParser::needsMoreSpace() const
	49	{
	50	assert(theOut);
	51	return theStep == psChunkBody && !theOut->hasPotentialSpace();
	52	}
	53
	54	bool ChunkedCodingParser::mayContinue() const
	55	{
	56	return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
	57	}
	58
	59	void ChunkedCodingParser::parseChunkBeg()
	60	{
	61	Must(theChunkSize <= 0); // Should(), really
	62
	63	size_t crlfBeg = 0;
	64	size_t crlfEnd = 0;
	65
	66	if (findCrlf(crlfBeg, crlfEnd)) {
81c07a23	67	debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);
47f6e231	68	int64_t size = -1;
774c051c	69	const char *p = 0;
774c051c	70
47f6e231	71	if (StringToInt64(theIn->content(), size, &p, 16)) {
774c051c	72	if (size < 0) {
	73	throw TexcHere("negative chunk size");
	74	return;
	75	}
	76
774c051c	77	theIn->consume(crlfEnd);
774c051c	78	theChunkSize = theLeftBodySize = size;
81c07a23	79	debugs(94,7, "found chunk: " << theChunkSize);
774c051c	80	theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
	81	return;
	82	}
	83
	84	throw TexcHere("corrupted chunk size");
	85	}
	86
	87	doNeedMoreData = true;
	88	}
	89
	90	void ChunkedCodingParser::parseChunkBody()
	91	{
	92	Must(theLeftBodySize > 0); // Should, really
	93
d85c3078 AJ	94	const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
d85c3078 AJ	95	const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
774c051c	96
	97	doNeedMoreData = availSize < theLeftBodySize;
	98	// and we may also need more space
	99
	100	theOut->append(theIn->content(), safeSize);
	101	theIn->consume(safeSize);
	102	theLeftBodySize -= safeSize;
	103
	104	if (theLeftBodySize == 0)
	105	theStep = psChunkEnd;
	106	else
	107	Must(needsMoreData() \|\| needsMoreSpace());
	108	}
	109
	110	void ChunkedCodingParser::parseChunkEnd()
	111	{
	112	Must(theLeftBodySize == 0); // Should(), really
	113
	114	size_t crlfBeg = 0;
	115	size_t crlfEnd = 0;
	116
	117	if (findCrlf(crlfBeg, crlfEnd)) {
	118	if (crlfBeg != 0) {
	119	throw TexcHere("found data bewteen chunk end and CRLF");
	120	return;
	121	}
	122
	123	theIn->consume(crlfEnd);
	124	theChunkSize = 0; // done with the current chunk
	125	theStep = psChunkBeg;
	126	return;
	127	}
	128
	129	doNeedMoreData = true;
	130	}
	131
	132	void ChunkedCodingParser::parseTrailer()
	133	{
	134	Must(theChunkSize == 0); // Should(), really
	135
	136	while (mayContinue())
	137	parseTrailerHeader();
	138	}
	139
	140	void ChunkedCodingParser::parseTrailerHeader()
	141	{
	142	size_t crlfBeg = 0;
	143	size_t crlfEnd = 0;
	144
	145	if (findCrlf(crlfBeg, crlfEnd)) {
	146	if (crlfBeg > 0)
	147
	148	; //theTrailer.append(theIn->content(), crlfEnd);
	149
	150	theIn->consume(crlfEnd);
	151
	152	if (crlfBeg == 0)
	153	theStep = psMessageEnd;
	154
	155	return;
	156	}
	157
	158	doNeedMoreData = true;
	159	}
160
161	void ChunkedCodingParser::parseMessageEnd()
162	{
163	// termination step, should not be called
164	Must(false); // Should(), really
165	}
166
167	// finds next CRLF
168	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
169	{
170	// XXX: This code was copied, with permission, from another software.
171	// There is a similar and probably better code inside httpHeaderParse
172	// but it seems difficult to isolate due to parsing-unrelated bloat.
173	// Such isolation should probably be done before this class is used
174	// for handling of traffic "more external" than ICAP.
175
176	const char *buf = theIn->content();
177	size_t size = theIn->contentSize();
178
179	ssize_t crOff = -1;
180	bool quoted = false;
181	bool slashed = false;
182
183	for (size_t i = 0; i < size; ++i) {
184	if (slashed) {
185	slashed = false;
186	continue;
187	}
188
189	const char c = buf[i];
190
191	// handle quoted strings
192	if (quoted) {
193	if (c == '\\')
194	slashed = true;
e1381638 AJ	195	else if (c == '"')
e1381638 AJ	196	quoted = false;
774c051c	197
774c051c	198	continue;
e1381638 AJ	199	} else if (c == '"') {
	200	quoted = true;
	201	crOff = -1;
	202	continue;
	203	}
774c051c	204
	205	if (crOff < 0) { // looking for the first CR or LF
	206
	207	if (c == '\n') {
	208	crlfBeg = i;
	209	crlfEnd = ++i;
	210	return true;
	211	}
	212
	213	if (c == '\r')
	214	crOff = i;
	215	} else { // skipping CRs, looking for the first LF
	216
	217	if (c == '\n') {
	218	crlfBeg = crOff;
	219	crlfEnd = ++i;
	220	return true;
	221	}
	222
	223	if (c != '\r')
	224	crOff = -1;
	225	}
	226	}
	227
	228	return false;
	229	}
	230