[thirdparty/squid.git] / src / ICAP / ChunkedCodingParser.cc

#include "squid.h"
#include "Parsing.h"
#include "TextException.h"
#include "ChunkedCodingParser.h"
#include "MemBuf.h"

// Parsing steps: each static member holds a pointer to the member function
// that handles one stage of the chunked message. parse() repeatedly invokes
// whichever of these theStep currently points to; psMessageEnd is the
// terminal step and is only compared against, never executed.
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;

// Constructs a parser in its initial state; all members are set by reset().
ChunkedCodingParser::ChunkedCodingParser()
{
    reset();
}

// Puts the parser back into its initial state so that it can be used for a
// new chunked message: no buffers attached, no chunk in progress, no ieof
// extension seen, and the next step is the start of a chunk.
void ChunkedCodingParser::reset()
{
    // detach from any buffers supplied to a previous parse() call
    theIn = NULL;
    theOut = NULL;

    // forget per-message flags
    sawIeof = false;
    doNeedMoreData = false;

    // no chunk is being processed
    theLeftBodySize = 0;
    theChunkSize = 0;

    // the first thing expected is a chunk-size line
    theStep = psChunkBeg;
}

// Runs the parsing steps over rawData, appending decoded chunk bodies to
// parsedContent, until a step reports that it needs more input, the output
// has no potential space left during a chunk body, or the end of the message
// is reached.
//
// rawData:       input buffer; parsed bytes are consumed from it.
// parsedContent: output buffer that receives chunk body bytes.
// Both pointers must be non-null (enforced with Must()).
//
// Returns true once the final step (psMessageEnd) has been reached.
bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
{
    Must(rawData && parsedContent);
    theOut = parsedContent;
    theIn = rawData;

    // Recompute the flag on every call: a previous call may have stopped
    // because of needsMoreSpace(), and mayContinue() must then let us
    // proceed if the caller has made room in the meantime.
    const bool inputIsEmpty = !theIn->hasContent();
    doNeedMoreData = inputIsEmpty;

    while (mayContinue())
        (this->*theStep)(); // each step may advance theStep or set doNeedMoreData

    const bool reachedMessageEnd = (theStep == psMessageEnd);
    return reachedMessageEnd;
}

// Reports whether the parser stopped because the input did not contain
// enough bytes: the flag is set at the start of parse() when the input is
// empty and by the individual steps when a complete line or chunk body is
// not yet available.
bool ChunkedCodingParser::needsMoreData() const
{
    return doNeedMoreData;
}

// Reports whether parsing is blocked on the output buffer: true only while
// a chunk body is being copied and the output buffer reports no potential
// space. Requires that an output buffer has been attached (asserted).
bool ChunkedCodingParser::needsMoreSpace() const
{
    assert(theOut);

    // output space matters only while copying chunk body bytes
    if (theStep != psChunkBody)
        return false;

    return !theOut->hasPotentialSpace();
}

// Tells whether another parsing step can be executed right now: the parser
// must not be waiting for input, must not be waiting for output space, and
// must not have reached the end of the message. The checks are made in that
// order and stop at the first one that fails.
bool ChunkedCodingParser::mayContinue() const
{
    if (needsMoreData())
        return false;

    if (needsMoreSpace())
        return false;

    return theStep != psMessageEnd;
}

// Parses the line that starts a chunk: a hexadecimal chunk size, optionally
// followed by chunk extensions, terminated by a line ending located with
// findCrlf().
//
// On success the line is consumed from the input, theChunkSize and
// theLeftBodySize are set to the parsed size, and the next step becomes
// psChunkBody (size > 0) or psTrailer (size == 0, i.e. the last chunk).
// For the last chunk, sawIeof is set according to whether the first
// extension is "ieof" followed by whitespace.
//
// If the input does not yet hold a complete line, doNeedMoreData is set and
// nothing is consumed.
//
// Throws (via TexcHere) when the size cannot be parsed, is negative, or is
// not located on the chunk-size line itself.
void ChunkedCodingParser::parseChunkBeg()
{
    Must(theChunkSize <= 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        // the chunk-size line is incomplete; wait for the rest of it
        doNeedMoreData = true;
        return;
    }

    debugs(93,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);

    // first byte of the line terminator: the size and any chunk extensions
    // must lie before this position
    const char *lineEnd = theIn->content() + crlfBeg;

    int64_t size = -1;
    const char *p = 0;

    if (!StringToInt64(theIn->content(), size, &p, 16))
        throw TexcHere("corrupted chunk size");

    if (size < 0)
        throw TexcHere("negative chunk size");

    // NOTE(review): StringToInt64() is defined elsewhere; if it skips leading
    // whitespace the way strtoll() does, a blank chunk-size line would make
    // it read digits from the *following* line while only the blank line is
    // consumed below. Reject any number that ends past the line terminator.
    if (p && p > lineEnd)
        throw TexcHere("corrupted chunk size");

    // check for ieof chunk extension in the last-chunk
    if (size == 0 && p && *p++ == ';') {
        const char *e = lineEnd; // end of extension

        while (p < e && xisspace(*p))
            ++p; // skip space

        // when exactly four bytes remain, p[4] is the first byte of the
        // line terminator, so the read stays inside the located line
        sawIeof = e - p >= 4 &&
                  strncmp(p, "ieof", 4) == 0 &&
                  xisspace(p[4]);
    }

    theIn->consume(crlfEnd);
    theChunkSize = theLeftBodySize = size;
    debugs(93,7, "found chunk: " << theChunkSize);
    theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
}

// Copies as much of the current chunk body as possible from the input
// buffer to the output buffer. The amount copied is limited by the bytes
// still missing from the chunk, the bytes present in the input, and the
// potential space of the output.
//
// Sets doNeedMoreData when the input holds less than the rest of the chunk.
// Moves on to psChunkEnd once the whole chunk body has been copied;
// otherwise the parser must be blocked on input or on output space.
void ChunkedCodingParser::parseChunkBody()
{
    Must(theLeftBodySize > 0); // Should, really

    // chunk body bytes that are already present in the input buffer
    const size_t bufferedSize = XMIN(theLeftBodySize, (uint64_t)theIn->contentSize());

    // of those, the part that the output buffer can take
    const size_t copySize = XMIN(bufferedSize, (size_t)theOut->potentialSpaceSize());

    // the input is short of a full chunk (we may also be short of space)
    doNeedMoreData = bufferedSize < theLeftBodySize;

    theOut->append(theIn->content(), copySize);
    theIn->consume(copySize);
    theLeftBodySize -= copySize;

    if (theLeftBodySize != 0) {
        // an unfinished chunk must be explained by one of the two limits
        Must(needsMoreData() || needsMoreSpace());
        return;
    }

    theStep = psChunkEnd;
}

// Parses the line ending that must immediately follow a chunk body.
//
// On success the terminator is consumed, the current chunk is marked as
// finished (theChunkSize = 0), and the next step becomes psChunkBeg.
// If no line ending is available yet, doNeedMoreData is set and nothing is
// consumed.
//
// Throws (via TexcHere) if any bytes precede the line ending.
void ChunkedCodingParser::parseChunkEnd()
{
    Must(theLeftBodySize == 0); // Should(), really

    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        // the terminator has not arrived yet
        doNeedMoreData = true;
        return;
    }

    // the terminator must start right where the chunk body ended
    if (crlfBeg != 0)
        throw TexcHere("found data between chunk end and CRLF");

    theIn->consume(crlfEnd);
    theChunkSize = 0; // done with the current chunk
    theStep = psChunkBeg;
}

// Handles the trailer that follows the last (zero-sized) chunk by parsing
// one trailer line after another for as long as mayContinue() allows, i.e.
// until more input is needed or the end of the message has been reached.
void ChunkedCodingParser::parseTrailer()
{
    Must(theChunkSize == 0); // Should(), really

    while (mayContinue()) {
        // consumes a single line; an empty one ends the message
        parseTrailerHeader();
    }
}

// Consumes one line of the trailer. A non-empty line is a trailer header;
// it is not stored anywhere, just skipped. An empty line (a line ending at
// offset zero) terminates the message and switches to psMessageEnd.
// If no complete line is available, doNeedMoreData is set instead.
void ChunkedCodingParser::parseTrailerHeader()
{
    size_t crlfBeg = 0;
    size_t crlfEnd = 0;

    if (!findCrlf(crlfBeg, crlfEnd)) {
        // wait for the rest of the line
        doNeedMoreData = true;
        return;
    }

    // nothing before the line ending means this is the final empty line
    const bool isEmptyLine = (crlfBeg == 0);

    // drop the line, including its terminator, whatever it contained
    theIn->consume(crlfEnd);

    if (isEmptyLine)
        theStep = psMessageEnd;
}

// Handler behind psMessageEnd. It exists so that psMessageEnd has a function
// to point to; mayContinue() returns false once theStep equals psMessageEnd,
// so parse() never invokes it. Reaching this body is treated as a failure.
void ChunkedCodingParser::parseMessageEnd()
{
    // termination step, should not be called
    Must(false); // Should(), really
}

// Locates the first line ending in the input buffer that is not inside a
// double-quoted string.
//
// A line ending is an LF, optionally preceded by an unbroken run of CRs.
// Inside a quoted string every byte is ignored, and a backslash makes the
// byte after it ignored as well (so an escaped quote does not close the
// string).
//
// crlfBeg: set to the offset of the first CR of the run preceding the LF,
//          or of the LF itself when no CR precedes it.
// crlfEnd: set to the offset just past the LF.
// Returns true if a line ending was found; otherwise returns false and
// leaves both output parameters untouched.
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
{
    // XXX: This code was copied, with permission, from another software.
    // There is a similar and probably better code inside httpHeaderParse
    // but it seems difficult to isolate due to parsing-unrelated bloat.
    // Such isolation should probably be done before this class is used
    // for handling of traffic "more external" than ICAP.

    const char *data = theIn->content();
    const size_t total = theIn->contentSize();

    ssize_t crRunStart = -1; // offset of the first CR in the current run, if any
    bool inQuotes = false;   // inside a "quoted string"
    bool escaped = false;    // previous byte was a backslash inside quotes

    for (size_t pos = 0; pos < total; ++pos) {
        if (escaped) {
            // this byte is escaped; skip it without looking at it
            escaped = false;
            continue;
        }

        const char ch = data[pos];

        if (inQuotes) {
            // only a backslash or the closing quote matter in here
            if (ch == '\\')
                escaped = true;
            else if (ch == '"')
                inQuotes = false;

            continue;
        }

        switch (ch) {

        case '"':
            // an opening quote interrupts any run of CRs
            inQuotes = true;
            crRunStart = -1;
            break;

        case '\n':
            // end of line: it starts at the CR run, or at this bare LF
            if (crRunStart < 0)
                crlfBeg = pos;
            else
                crlfBeg = crRunStart;

            crlfEnd = pos + 1;
            return true;

        case '\r':
            // remember where the run of CRs started; later CRs extend it
            if (crRunStart < 0)
                crRunStart = pos;

            break;

        default:
            // any other byte breaks a run of CRs
            crRunStart = -1;
            break;
        }
    }

    return false;
}
Commit	Line	Data
774c051c	1	#include "squid.h"
	2	#include "Parsing.h"
	3	#include "TextException.h"
	4	#include "ChunkedCodingParser.h"
	5	#include "MemBuf.h"
	6
	7	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
	8	ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
	9	ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
	10	ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
	11	ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
	12
	13	ChunkedCodingParser::ChunkedCodingParser()
	14	{
	15	reset();
	16	}
	17
	18	void ChunkedCodingParser::reset()
	19	{
	20	theStep = psChunkBeg;
	21	theChunkSize = theLeftBodySize = 0;
	22	doNeedMoreData = false;
	23	sawIeof = false;
	24	theIn = theOut = NULL;
	25	}
	26
	27	bool ChunkedCodingParser::parse(MemBuf rawData, MemBuf parsedContent)
	28	{
	29	Must(rawData && parsedContent);
	30	theIn = rawData;
	31	theOut = parsedContent;
	32
	33	// we must reset this all the time so that mayContinue() lets us
	34	// output more content if we stopped due to needsMoreSpace() before
	35	doNeedMoreData = !theIn->hasContent();
	36
	37	while (mayContinue()) {
	38	(this->*theStep)();
	39	}
	40
	41	return theStep == psMessageEnd;
	42	}
	43
	44	bool ChunkedCodingParser::needsMoreData() const
	45	{
	46	return doNeedMoreData;
	47	}
	48
	49	bool ChunkedCodingParser::needsMoreSpace() const
	50	{
	51	assert(theOut);
	52	return theStep == psChunkBody && !theOut->hasPotentialSpace();
	53	}
	54
	55	bool ChunkedCodingParser::mayContinue() const
	56	{
	57	return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
	58	}
	59
	60	void ChunkedCodingParser::parseChunkBeg()
	61	{
	62	Must(theChunkSize <= 0); // Should(), really
	63
	64	size_t crlfBeg = 0;
65	size_t crlfEnd = 0;
66
67	if (findCrlf(crlfBeg, crlfEnd)) {
c99de607	68	debugs(93,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);
47f6e231	69	int64_t size = -1;
774c051c	70	const char *p = 0;
774c051c	71
47f6e231	72	if (StringToInt64(theIn->content(), size, &p, 16)) {
774c051c	73	if (size < 0) {
	74	throw TexcHere("negative chunk size");
	75	return;
	76	}
	77
	78	// check for ieof chunk extension in the last-chunk
	79	if (size == 0 && p && *p++ == ';') {
	80	const char *e = theIn->content() + crlfBeg; // end of extension
	81
e4755e29	82	while (p < e && xisspace(*p))
774c051c	83	++p; // skip space
	84
	85	sawIeof = e - p >= 4 &&
	86	strncmp(p, "ieof", 4) == 0 &&
e4755e29	87	xisspace(p[4]);
774c051c	88	}
	89
	90	theIn->consume(crlfEnd);
	91	theChunkSize = theLeftBodySize = size;
c99de607	92	debugs(93,7, "found chunk: " << theChunkSize);
774c051c	93	theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
	94	return;
	95	}
	96
	97	throw TexcHere("corrupted chunk size");
	98	}
	99
	100	doNeedMoreData = true;
	101	}
	102
	103	void ChunkedCodingParser::parseChunkBody()
	104	{
	105	Must(theLeftBodySize > 0); // Should, really
	106
47f6e231	107	const size_t availSize = XMIN(theLeftBodySize, (uint64_t)theIn->contentSize());
774c051c	108	const size_t safeSize = XMIN(availSize, (size_t)theOut->potentialSpaceSize());
	109
	110	doNeedMoreData = availSize < theLeftBodySize;
	111	// and we may also need more space
	112
	113	theOut->append(theIn->content(), safeSize);
	114	theIn->consume(safeSize);
	115	theLeftBodySize -= safeSize;
	116
	117	if (theLeftBodySize == 0)
	118	theStep = psChunkEnd;
	119	else
	120	Must(needsMoreData() \|\| needsMoreSpace());
	121	}
	122
	123	void ChunkedCodingParser::parseChunkEnd()
	124	{
	125	Must(theLeftBodySize == 0); // Should(), really
	126
	127	size_t crlfBeg = 0;
	128	size_t crlfEnd = 0;
	129
	130	if (findCrlf(crlfBeg, crlfEnd)) {
	131	if (crlfBeg != 0) {
	132	throw TexcHere("found data bewteen chunk end and CRLF");
	133	return;
	134	}
	135
	136	theIn->consume(crlfEnd);
	137	theChunkSize = 0; // done with the current chunk
	138	theStep = psChunkBeg;
	139	return;
	140	}
	141
	142	doNeedMoreData = true;
	143	}
	144
	145	void ChunkedCodingParser::parseTrailer()
	146	{
	147	Must(theChunkSize == 0); // Should(), really
	148
	149	while (mayContinue())
	150	parseTrailerHeader();
	151	}
	152
	153	void ChunkedCodingParser::parseTrailerHeader()
	154	{
	155	size_t crlfBeg = 0;
	156	size_t crlfEnd = 0;
	157
	158	if (findCrlf(crlfBeg, crlfEnd)) {
	159	if (crlfBeg > 0)
	160
	161	; //theTrailer.append(theIn->content(), crlfEnd);
	162
	163	theIn->consume(crlfEnd);
	164
	165	if (crlfBeg == 0)
	166	theStep = psMessageEnd;
	167
	168	return;
	169	}
	170
	171	doNeedMoreData = true;
172	}
173
174	void ChunkedCodingParser::parseMessageEnd()
175	{
176	// termination step, should not be called
177	Must(false); // Should(), really
178	}
179
180	// finds next CRLF
181	bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
182	{
183	// XXX: This code was copied, with permission, from another software.
184	// There is a similar and probably better code inside httpHeaderParse
185	// but it seems difficult to isolate due to parsing-unrelated bloat.
186	// Such isolation should probably be done before this class is used
187	// for handling of traffic "more external" than ICAP.
188
189	const char *buf = theIn->content();
190	size_t size = theIn->contentSize();
191
192	ssize_t crOff = -1;
193	bool quoted = false;
194	bool slashed = false;
195
196	for (size_t i = 0; i < size; ++i) {
197	if (slashed) {
198	slashed = false;
199	continue;
200	}
201
202	const char c = buf[i];
203
204	// handle quoted strings
205	if (quoted) {
206	if (c == '\\')
207	slashed = true;
208	else
209	if (c == '"')
210	quoted = false;
211
212	continue;
213	} else
214	if (c == '"') {
215	quoted = true;
216	crOff = -1;
217	continue;
218	}
219
220	if (crOff < 0) { // looking for the first CR or LF
221
222	if (c == '\n') {
223	crlfBeg = i;
224	crlfEnd = ++i;
225	return true;
226	}
227
228	if (c == '\r')
229	crOff = i;
230	} else { // skipping CRs, looking for the first LF
231
232	if (c == '\n') {
233	crlfBeg = crOff;
234	crlfEnd = ++i;
235	return true;
236	}
237
238	if (c != '\r')
239	crOff = -1;
240	}
241	}
242
243	return false;
244	}
245