From 5c550f5f4660635437e2564187e9918d776e65b5 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Tue, 31 Aug 2010 17:50:57 -0600 Subject: [PATCH] Compliance: Ignore unused chunk-extensions to correctly handle large ones. Chunk parser did not advance until it got a complete chunk-extension. HttpStateData::maybeReadVirginBody() does not grow the buffer if there is no space available for the [chunked] body so the transaction with a large chunk-extension would stall. The default HttpStateData input buffer size is just 2KB so it does not take a "very large" extension to stall the transaction. Somewhat ironically, we are not even interested in the HTTP chunk-extension itself. After the change, Squid skips the chunk-extension data as soon as it gets it (except for the last-chunk, see below). Incrementally ignoring data requires handling quoted strings correctly, to avoid mis-detecting a quoted CRLF. Thus, we now preserve the quoted string parsing state in ChunkedCodingParser. Last-chunk chunk-extension is useful for ICAP. We parse it instead of ignoring. This parsing is done as before and may still lead to connection hanging, but a proper fix is outside this patch scope. Similarly, other stalling reasons are outside this patch scope. Co-Advisor test case: test_case/rfc2616/chunked-1p0-longValExt-16385-toClt --- src/ChunkedCodingParser.cc | 104 ++++++++++++++++++++++++------------- src/ChunkedCodingParser.h | 11 +++- 2 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/ChunkedCodingParser.cc b/src/ChunkedCodingParser.cc index 80001f6611..733526d5a9 100644 --- a/src/ChunkedCodingParser.cc +++ b/src/ChunkedCodingParser.cc @@ -4,7 +4,9 @@ #include "ChunkedCodingParser.h" #include "MemBuf.h" -ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg; +ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize; +ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension; +ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension; ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody; ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd; ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer; @@ -17,11 +19,12 @@ ChunkedCodingParser::ChunkedCodingParser() void ChunkedCodingParser::reset() { - theStep = psChunkBeg; + theStep = psChunkSize; theChunkSize = theLeftBodySize = 0; doNeedMoreData = false; theIn = theOut = NULL; useOriginBody = -1; + inQuoted = inSlashed = false; } bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent) @@ -57,39 +60,47 @@ bool ChunkedCodingParser::mayContinue() const return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd; } -void ChunkedCodingParser::parseChunkBeg() +void ChunkedCodingParser::parseChunkSize() { Must(theChunkSize <= 0); // Should(), really - size_t crlfBeg = 0; - size_t crlfEnd = 0; - - if (findCrlf(crlfBeg, crlfEnd)) { - debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd); - int64_t size = -1; - const char *p = 0; - - if (StringToInt64(theIn->content(), size, &p, 16)) { - if (size < 0) { - throw TexcHere("negative chunk size"); - return; - } - - // to allow chunk extensions in any chunk, remove this size check - if (size == 0) // is this the last-chunk? - parseChunkExtension(p, theIn->content() + crlfBeg); + const char *p = theIn->content(); + while (p < theIn->space() && xisxdigit(*p)) ++p; + if (p >= theIn->space()) { + doNeedMoreData = true; + return; + } - theIn->consume(crlfEnd); - theChunkSize = theLeftBodySize = size; - debugs(94,7, "found chunk: " << theChunkSize); - theStep = theChunkSize == 0 ? psTrailer : psChunkBody; - return; + int64_t size = -1; + if (StringToInt64(theIn->content(), size, &p, 16)) { + if (size < 0) + throw TexcHere("negative chunk size"); + + theChunkSize = theLeftBodySize = size; + debugs(94,7, "found chunk: " << theChunkSize); + // parse chunk extensions only in the last-chunk + if (theChunkSize) + theStep = psUnusedChunkExtension; + else { + theIn->consume(p - theIn->content()); + theStep = psLastChunkExtension; } - + } else throw TexcHere("corrupted chunk size"); - } +} - doNeedMoreData = true; +void ChunkedCodingParser::parseUnusedChunkExtension() +{ + size_t crlfBeg = 0; + size_t crlfEnd = 0; + if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) { + inQuoted = inSlashed = false; + theIn->consume(crlfEnd); + theStep = theChunkSize ? psChunkBody : psTrailer; + } else { + theIn->consume(theIn->contentSize()); + doNeedMoreData = true; + } } void ChunkedCodingParser::parseChunkBody() @@ -127,7 +138,7 @@ void ChunkedCodingParser::parseChunkEnd() theIn->consume(crlfEnd); theChunkSize = 0; // done with the current chunk - theStep = psChunkBeg; + theStep = psChunkSize; return; } @@ -169,8 +180,17 @@ void ChunkedCodingParser::parseMessageEnd() Must(false); // Should(), really } -// finds next CRLF +/// Finds next CRLF. Does not store parsing state. bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) +{ + bool quoted = false; + bool slashed = false; + return findCrlf(crlfBeg, crlfEnd, quoted, slashed); +} + +/// Finds next CRLF. Parsing state stored in quoted and slashed +/// parameters. Incremental: can resume when more data is available. +bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed) { // XXX: This code was copied, with permission, from another software. // There is a similar and probably better code inside httpHeaderParse @@ -182,8 +202,6 @@ bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) size_t size = theIn->contentSize(); ssize_t crOff = -1; - bool quoted = false; - bool slashed = false; for (size_t i = 0; i < size; ++i) { if (slashed) { @@ -234,20 +252,31 @@ bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) } // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) -void ChunkedCodingParser::parseChunkExtension(const char *startExt, const char *endExt) +void ChunkedCodingParser::parseLastChunkExtension() { + size_t crlfBeg = 0; + size_t crlfEnd = 0; + + if (!findCrlf(crlfBeg, crlfEnd)) { + doNeedMoreData = true; + return; + } + + const char *const startExt = theIn->content(); + const char *const endExt = theIn->content() + crlfBeg; + // chunk-extension starts at startExt and ends with LF at endEx for (const char *p = startExt; p < endExt;) { while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';' if (*p++ != ';') // each ext name=value pair is preceded with ';' - return; + break; while (*p == ' ' || *p == '\t') ++p; // skip spaces before name if (p >= endExt) - return; // malformed extension: ';' without ext name=value pair + break; // malformed extension: ';' without ext name=value pair const int extSize = endExt - p; // TODO: we need debugData() stream manipulator to dump data @@ -257,11 +286,14 @@ void ChunkedCodingParser::parseChunkExtension(const char *startExt, const char * if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) { (void)StringToInt64(p+18, useOriginBody, &p, 10); debugs(94, 3, HERE << "use-original-body=" << useOriginBody); - return; // remove to support more than just use-original-body + break; // remove to support more than just use-original-body } else { debugs(94, 5, HERE << "skipping unknown chunk extension"); // TODO: support quoted-string chunk-ext-val while (p < endExt && *p != ';') ++p; // skip until the next ';' } } + + theIn->consume(crlfEnd); + theStep = theChunkSize ? psChunkBody : psTrailer; } diff --git a/src/ChunkedCodingParser.h b/src/ChunkedCodingParser.h index 5b34c75998..3a3725bf1a 100644 --- a/src/ChunkedCodingParser.h +++ b/src/ChunkedCodingParser.h @@ -73,6 +73,9 @@ private: private: bool mayContinue() const; + void parseChunkSize(); + void parseUnusedChunkExtension(); + void parseLastChunkExtension(); void parseChunkBeg(); void parseChunkBody(); void parseChunkEnd(); @@ -80,11 +83,13 @@ private: void parseTrailerHeader(); void parseMessageEnd(); - void parseChunkExtension(const char *, const char * ); bool findCrlf(size_t &crlfBeg, size_t &crlfEnd); + bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed); private: - static Step psChunkBeg; + static Step psChunkSize; + static Step psUnusedChunkExtension; + static Step psLastChunkExtension; static Step psChunkBody; static Step psChunkEnd; static Step psTrailer; @@ -97,6 +102,8 @@ private: uint64_t theChunkSize; uint64_t theLeftBodySize; bool doNeedMoreData; + bool inQuoted; ///< stores parsing state for incremental findCrlf + bool inSlashed; ///< stores parsing state for incremental findCrlf public: int64_t useOriginBody; -- 2.47.2