From: Amos Jeffries
Date: Sat, 9 May 2009 07:03:02 +0000 (+1200)
Subject: Author: Alex Rousskov
X-Git-Tag: SQUID_3_0_STABLE16_RC1~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=37b098278b504815806dccc3f2262b354c6ee461;p=thirdparty%2Fsquid.git

Author: Alex Rousskov

Added temporary hack to support chunked HTTP/1.1 requests.

A broken or confused HTTP/1.1 client may send a chunked HTTP request to
Squid. Squid does not have full support for that feature yet. To cope
with such requests, Squid now buffers the entire request and then
dechunks the request body to create a plain HTTP/1.0 request with a
known content length. The plain request is then used by the rest of the
Squid code as usual.

The newly added chunked_request_body_max_size option specifies the
maximum size of the buffer used to hold the chunked request before the
conversion. If the chunked request size exceeds the specified limit,
the conversion fails, and the client receives an "unsupported request"
error, as if dechunking were disabled.

Dechunking is enabled by default. To disable the conversion of chunked
requests, set chunked_request_body_max_size to zero. Squid cannot
buffer chunked requests exceeding MEM_BUF_MAX_SIZE, which is currently
2*1000*1024*1024 bytes. The default chunked_request_body_max_size value
is 64 KB.

The request dechunking feature is a temporary hack. When chunked
requests and responses are fully supported, there will be no need to
buffer a chunked request. The client_side code contains a
FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS section that can be used as a
starting point for proper support. That code was working in simple
tests.

This dechunking code requires ICAP support in Squid v3.0 because of the
ICAP/ChunkedCodingParser dependency.
---
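[Editorial illustration] For orientation before reading the diff: the sketch below shows what the conversion amounts to, decoding a fully buffered chunked body into the plain body whose size becomes the new Content-Length. This is not Squid's ICAP/ChunkedCodingParser; the dechunk() helper is a hypothetical stand-in that skips chunk extensions and does not validate the CRLF after each chunk or preserve trailers.

    #include <stdexcept>
    #include <string>

    // Sketch only (NOT Squid code): decode a complete chunked body into
    // the plain body whose size becomes the Content-Length of the
    // rewritten HTTP/1.0 request. Throws on malformed or truncated input.
    static std::string dechunk(const std::string &chunked)
    {
        std::string plain;
        std::string::size_type pos = 0;
        for (;;) {
            // chunk-size line: hex count, optional ";ext", ends with CRLF
            const std::string::size_type eol = chunked.find("\r\n", pos);
            if (eol == std::string::npos)
                throw std::runtime_error("truncated chunk-size line");
            const unsigned long size =
                std::stoul(chunked.substr(pos, eol - pos), nullptr, 16);
            pos = eol + 2;
            if (size == 0)
                break; // last-chunk reached; optional trailers would follow
            if (pos + size + 2 > chunked.size())
                throw std::runtime_error("truncated chunk data");
            plain.append(chunked, pos, size);
            pos += size + 2; // chunk data plus its trailing CRLF
        }
        return plain; // plain.size() is the new Content-Length
    }

For example, dechunk("5\r\nhello\r\n0\r\n\r\n") yields "hello", so the rewritten request would carry Content-Length: 5.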
diff --git a/src/HttpMsg.cc b/src/HttpMsg.cc
index 964832729d..d9712ccb43 100644
--- a/src/HttpMsg.cc
+++ b/src/HttpMsg.cc
@@ -309,6 +309,14 @@ HttpMsg::httpMsgParseError()
     return -1;
 }
 
+void
+HttpMsg::setContentLength(int64_t clen)
+{
+    header.delById(HDR_CONTENT_LENGTH); // if any
+    header.putInt64(HDR_CONTENT_LENGTH, clen);
+    content_length = clen;
+}
+
 /* returns true if connection should be "persistent"
  * after processing this message */
 int
diff --git a/src/HttpMsg.h b/src/HttpMsg.h
index 719c00a6b7..007316b298 100644
--- a/src/HttpMsg.h
+++ b/src/HttpMsg.h
@@ -55,6 +55,9 @@ public:
     virtual HttpMsg *_lock();   // please use HTTPMSGLOCK()
     virtual void _unlock();     // please use HTTPMSGUNLOCK()
 
+    /// [re]sets Content-Length header and cached value
+    void setContentLength(int64_t clen);
+
 public:
     HttpVersion http_ver;
 
@@ -104,7 +107,8 @@ protected:
 };
 
 /* Temporary parsing state; might turn into the replacement parser later on */
-struct _HttpParser {
+class HttpParser {
+public:
     char state;
     const char *buf;
     int bufsiz;
@@ -115,7 +119,6 @@ struct _HttpParser {
     int v_start, v_end;
     int v_maj, v_min;
 };
-typedef struct _HttpParser HttpParser;
 
 extern void HttpParserInit(HttpParser *, const char *buf, int len);
 extern int HttpParserParseReqLine(HttpParser *hp);
diff --git a/src/cf.data.pre b/src/cf.data.pre
index 07ea2ca53a..eed64c5e73 100644
--- a/src/cf.data.pre
+++ b/src/cf.data.pre
@@ -2826,6 +2826,35 @@ DOC_START
        be no limit imposed.
 DOC_END
 
+NAME: chunked_request_body_max_size
+COMMENT: (bytes)
+TYPE: b_int64_t
+DEFAULT: 64 KB
+LOC: Config.maxChunkedRequestBodySize
+DOC_START
+       A broken or confused HTTP/1.1 client may send a chunked HTTP
+       request to Squid. Squid does not have full support for that
+       feature yet. To cope with such requests, Squid buffers the
+       entire request and then dechunks the request body to create a
+       plain HTTP/1.0 request with a known content length. The plain
+       request is then used by the rest of the Squid code as usual.
+
+       The option value specifies the maximum size of the buffer used
+       to hold the request before the conversion. If the chunked
+       request size exceeds the specified limit, the conversion
+       fails, and the client receives an "unsupported request" error,
+       as if dechunking were disabled.
+
+       Dechunking is enabled by default. To disable the conversion of
+       chunked requests, set the maximum to zero.
+
+       The request dechunking feature, and this option in particular,
+       are a temporary hack. When chunked requests and responses are
+       fully supported, there will be no need to buffer a chunked
+       request. Dechunking requires ICAP support in Squid v3.0 but not
+       in later versions (see the --enable-icap-client configure
+       option).
+DOC_END
+
 NAME: broken_posts
 TYPE: acl_access
 DEFAULT: none
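[Editorial illustration] As a usage sketch with hypothetical values (not part of the patch), the option takes the usual squid.conf byte-size units, and zero disables dechunking:

    # squid.conf sketch (assumed values, for illustration only)
    chunked_request_body_max_size 128 KB
    # chunked_request_body_max_size 0   # disable dechunking entirely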
diff --git a/src/client_side.cc b/src/client_side.cc
index 5290420bd8..6fb14abf52 100755
--- a/src/client_side.cc
+++ b/src/client_side.cc
@@ -76,6 +76,10 @@
 #include "MemBuf.h"
 #include "SquidTime.h"
 
+#if ICAP_CLIENT
+#include "ICAP/ChunkedCodingParser.h"
+#endif
+
 #if LINGERING_CLOSE
 #define comm_close comm_lingering_close
 #endif
@@ -1820,6 +1824,17 @@ prepareTransparentURL(ConnStateData::Pointer & conn, ClientHttpRequest *http, ch
     }
 }
 
+// Temporary hack helper: determine whether the request is chunked, expensive
+static bool
+isChunkedRequest(const HttpParser *hp) {
+    HttpRequest request;
+    if (!request.parseHeader(HttpParserHdrBuf(hp), HttpParserHdrSz(hp)))
+        return false;
+
+    return request.header.has(HDR_TRANSFER_ENCODING) &&
+           request.header.hasListMember(HDR_TRANSFER_ENCODING, "chunked", ',');
+}
+
 /*
  * parseHttpRequest()
  *
@@ -1832,7 +1847,6 @@ prepareTransparentURL(ConnStateData::Pointer & conn, ClientHttpRequest *http, ch
 static ClientSocketContext *
 parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * method_p, HttpVersion *http_ver)
 {
-    char *url = NULL;
     char *req_hdr = NULL;
     char *end;
     size_t req_sz;
@@ -1908,17 +1922,6 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho
         return parseHttpRequestAbort(conn, "error:unsupported-request-method");
     }
 
-    /* set url */
-    /*
-     * XXX this should eventually not use a malloc'ed buffer; the transformation code
-     * below needs to be modified to not expect a mutable nul-terminated string.
-     */
-    url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
-
-    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
-
-    url[hp->u_end - hp->u_start + 1] = '\0';
-
     /*
      * Process headers after request line
      * TODO: Use httpRequestParse here.
@@ -1938,7 +1941,6 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho
      */
     if ( squid_strnstr(req_hdr, "\r\r\n", req_sz) ) {
         debugs(33, 1, "WARNING: suspicious HTTP request contains double CR");
-        xfree(url);
         return parseHttpRequestAbort(conn, "error:double-CR");
     }
 
@@ -1946,6 +1948,41 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho
            (int) HttpParserRequestLen(hp) << ", req_line_sz = " <<
            HttpParserReqSz(hp));
 
+    // Temporary hack: We might receive a chunked body from a broken HTTP/1.1
+    // client that sends chunked requests to HTTP/1.0 Squid. If the request
+    // might have a chunked body, parse the headers early to look for the
+    // "Transfer-Encoding: chunked" header. If we find it, wait until the
+    // entire body is available so that we can set the content length and
+    // forward the request without chunks. The primary reason for this is
+    // to avoid forwarding a chunked request, because the server side lacks
+    // logic to determine when it is valid to do so. The secondary reason
+    // is that we should not send chunked requests if we cannot handle
+    // chunked responses, and Squid v3.0 cannot.
+    // FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS below will replace this hack.
+    if (hp->v_min == 1 && hp->v_maj == 1 && // broken client, may send chunks
+#if ICAP_CLIENT
+        Config.maxChunkedRequestBodySize > 0 && // configured to dechunk
+#else
+        false && // ICAP required for v3.0 because of ICAP/ChunkedCodingParser
+#endif
+        (*method_p == METHOD_PUT || *method_p == METHOD_POST)) {
+
+        // check only once per request because isChunkedRequest is expensive
+        if (conn->in.dechunkingState == ConnStateData::chunkUnknown) {
+            if (isChunkedRequest(hp))
+                conn->startDechunkingRequest(hp);
+            else
+                conn->in.dechunkingState = ConnStateData::chunkNone;
+        }
+
+        if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+            if (conn->parseRequestChunks(hp)) // parses newly read chunks
+                return NULL; // wait for more data
+            debugs(33, 5, HERE << "Got complete chunked request or err.");
+            assert(conn->in.dechunkingState != ConnStateData::chunkParsing);
+        }
+    }
+
     /* Ok, all headers are received */
     http = new ClientHttpRequest(conn);
 
@@ -1962,6 +1999,17 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho
     debugs(33, 5, "parseHttpRequest: Request Header is\n" << (hp->buf) + hp->hdr_start);
 
+    /* set url */
+    /*
+     * XXX this should eventually not use a malloc'ed buffer; the transformation code
+     * below needs to be modified to not expect a mutable nul-terminated string.
+     */
+    char *url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
+
+    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
+
+    url[hp->u_end - hp->u_start + 1] = '\0';
+
 #if THIS_VIOLATES_HTTP_SPECS_ON_URL_TRANSFORMATION
 
     if ((t = strchr(url, '#')))    /* remove HTML anchors */
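[Editorial illustration] The isChunkedRequest() helper above relies on HttpHeader::hasListMember to find "chunked" in a comma-separated Transfer-Encoding value. Below is a standalone, case-insensitive version of that kind of list-membership test; it is illustrative only and not Squid's HttpHeader implementation:

    #include <cctype>
    #include <sstream>
    #include <string>

    // Sketch only (NOT Squid code): true if a comma-separated header
    // value contains the given member, ignoring case and whitespace.
    static bool listHasMember(const std::string &headerValue,
                              const std::string &member)
    {
        std::istringstream items(headerValue);
        std::string item;
        while (std::getline(items, item, ',')) {
            const std::string::size_type b = item.find_first_not_of(" \t");
            if (b == std::string::npos)
                continue; // empty list element
            const std::string::size_type e = item.find_last_not_of(" \t");
            item = item.substr(b, e - b + 1);
            if (item.size() != member.size())
                continue;
            bool equal = true;
            for (std::string::size_type i = 0; i < item.size() && equal; ++i)
                equal = std::tolower((unsigned char)item[i]) ==
                        std::tolower((unsigned char)member[i]);
            if (equal)
                return true;
        }
        return false;
    }

    // listHasMember("gzip, Chunked", "chunked") == true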
@@ -2099,6 +2147,11 @@ connNoteUseOfBuffer(ConnStateData* conn, size_t byteCount)
 int
 connKeepReadingIncompleteRequest(ConnStateData::Pointer & conn)
 {
+    // when we read chunked requests, the entire body is buffered
+    // XXX: this check ignores header size and its limits.
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing)
+        return conn->in.notYetUsed < Config.maxChunkedRequestBodySize;
+
     return conn->in.notYetUsed >= Config.maxRequestHeaderSize ? 0 : 1;
 }
 
@@ -2108,8 +2161,13 @@ connCancelIncompleteRequests(ConnStateData::Pointer & conn)
     ClientSocketContext *context = parseHttpRequestAbort(conn, "error:request-too-large");
     clientStreamNode *node = context->getClientReplyContext();
     assert(!connKeepReadingIncompleteRequest(conn));
-    debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
-    debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+        debugs(33, 1, "Chunked request is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'chunked_request_body_max_size'= " << Config.maxChunkedRequestBodySize << " bytes.");
+    } else {
+        debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    }
     clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
     assert (repContext);
     repContext->setReplyToError(ERR_TOO_BIG,
@@ -2155,6 +2213,9 @@ clientProcessRequest(ConnStateData::Pointer &conn, HttpParser *hp, ClientSocketC
     ClientHttpRequest *http = context->http;
     HttpRequest *request = NULL;
     bool notedUseOfBuffer = false;
+    bool tePresent = false;
+    bool deChunked = false;
+    bool unsupportedTe = false;
 
     /* We have an initial client stream in place should it be needed */
     /* setup our private context */
@@ -2238,8 +2299,17 @@ clientProcessRequest(ConnStateData::Pointer &conn, HttpParser *hp, ClientSocketC
     request->my_port = ntohs(conn->me.sin_port);
     request->http_ver = http_ver;
 
-    if (!urlCheckRequest(request) ||
-            request->header.has(HDR_TRANSFER_ENCODING)) {
+    tePresent = request->header.has(HDR_TRANSFER_ENCODING);
+    deChunked = conn->in.dechunkingState == ConnStateData::chunkReady;
+    if (deChunked) {
+        assert(tePresent);
+        request->setContentLength(conn->in.dechunked.contentSize());
+        request->header.delById(HDR_TRANSFER_ENCODING);
+        conn->finishDechunkingRequest(hp);
+    }
+
+    unsupportedTe = tePresent && !deChunked;
+    if (!urlCheckRequest(request) || unsupportedTe) {
         clientStreamNode *node = context->getClientReplyContext();
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
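[Editorial illustration] The clientProcessRequest() change above boils down to a three-way decision on the Transfer-Encoding header. The stand-in below restates that logic outside Squid's types; the enum and function names are ours, not Squid's:

    // Sketch only (NOT Squid code): the Transfer-Encoding decision.
    enum TeOutcome {
        tePlain,       // no Transfer-Encoding header: process as before
        teDechunked,   // body was buffered and converted; Content-Length
                       // is set and the Transfer-Encoding header dropped
        teUnsupported  // Transfer-Encoding present but not converted:
                       // answer with an "unsupported request" error
    };

    static TeOutcome classifyTe(bool tePresent, bool deChunked)
    {
        if (!tePresent)
            return tePlain;
        return deChunked ? teDechunked : teUnsupported;
    }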
@@ -2569,13 +2639,74 @@ ConnStateData::handleRequestBodyData()
 {
     assert(bodyPipe != NULL);
 
-    if (const size_t putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed))
-        connNoteUseOfBuffer(this, putSize);
+    size_t putSize = 0;
+
+#if FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS
+    // The code below works, in principle, but we cannot do dechunking
+    // on-the-fly because that would mean sending chunked requests to
+    // the next hop. Squid lacks logic to determine which servers can
+    // receive chunked requests. Squid v3.0 code cannot even handle chunked
+    // responses, which we may encourage by sending chunked requests.
+    // The error generation code probably needs more work.
+    if (in.bodyParser) { // chunked body
+        debugs(33,5, HERE << "handling chunked request body for FD " << fd);
+        bool malformedChunks = false;
+
+        MemBuf raw; // ChunkedCodingParser only works with MemBufs
+        raw.init(in.notYetUsed, in.notYetUsed);
+        raw.append(in.buf, in.notYetUsed);
+        try { // the parser will throw on errors
+            const mb_size_t wasContentSize = raw.contentSize();
+            BodyPipeCheckout bpc(*bodyPipe);
+            const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+            bpc.checkIn();
+            putSize = wasContentSize - raw.contentSize();
+
+            if (parsed) {
+                stopProducingFor(bodyPipe, true); // this makes bodySize known
+            } else {
+                // parser needy state must imply body pipe needy state
+                if (in.bodyParser->needsMoreData() &&
+                        !bodyPipe->mayNeedMoreData())
+                    malformedChunks = true;
+                // XXX: if bodyParser->needsMoreSpace, how can we guarantee it?
+            }
+        } catch (...) { // XXX: be more specific
+            malformedChunks = true;
+        }
 
-    if (!bodyPipe->mayNeedMoreData()) {
-        // BodyPipe will clear us automagically when we produced everything
-        bodyPipe = NULL;
+        if (malformedChunks) {
+            if (bodyPipe != NULL)
+                stopProducingFor(bodyPipe, false);
+
+            ClientSocketContext::Pointer context = getCurrentContext();
+            if (!context->http->out.offset) {
+                clientStreamNode *node = context->getClientReplyContext();
+                clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
+                assert (repContext);
+                repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST,
+                                            METHOD_NONE, NULL, &peer.sin_addr,
+                                            NULL, NULL, NULL);
+                context->pullData();
+            }
+            flags.readMoreRequests = false;
+            return; // XXX: is that sufficient to generate an error?
+        }
+    } else // identity encoding
+#endif
+    {
+        debugs(33,5, HERE << "handling plain request body for FD " << fd);
+        putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed);
+        if (!bodyPipe->mayNeedMoreData()) {
+            // BodyPipe will clear us automagically when we produced everything
+            bodyPipe = NULL;
+        }
+    }
+
+    if (putSize > 0)
+        connNoteUseOfBuffer(this, putSize);
+
+    if (!bodyPipe) {
         debugs(33,5, HERE << "produced entire request body for FD " << fd);
 
         if (closing()) {
@@ -3281,17 +3412,129 @@ ConnStateData::startClosing(const char *reason)
     bodyPipe->enableAutoConsumption();
 }
 
+// initialize dechunking state
+void
+ConnStateData::startDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "start dechunking at " << HttpParserRequestLen(hp));
+    assert(in.dechunkingState == chunkUnknown);
+    assert(!in.bodyParser);
+#if ICAP_CLIENT
+    in.bodyParser = new ChunkedCodingParser;
+#endif
+    in.chunkedSeen = HttpParserRequestLen(hp); // skip headers when dechunking
+    in.chunked.init(); // TODO: should we have a smaller-than-default limit?
+    in.dechunked.init();
+    in.dechunkingState = chunkParsing;
+}
+
+// put parsed content into input buffer and clean up
+void
+ConnStateData::finishDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "finish dechunking; content: " << in.dechunked.contentSize());
+
+    assert(in.dechunkingState == chunkReady);
+    assert(in.bodyParser);
+#if ICAP_CLIENT
+    delete in.bodyParser;
+#endif
+    in.bodyParser = NULL;
+
+    const mb_size_t headerSize = HttpParserRequestLen(hp);
+
+    // dechunking cannot make data bigger
+    assert(headerSize + in.dechunked.contentSize() + in.chunked.contentSize()
+           <= static_cast<mb_size_t>(in.notYetUsed));
+    assert(in.notYetUsed <= in.allocatedSize);
+
+    // copy dechunked content
+    char *end = in.buf + headerSize;
+    xmemmove(end, in.dechunked.content(), in.dechunked.contentSize());
+    end += in.dechunked.contentSize();
+
+    // copy post-chunks leftovers, if any, caused by request pipelining?
+    if (in.chunked.contentSize()) {
+        xmemmove(end, in.chunked.content(), in.chunked.contentSize());
+        end += in.chunked.contentSize();
+    }
+
+    in.notYetUsed = end - in.buf;
+
+    in.chunked.clean();
+    in.dechunked.clean();
+    in.dechunkingState = chunkUnknown;
+}
+
+// parse newly read request chunks and buffer them for finishDechunkingRequest
+// returns true iff needs more data
+bool
+ConnStateData::parseRequestChunks(HttpParser *)
+{
+#if ICAP_CLIENT
+    debugs(33,5, HERE << "parsing chunked request body at " <<
+           in.chunkedSeen << " < " << in.notYetUsed);
+    assert(in.bodyParser);
+    assert(in.dechunkingState == chunkParsing);
+
+    assert(in.chunkedSeen <= in.notYetUsed);
+    const mb_size_t fresh = in.notYetUsed - in.chunkedSeen;
+
+    // be safe: count some chunked coding metadata towards the total body size
+    if (fresh + in.dechunked.contentSize() > Config.maxChunkedRequestBodySize) {
+        debugs(33,3, HERE << "chunked body (" << fresh << " + " <<
+               in.dechunked.contentSize() << ") may exceed " <<
+               "chunked_request_body_max_size=" <<
+               Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+
+    if (fresh > in.chunked.potentialSpaceSize()) {
+        // should not happen if Config.maxChunkedRequestBodySize is reasonable
+        debugs(33,1, HERE << "request_body_max_size exceeds chunked buffer " <<
+               "size: " << fresh << " + " << in.chunked.contentSize() << " > " <<
+               in.chunked.potentialSpaceSize() << " with " <<
+               "chunked_request_body_max_size=" <<
+               Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+
+    in.chunked.append(in.buf + in.chunkedSeen, fresh);
+    in.chunkedSeen += fresh;
+
+    try { // the parser will throw on errors
+        if (in.bodyParser->parse(&in.chunked, &in.dechunked))
+            in.dechunkingState = chunkReady; // successfully parsed all chunks
+        else
+            return true; // need more, keep the same state
+    } catch (...) {
+        debugs(33,3, HERE << "chunk parsing error");
+        in.dechunkingState = chunkError;
+    }
+#endif
+    return false; // error, unsupported, or done
+}
+
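[Editorial illustration] finishDechunkingRequest() rewrites the connection's read buffer in place: the headers keep their position, the dechunked body overwrites the raw chunked bytes (the decoded form is never larger), and any pipelined leftovers slide up behind it. A standalone toy model of that rewrite, assuming plain C strings instead of Squid's MemBuf and xmemmove:

    #include <cassert>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        // read buffer: headers, then the raw chunked body
        char buf[128] = "POST / HTTP/1.1\r\n\r\n" "5\r\nhello\r\n0\r\n\r\n";
        const std::size_t headerSize = std::strlen("POST / HTTP/1.1\r\n\r\n");
        std::size_t notYetUsed = std::strlen(buf);

        const char *dechunked = "hello"; // what the chunk parser accumulated
        const std::size_t dechunkedSize = 5;

        // dechunking never enlarges the data, so an in-place rewrite is safe
        assert(headerSize + dechunkedSize <= notYetUsed);
        std::memmove(buf + headerSize, dechunked, dechunkedSize);
        notYetUsed = headerSize + dechunkedSize;

        std::printf("%.*s\n", (int)notYetUsed, buf); // headers + plain body
        return 0;
    }

In the real code, setContentLength(5) would then record the plain body size in the rewritten request.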
 char *
 ConnStateData::In::addressToReadInto() const
 {
     return buf + notYetUsed;
 }
 
-ConnStateData::In::In() : buf (NULL), notYetUsed (0), allocatedSize (0)
+ConnStateData::In::In() : bodyParser(NULL),
+        buf (NULL), notYetUsed (0), allocatedSize (0),
+        dechunkingState(ConnStateData::chunkUnknown)
 {}
 
 ConnStateData::In::~In()
 {
     if (allocatedSize)
         memFreeBuf(allocatedSize, buf);
+
+    if (bodyParser)
+#if ICAP_CLIENT
+        delete bodyParser; // TODO: pool
+#else
+        assert(false); // chunked requests are only supported if ICAP is enabled
+#endif
 }
diff --git a/src/client_side.h b/src/client_side.h
index 0ad21f186f..a9f00589a4 100644
--- a/src/client_side.h
+++ b/src/client_side.h
@@ -46,6 +46,9 @@ class clientStreamNode;
 
 class AuthUserRequest;
 
+class ChunkedCodingParser;
+class HttpParser;
+
 template <class T>
 class Range;
 
@@ -149,14 +152,23 @@ public:
 
     int fd;
 
+    /// chunk buffering and parsing algorithm state
+    typedef enum { chunkUnknown, chunkNone, chunkParsing, chunkReady, chunkError } DechunkingState;
+
     struct In {
         In();
         ~In();
         char *addressToReadInto() const;
+
+        ChunkedCodingParser *bodyParser; ///< parses chunked request body
+        MemBuf chunked; ///< contains unparsed raw (chunked) body data
+        MemBuf dechunked; ///< accumulates parsed (dechunked) content
         char *buf;
         size_t notYetUsed;
         size_t allocatedSize;
+        size_t chunkedSeen; ///< size of processed or ignored raw read data
+        DechunkingState dechunkingState; ///< request dechunking state
     } in;
 
     int64_t bodySizeLeft();
@@ -208,6 +220,10 @@ public:
     void handleReadData(char *buf, size_t size);
     void handleRequestBodyData();
 
+    void startDechunkingRequest(HttpParser *hp);
+    bool parseRequestChunks(HttpParser *hp);
+    void finishDechunkingRequest(HttpParser *hp);
+
 private:
     CBDATA_CLASS2(ConnStateData);
     bool transparent_;
diff --git a/src/structs.h b/src/structs.h
index d3182ad3fa..5ade3dce5e 100644
--- a/src/structs.h
+++ b/src/structs.h
@@ -255,6 +255,7 @@ struct _SquidConfig
     } Timeout;
 
     size_t maxRequestHeaderSize;
     int64_t maxRequestBodySize;
+    int64_t maxChunkedRequestBodySize;
     size_t maxReplyHeaderSize;
     acl_size_t *ReplyBodySize;
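[Editorial illustration] Taken together, the DechunkingState values declared in client_side.h form a small per-connection state machine. The summary below is an editorial sketch of the transitions implemented by the patch, not code from the diff:

    // Dechunking state transitions implemented by this patch (sketch):
    //
    //   chunkUnknown --(not a chunked request)---------------> chunkNone
    //   chunkUnknown --(isChunkedRequest; start buffering)---> chunkParsing
    //   chunkParsing --(all chunks parsed)-------------------> chunkReady
    //   chunkParsing --(oversized or malformed chunks)-------> chunkError
    //   chunkReady   --(finishDechunkingRequest)-------------> chunkUnknown
    //
    // chunkNone means normal (non-chunked) processing continues;
    // chunkError leads to an "unsupported request" or
    // "request too large" error response for the current request.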