From: Alex Rousskov Date: Thu, 2 Jul 2009 16:36:36 +0000 (-0600) Subject: Truncate too-long HTTP response bodies to match their Content-Length header. X-Git-Tag: SQUID_3_2_0_1~913 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=821beb5e424258ebdc604c75e5954088cd631c9c;p=thirdparty%2Fsquid.git Truncate too-long HTTP response bodies to match their Content-Length header. Sometimes a broken server sends more than Content-Length bytes in the response. For example, a 302 redirect message with "Content-Length: 0" header may include an HTML body. Squid used to send "everything" it read to the client, even if it read more than the Content-Length bytes. That may have helped in some cases, but we should be more conservative when dealing with broken servers to combat message smuggling attacks and other bad side-effects for clients. We now do not forward more than the advertised content length and declare the connection with a broken server non-persistent. Chunked responses (that HTTP/1.0 Squid should not receive and that must not have a Content-Length header) are not truncated because RFC 2616 says we MUST ignore their Content-Length header. TODO: Do not cache the truncated entry and purge the cached version, if any. --- diff --git a/src/MemBuf.cc b/src/MemBuf.cc index 32d5974f72..4bce79907e 100644 --- a/src/MemBuf.cc +++ b/src/MemBuf.cc @@ -226,6 +226,15 @@ void MemBuf::consume(mb_size_t shiftSize) PROF_stop(MemBuf_consume); } +// removes last tailSize bytes +void MemBuf::truncate(mb_size_t tailSize) +{ + const mb_size_t cSize = contentSize(); + assert(0 <= tailSize && tailSize <= cSize); + assert(!stolen); /* not frozen */ + size -= tailSize; +} + /** * calls memcpy, appends exactly size bytes, * extends buffer or creates buffer if needed. diff --git a/src/MemBuf.h b/src/MemBuf.h index c881c2679d..a480520257 100644 --- a/src/MemBuf.h +++ b/src/MemBuf.h @@ -85,6 +85,7 @@ public: void consume(mb_size_t sz); // removes sz bytes, moving content left void append(const char *c, mb_size_t sz); // grows if needed and possible void appended(mb_size_t sz); // updates content size after external append + void truncate(mb_size_t sz); // removes sz last bytes void terminate(); // zero-terminates the buffer w/o increasing contentSize diff --git a/src/http.cc b/src/http.cc index 3e66c1f19c..fa2ecd29e8 100644 --- a/src/http.cc +++ b/src/http.cc @@ -76,7 +76,8 @@ static void copyOneHeaderFromClientsideRequestToUpstreamRequest(const HttpHeader HttpHeader * hdr_out, const int we_do_ranges, const http_state_flags); HttpStateData::HttpStateData(FwdState *theFwdState) : AsyncJob("HttpStateData"), ServerStateData(theFwdState), - lastChunk(0), header_bytes_read(0), reply_bytes_read(0), httpChunkDecoder(NULL) + lastChunk(0), header_bytes_read(0), reply_bytes_read(0), + body_bytes_truncated(0), httpChunkDecoder(NULL) { debugs(11,5,HERE << "HttpStateData " << this << " created"); ignoreCacheControl = false; @@ -976,6 +977,9 @@ HttpStateData::persistentConnStatus() const if (body_bytes_read < vrep->content_length) return INCOMPLETE_MSG; + + if (body_bytes_truncated > 0) // already read more than needed + return COMPLETE_NONPERSISTENT_MSG; // disable pconns } /** \par @@ -1160,6 +1164,33 @@ HttpStateData::continueAfterParsingHeader() return false; // quit on error } +/** truncate what we read if we read too much so that writeReplyBody() + writes no more than what we should have read */ +void +HttpStateData::truncateVirginBody() +{ + assert(flags.headers_parsed); + + HttpReply *vrep = virginReply(); + int64_t clen = -1; + if (!vrep->expectingBody(request->method, clen) || clen < 0) + return; // no body or a body of unknown size, including chunked + + const int64_t body_bytes_read = reply_bytes_read - header_bytes_read; + if (body_bytes_read - body_bytes_truncated <= clen) + return; // we did not read too much or already took care of the extras + + if (const int64_t extras = body_bytes_read - body_bytes_truncated - clen) { + // server sent more that the advertised content length + debugs(11,5, HERE << "body_bytes_read=" << body_bytes_read << + " clen=" << clen << '/' << vrep->content_length << + " body_bytes_truncated=" << body_bytes_truncated << '+' << extras); + + readBuf->truncate(extras); + body_bytes_truncated += extras; + } +} + /** * Call this when there is data from the origin server * which should be sent to either StoreEntry, or to ICAP... @@ -1167,6 +1198,7 @@ HttpStateData::continueAfterParsingHeader() void HttpStateData::writeReplyBody() { + truncateVirginBody(); // if needed const char *data = readBuf->content(); int len = readBuf->contentSize(); addVirginReplyBody(data, len); diff --git a/src/http.h b/src/http.h index ab6e3d76d0..41d9737e56 100644 --- a/src/http.h +++ b/src/http.h @@ -71,6 +71,7 @@ public: size_t read_sz; int header_bytes_read; // to find end of response, int reply_bytes_read; // without relying on StoreEntry + int body_bytes_truncated; // positive when we read more than we wanted MemBuf *readBuf; bool ignoreCacheControl; bool surrogateNoStore; @@ -93,6 +94,7 @@ private: void checkDateSkew(HttpReply *); bool continueAfterParsingHeader(); + void truncateVirginBody(); virtual void haveParsedReplyHeaders(); virtual void closeServer(); // end communication with the server