From 6327f3495f67c4ba7a1642332cbd6e1c80a95ac3 Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Sat, 20 Oct 2012 04:07:11 -0600 Subject: [PATCH] HTTP/1.1: Cache-Control compliance upgrade trunk rev 11361 converted Cache-Control header from using a single mask bitmap (shared by request and response) to separate CC header objects in the request and response. This conversion contained several regressions like the one bug 3670 reports. This patch: * documents HttpStateData::cacheableReply() clarifying the overall method action and what each individual check is doing. * resolves several visible regressions, including bug 3670. * extends the caching to handle the "no-cache" controls as per HTTP/1.1 (MAY store, but MUST revalidate before use). * extends the caching for several lesser known cases of "MAY store" exemptions handling authenticated transactions. * removes an old hack handling Pragma:no-cache (undefined in HTTP/1.1) One side effect of now caching transactions utilizing "no-cache" is that hacks around Pragma:no-cache are reduced to only having any effect when Cache-Control is absent, reducing their performance cost. They also now require that --enable-http-violations is built in, since HTTP/1.1 states that response Pragma directives are not defined and thus SHOULD be ignored. 
--- src/http.cc | 165 +++++++++++++++++++++++++++++++++++----------------- src/http.h | 2 + 2 files changed, 113 insertions(+), 54 deletions(-) diff --git a/src/http.cc b/src/http.cc index 3177fd7adc..de09782e08 100644 --- a/src/http.cc +++ b/src/http.cc @@ -340,67 +340,103 @@ HttpStateData::cacheableReply() #define REFRESH_OVERRIDE(flag) 0 #endif - if (surrogateNoStore) + // Check for Surrogate/1.0 protocol conditions + // NP: reverse-proxy traffic our parent server has instructed us never to cache + if (surrogateNoStore) { + debugs(22, 3, HERE << "NO because Surrogate-Control:no-store"); return 0; + } - // RFC 2616: do not cache replies to responses with no-store CC directive - if (request && request->cache_control && - request->cache_control->noStore() && - !REFRESH_OVERRIDE(ignore_no_store)) - return 0; + // RFC 2616: HTTP/1.1 Cache-Control conditions + if (!ignoreCacheControl) { + // XXX: check to see if the request headers alone were enough to prevent caching earlier + // (ie no-store request header) no need to check those all again here if so. + // for now we are not reliably doing that so we waste CPU re-checking request CC - if (!ignoreCacheControl && request->cache_control != NULL) { - const HttpHdrCc* cc=request->cache_control; - if (cc->Private()) { - if (!REFRESH_OVERRIDE(ignore_private)) - return 0; + // RFC 2616 section 14.9.2 - MUST NOT cache any response with request CC:no-store + if (request && request->cache_control && request->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) { + debugs(22, 3, HERE << "NO because client request Cache-Control:no-store"); + return 0; } - if (cc->noCache()) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; + // NP: request CC:no-cache only means cache READ is forbidden. STORE is permitted. + // NP: request CC:private is undefined. We ignore. + // NP: other request CC flags are limiters on HIT/MISS. We don't care about here. 
+ + // RFC 2616 section 14.9.2 - MUST NOT cache any response with CC:no-store + if (rep->cache_control && rep->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) { + debugs(22, 3, HERE << "NO because server reply Cache-Control:no-store"); + return 0; } - if (cc->noStore()) { - if (!REFRESH_OVERRIDE(ignore_no_store)) - return 0; + // RFC 2616 section 14.9.1 - MUST NOT cache any response with CC:private in a shared cache like Squid. + // TODO: add a shared/private cache configuration possibility. + if (rep->cache_control && + rep->cache_control->Private() && + !REFRESH_OVERRIDE(ignore_private)) { + debugs(22, 3, HERE << "NO because server reply Cache-Control:private"); + return 0; } - } + // NP: being conservative; CC:private overrides CC:public when both are present in a response. - if (request->flags.auth || request->flags.auth_sent) { - /* - * Responses to requests with authorization may be cached - * only if a Cache-Control: public reply header is present. - * RFC 2068, sec 14.9.4 - */ + } + // RFC 2068, sec 14.9.4 - MUST NOT cache any response with Authentication UNLESS certain CC controls are present + // allow HTTP violations to IGNORE those controls (ie re-block caching Auth) + if (request && (request->flags.auth || request->flags.auth_sent) && !REFRESH_OVERRIDE(ignore_auth)) { + if (!rep->cache_control) { + debugs(22, 3, HERE << "NO because Authenticated and server reply missing Cache-Control"); + return 0; + } - if (!request->cache_control || !request->cache_control->Public()) { - if (!REFRESH_OVERRIDE(ignore_auth)) - return 0; + if (ignoreCacheControl) { + debugs(22, 3, HERE << "NO because Authenticated and ignoring Cache-Control"); + return 0; } - } - /* Pragma: no-cache in _replies_ is not documented in HTTP, - * but servers like "Active Imaging Webcast/2.0" sure do use it */ - if (hdr->has(HDR_PRAGMA)) { - String s = hdr->getList(HDR_PRAGMA); - const int no_cache = strListIsMember(&s, "no-cache", ','); - s.clean(); + // HTTPbis pt7 
section 4.1 clause 3: a response CC:public is present + bool mayStore = false; + if (rep->cache_control->Public()) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:public"); + mayStore = true; + + // HTTPbis pt7 section 4.1 clause 2: a response CC:must-revalidate is present + } else if (rep->cache_control->mustRevalidate() && !REFRESH_OVERRIDE(ignore_must_revalidate)) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:public"); + mayStore = true; + +#if 0 // waiting on HTTPbis WG agreement before we do this + // NP: given the must-revalidate exception we should also be able to exempt no-cache. + } else if (rep->cache_control->noCache() && !REFRESH_OVERRIDE(ignore_no_cache)) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:no-cache"); + mayStore = true; +#endif - if (no_cache) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; + // HTTPbis pt7 section 4.1 clause 1: a response CC:s-maxage is present + } else if (rep->cache_control->sMaxAge()) { + debugs(22, 3, HERE << " Authenticated but server reply Cache-Control:s-maxage"); + mayStore = true; } + + if (!mayStore) { + debugs(22, 3, HERE << "NO because Authenticated transaction"); + return 0; + } + + // NP: response CC:no-cache is equivalent to CC:must-revalidate,max-age=0. We MAY cache, and do so. + // NP: other request CC flags are limiters on HIT/MISS/REFRESH. We don't care about here. } - /* - * The "multipart/x-mixed-replace" content type is used for + /* HACK: The "multipart/x-mixed-replace" content type is used for * continuous push replies. 
These are generally dynamic and * probably should not be cachable */ if ((v = hdr->getStr(HDR_CONTENT_TYPE))) - if (!strncasecmp(v, "multipart/x-mixed-replace", 25)) + if (!strncasecmp(v, "multipart/x-mixed-replace", 25)) { + debugs(22, 3, HERE << "NO because Content-Type:multipart/x-mixed-replace"); return 0; + } switch (rep->sline.status) { /* Responses that are cacheable */ @@ -421,11 +457,12 @@ HttpStateData::cacheableReply() */ if (!refreshIsCachable(entry) && !REFRESH_OVERRIDE(store_stale)) { - debugs(22, 3, "refreshIsCachable() returned non-cacheable.."); + debugs(22, 3, "NO because refreshIsCachable() returned non-cacheable.."); return 0; - } else + } else { + debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status); return 1; - + } /* NOTREACHED */ break; @@ -433,11 +470,17 @@ HttpStateData::cacheableReply() case HTTP_MOVED_TEMPORARILY: case HTTP_TEMPORARY_REDIRECT: - if (rep->expires > rep->date && rep->date > 0) + if (rep->date <= 0) { + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Date missing/invalid"); + return 0; + } + if (rep->expires > rep->date) { + debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status << " and Expires > Date"); return 1; - else + } else { + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Expires <= Date"); return 0; - + } /* NOTREACHED */ break; @@ -466,6 +509,7 @@ HttpStateData::cacheableReply() case HTTP_SERVICE_UNAVAILABLE: case HTTP_GATEWAY_TIMEOUT: + debugs(22, 3, HERE << "MAYBE because HTTP status " << rep->sline.status); return -1; /* NOTREACHED */ @@ -502,12 +546,12 @@ HttpStateData::cacheableReply() case HTTP_REQUESTED_RANGE_NOT_SATISFIABLE: case HTTP_EXPECTATION_FAILED: + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status); return 0; default: /* RFC 2616 section 6.1.1: an unrecognized response MUST NOT be cached. */ - debugs (11, 3, HERE << "Unknown HTTP status code " << rep->sline.status << ". 
Not cacheable."); - + debugs (11, 3, HERE << "NO because unknown HTTP status code " << rep->sline.status); return 0; /* NOTREACHED */ @@ -918,12 +962,25 @@ HttpStateData::haveParsedReplyHeaders() no_cache: - if (!ignoreCacheControl && rep->cache_control) { - if (rep->cache_control->proxyRevalidate() || - rep->cache_control->mustRevalidate() || - rep->cache_control->hasSMaxAge() - ) - EBIT_SET(entry->flags, ENTRY_REVALIDATE); + if (!ignoreCacheControl) { + if (rep->cache_control) { + if (rep->cache_control->proxyRevalidate() || + rep->cache_control->mustRevalidate() || + rep->cache_control->noCache() || + rep->cache_control->hasSMaxAge()) + EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } +#if USE_HTTP_VIOLATIONS // response header Pragma::no-cache is undefined in HTTP + else { + // Expensive calculation. So only do it IF the CC: header is not present. + + /* HACK: Pragma: no-cache in _replies_ is not documented in HTTP, + * but servers like "Active Imaging Webcast/2.0" sure do use it */ + if (rep->header.has(HDR_PRAGMA) && + rep->header.hasListMember(HDR_PRAGMA,"no-cache",',')) + EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } +#endif } #if HEADERS_LOG diff --git a/src/http.h b/src/http.h index 4cae62d539..b158e3e258 100644 --- a/src/http.h +++ b/src/http.h @@ -61,6 +61,8 @@ public: void processReplyBody(); void readReply(const CommIoCbParams &io); virtual void maybeReadVirginBody(); // read response data from the network + + // Determine whether the response is a cacheable representation int cacheableReply(); peer *_peer; /* peer request made to */ -- 2.47.2