From c1520b6724745fa81179bcc6ebb3d196f021cd2d Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Fri, 20 Jun 2008 16:43:01 +1200 Subject: [PATCH] Author: Alex Rousskov Bug #425 fix: purge matching entries on PUT, POST, and DELETE requests. This patch focuses on purging URLs in Location and Content-Location headers of PUT, POST, and DELETE responses. Purging Request-URIs was already supported for PUT and DELETE, but needed polishing. I moved all code related to method-based purging into one Server method and outside the neighbors_do_private_keys guard (and store entry key is private guard). We may purge more related entries than before. I also implemented Amos' TODO to purge related entries when receiving a request with an unknown request method. Again, we may now purge more related entries than before. My primary concern about the polishing part of the change is that the old code used to check that the cache entry being purged is not the current entry: assert(e != pe); The new code does not check for that but appears to work in my limited tests. I am not quite sure why we needed that check if all purging methods are not cachable anyway. Perhaps it is unsafe to call e->release() for some entries? TODO: We cannot find Vary-controlled entries by URL and, hence, we cannot purge them, right? TODO: Optimize method category "search" in HttpRequestMethod by using precomputed method_id:category maps. --- src/HttpRequestMethod.cc | 45 ++++++++++++++++++++++++++ src/HttpRequestMethod.h | 1 + src/Server.cc | 68 ++++++++++++++++++++++++++++++++++++++-- src/Server.h | 3 +- src/client_side_reply.cc | 25 +++++++++------ src/ftp.cc | 2 ++ src/http.cc | 45 ++------------------------ 7 files changed, 134 insertions(+), 55 deletions(-) diff --git a/src/HttpRequestMethod.cc b/src/HttpRequestMethod.cc index a846db22e5..8b05d813c5 100644 --- a/src/HttpRequestMethod.cc +++ b/src/HttpRequestMethod.cc @@ -189,6 +189,10 @@ HttpRequestMethod::image() const bool HttpRequestMethod::isCacheble() const { + // TODO: optimize the lookup with a precomputed flags array + // XXX: the list seems wrong; e.g., Is METHOD_DELETE really cachable? + // see also http.cc::httpCachable() + if (theMethod == METHOD_CONNECT) return false; @@ -206,3 +210,44 @@ HttpRequestMethod::isCacheble() const return true; } + +bool +HttpRequestMethod::purgesOthers() const +{ + // TODO: optimize the lookup with a precomputed flags array + + switch (theMethod) { + /* common sense suggests purging is not required? */ + case METHOD_GET: // XXX: but we do purge HEAD on successful GET + case METHOD_HEAD: + case METHOD_NONE: + case METHOD_CONNECT: + case METHOD_TRACE: + case METHOD_OPTIONS: + case METHOD_PROPFIND: + case METHOD_BPROPFIND: + case METHOD_COPY: + case METHOD_BCOPY: + case METHOD_LOCK: + case METHOD_UNLOCK: + case METHOD_SEARCH: + return false; + + /* purging mandated by RFC 2616 */ + case METHOD_POST: + case METHOD_PUT: + case METHOD_DELETE: + return true; + + /* purging suggested by common sense */ + case METHOD_PURGE: + return true; + + /* all others */ + case METHOD_OTHER: + default: + return false; // be conservative: we do not know some methods specs + } + + return false; // not reached +} diff --git a/src/HttpRequestMethod.h b/src/HttpRequestMethod.h index 0e719b07e9..33c9fd23a4 100644 --- a/src/HttpRequestMethod.h +++ b/src/HttpRequestMethod.h @@ -148,6 +148,7 @@ public: char const* image() const; bool isCacheble() const; + bool purgesOthers() const; private: static const char *RequestMethodStr[]; diff --git a/src/Server.cc b/src/Server.cc index 9e03698ee4..09387309ff 100644 --- a/src/Server.cc +++ b/src/Server.cc @@ -37,6 +37,7 @@ #include "Store.h" #include "HttpRequest.h" #include "HttpReply.h" +#include "TextException.h" #include "errorpage.h" #if USE_ADAPTATION @@ -44,6 +45,10 @@ #include "adaptation/Service.h" #endif +// implemented in client_side_reply.cc until sides have a common parent +extern void purgeEntriesByUrl(const char *url); + + ServerStateData::ServerStateData(FwdState *theFwdState): AsyncJob("ServerStateData"),requestSender(NULL) #if USE_ADAPTATION , adaptedHeadSource(NULL) @@ -366,11 +371,70 @@ ServerStateData::sendMoreRequestBody() } } -// called by noteAdaptationAnswer(), HTTP server overwrites this +// Compares hosts in urls, returns false if different, no sheme, or no host. +static bool +sameUrlHosts(const char *url1, const char *url2) +{ + // XXX: Want urlHostname() here, but it uses static storage and copying + const char *host1 = strchr(url1, ':'); + const char *host2 = strchr(url2, ':'); + + if (host1 && host2) { + // skip scheme slashes + do { + ++host1; + ++host2; + } while (*host1 == '/' && *host2 == '/'); + + if (!*host1) + return false; // no host + + // increment while the same until we reach the end of the URL/host + while (*host1 && *host1 != '/' && *host1 == *host2) { + ++host1; + ++host2; + } + return *host1 == *host2; + } + + return false; // no URL scheme +} + +// purges entries that match the value of a given HTTP [response] header +static void +purgeEntriesByHeader(const char *reqUrl, HttpMsg *rep, http_hdr_type hdr) +{ + if (const char *url = rep->header.getStr(hdr)) + if (sameUrlHosts(reqUrl, url)) // prevent purging DoS, per RFC 2616 + purgeEntriesByUrl(url); +} + +// some HTTP methods should purge matching cache entries +void +ServerStateData::maybePurgeOthers() +{ + // only some HTTP methods should purge matching cache entries + if (!request->method.purgesOthers()) + return; + + // and probably only if the response was successful + if (theFinalReply->sline.status >= 400) + return; + + // XXX: should we use originalRequest() here? + const char *reqUrl = urlCanonical(request); + debugs(88, 5, "maybe purging due to " << RequestMethodStr(request->method) << ' ' << reqUrl); + purgeEntriesByUrl(reqUrl); + purgeEntriesByHeader(reqUrl, theFinalReply, HDR_LOCATION); + purgeEntriesByHeader(reqUrl, theFinalReply, HDR_CONTENT_LOCATION); +} + +// called (usually by kids) when we have final (possibly adapted) reply headers void ServerStateData::haveParsedReplyHeaders() { - // default does nothing + Must(theFinalReply); + maybePurgeOthers(); } HttpRequest * diff --git a/src/Server.h b/src/Server.h index 1680ee3ef7..28d4b5f082 100644 --- a/src/Server.h +++ b/src/Server.h @@ -116,7 +116,7 @@ private: protected: // kids customize these - virtual void haveParsedReplyHeaders(); /**< default does nothing */ + virtual void haveParsedReplyHeaders(); /**< called when got final headers */ virtual void completeForwarding(); /**< default calls fwd->complete() */ // BodyConsumer for HTTP: consume request body. @@ -191,6 +191,7 @@ protected: private: void quitIfAllDone(); /**< successful termination */ void sendBodyIsTooLargeError(); + void maybePurgeOthers(); HttpReply *theVirginReply; /**< reply received from the origin server */ HttpReply *theFinalReply; /**< adapted reply from ICAP or virgin reply */ diff --git a/src/client_side_reply.cc b/src/client_side_reply.cc index c97354cf34..a971611989 100644 --- a/src/client_side_reply.cc +++ b/src/client_side_reply.cc @@ -714,23 +714,30 @@ clientReplyContext::purgeRequestFindObjectToPurge() StoreEntry::getPublicByRequestMethod(this, http->request, METHOD_GET); } +// Purges all entries with a given url +// TODO: move to SideAgent parent, when we have one /* * We probably cannot purge Vary-affected responses because their MD5 * keys depend on vary headers. */ +void +purgeEntriesByUrl(const char *url) +{ + for (HttpRequestMethod m(METHOD_NONE); m != METHOD_ENUM_END; ++m) { + if (m.isCacheble()) { + if (StoreEntry *entry = storeGetPublic(url, m)) { + debugs(88, 5, "purging " << RequestMethodStr(m) << ' ' << url); + entry->release(); + } + } + } +} + void clientReplyContext::purgeAllCached() { const char *url = urlCanonical(http->request); - - for (HttpRequestMethod m(METHOD_NONE); m != METHOD_ENUM_END; ++m) { - if (m.isCacheble()) { - if (StoreEntry *entry = storeGetPublic(url, m)) { - debugs(88, 5, "purging " << RequestMethodStr(m) << ' ' << url); - entry->release(); - } - } - } + purgeEntriesByUrl(url); } void diff --git a/src/ftp.cc b/src/ftp.cc index 1a8aac4e34..393f57b6b9 100644 --- a/src/ftp.cc +++ b/src/ftp.cc @@ -3705,6 +3705,8 @@ FtpStateData::appendSuccessHeader() void FtpStateData::haveParsedReplyHeaders() { + ServerStateData::haveParsedReplyHeaders(); + StoreEntry *e = entry; e->timestampsSet(); diff --git a/src/http.cc b/src/http.cc index 844f323b2d..ef2b61bbd4 100644 --- a/src/http.cc +++ b/src/http.cc @@ -290,49 +290,6 @@ httpMaybeRemovePublic(StoreEntry * e, http_status status) assert(e != pe); pe->release(); } - - if (forbidden) - return; - - /// \todo AYJ: given the coment below + new behaviour of accepting METHOD_UNKNOWN, should we invert this test - /// removing the object unless the method is nown to be safely kept? - switch (e->mem_obj->method.id()) { - - case METHOD_PUT: - - case METHOD_DELETE: - - case METHOD_PROPPATCH: - - case METHOD_MKCOL: - - case METHOD_MOVE: - - case METHOD_BMOVE: - - case METHOD_BDELETE: - /** \par - * Remove any cached GET object if it is believed that the - * object may have changed as a result of other methods - */ - - if (e->mem_obj->request) - pe = storeGetPublicByRequestMethod(e->mem_obj->request, METHOD_GET); - else - pe = storeGetPublic(e->mem_obj->url, METHOD_GET); - - if (pe != NULL) { - assert(e != pe); - pe->release(); - } - - break; - - default: - /* Keep GCC happy. The methods above are all mutating HTTP methods - */ - break; - } } void @@ -774,6 +731,8 @@ HttpStateData::processReplyHeader() void HttpStateData::haveParsedReplyHeaders() { + ServerStateData::haveParsedReplyHeaders(); + Ctx ctx = ctx_enter(entry->mem_obj->url); HttpReply *rep = finalReply(); -- 2.47.2