Bug #425 fix: purge matching entries on PUT, POST, and DELETE requests.
This patch focuses on purging URLs in Location and Content-Location headers
of PUT, POST, and DELETE responses.
Purging Request-URIs was already supported for PUT and DELETE, but needed
polishing. I moved all code related to method-based purging into one Server
method and outside the neighbors_do_private_keys guard (and store entry key is
private guard). We may purge more related entries than before.
I also implemented Amos' TODO to purge related entries when receiving a
request with an unknown request method. Again, we may now purge more related
entries than before.
My primary concern about the polishing part of the change is that the old code
used to check that the cache entry being purged is not the current entry:
assert(e != pe);
The new code does not check for that but appears to work in my limited tests.
I am not quite sure why we needed that check if all purging methods are not
cachable anyway. Perhaps it is unsafe to call e->release() for some entries?
TODO: We cannot find Vary-controlled entries by URL and, hence, we cannot
purge them, right?
TODO: Optimize method category "search" in HttpRequestMethod by using
precomputed method_id:category maps.
bool
HttpRequestMethod::isCacheble() const
{
+ // TODO: optimize the lookup with a precomputed flags array
+ // XXX: the list seems wrong; e.g., Is METHOD_DELETE really cachable?
+ // see also http.cc::httpCachable()
+
if (theMethod == METHOD_CONNECT)
return false;
return true;
}
+
+bool
+HttpRequestMethod::purgesOthers() const
+{
+ // TODO: optimize the lookup with a precomputed flags array
+
+ switch (theMethod) {
+ /* common sense suggests purging is not required? */
+ case METHOD_GET: // XXX: but we do purge HEAD on successful GET
+ case METHOD_HEAD:
+ case METHOD_NONE:
+ case METHOD_CONNECT:
+ case METHOD_TRACE:
+ case METHOD_OPTIONS:
+ case METHOD_PROPFIND:
+ case METHOD_BPROPFIND:
+ case METHOD_COPY:
+ case METHOD_BCOPY:
+ case METHOD_LOCK:
+ case METHOD_UNLOCK:
+ case METHOD_SEARCH:
+ return false;
+
+ /* purging mandated by RFC 2616 */
+ case METHOD_POST:
+ case METHOD_PUT:
+ case METHOD_DELETE:
+ return true;
+
+ /* purging suggested by common sense */
+ case METHOD_PURGE:
+ return true;
+
+ /* all others */
+ case METHOD_OTHER:
+ default:
+ return false; // be conservative: we do not know some methods specs
+ }
+
+ return false; // not reached
+}
char const* image() const;
bool isCacheble() const;
+ bool purgesOthers() const;
private:
static const char *RequestMethodStr[];
#include "Store.h"
#include "HttpRequest.h"
#include "HttpReply.h"
+#include "TextException.h"
#include "errorpage.h"
#if USE_ADAPTATION
#include "adaptation/Service.h"
#endif
+// implemented in client_side_reply.cc until sides have a common parent
+extern void purgeEntriesByUrl(const char *url);
+
+
ServerStateData::ServerStateData(FwdState *theFwdState): AsyncJob("ServerStateData"),requestSender(NULL)
#if USE_ADAPTATION
, adaptedHeadSource(NULL)
}
}
-// called by noteAdaptationAnswer(), HTTP server overwrites this
+// Compares hosts in urls, returns false if different, no sheme, or no host.
+static bool
+sameUrlHosts(const char *url1, const char *url2)
+{
+ // XXX: Want urlHostname() here, but it uses static storage and copying
+ const char *host1 = strchr(url1, ':');
+ const char *host2 = strchr(url2, ':');
+
+ if (host1 && host2) {
+ // skip scheme slashes
+ do {
+ ++host1;
+ ++host2;
+ } while (*host1 == '/' && *host2 == '/');
+
+ if (!*host1)
+ return false; // no host
+
+ // increment while the same until we reach the end of the URL/host
+ while (*host1 && *host1 != '/' && *host1 == *host2) {
+ ++host1;
+ ++host2;
+ }
+ return *host1 == *host2;
+ }
+
+ return false; // no URL scheme
+}
+
+// purges entries that match the value of a given HTTP [response] header
+static void
+purgeEntriesByHeader(const char *reqUrl, HttpMsg *rep, http_hdr_type hdr)
+{
+ if (const char *url = rep->header.getStr(hdr))
+ if (sameUrlHosts(reqUrl, url)) // prevent purging DoS, per RFC 2616
+ purgeEntriesByUrl(url);
+}
+
+// some HTTP methods should purge matching cache entries
+void
+ServerStateData::maybePurgeOthers()
+{
+ // only some HTTP methods should purge matching cache entries
+ if (!request->method.purgesOthers())
+ return;
+
+ // and probably only if the response was successful
+ if (theFinalReply->sline.status >= 400)
+ return;
+
+ // XXX: should we use originalRequest() here?
+ const char *reqUrl = urlCanonical(request);
+ debugs(88, 5, "maybe purging due to " << RequestMethodStr(request->method) << ' ' << reqUrl);
+ purgeEntriesByUrl(reqUrl);
+ purgeEntriesByHeader(reqUrl, theFinalReply, HDR_LOCATION);
+ purgeEntriesByHeader(reqUrl, theFinalReply, HDR_CONTENT_LOCATION);
+}
+
+// called (usually by kids) when we have final (possibly adapted) reply headers
void
ServerStateData::haveParsedReplyHeaders()
{
- // default does nothing
+ Must(theFinalReply);
+ maybePurgeOthers();
}
HttpRequest *
protected:
// kids customize these
- virtual void haveParsedReplyHeaders(); /**< default does nothing */
+ virtual void haveParsedReplyHeaders(); /**< called when got final headers */
virtual void completeForwarding(); /**< default calls fwd->complete() */
// BodyConsumer for HTTP: consume request body.
private:
void quitIfAllDone(); /**< successful termination */
void sendBodyIsTooLargeError();
+ void maybePurgeOthers();
HttpReply *theVirginReply; /**< reply received from the origin server */
HttpReply *theFinalReply; /**< adapted reply from ICAP or virgin reply */
StoreEntry::getPublicByRequestMethod(this, http->request, METHOD_GET);
}
+// Purges all entries with a given url
+// TODO: move to SideAgent parent, when we have one
/*
* We probably cannot purge Vary-affected responses because their MD5
* keys depend on vary headers.
*/
+void
+purgeEntriesByUrl(const char *url)
+{
+ for (HttpRequestMethod m(METHOD_NONE); m != METHOD_ENUM_END; ++m) {
+ if (m.isCacheble()) {
+ if (StoreEntry *entry = storeGetPublic(url, m)) {
+ debugs(88, 5, "purging " << RequestMethodStr(m) << ' ' << url);
+ entry->release();
+ }
+ }
+ }
+}
+
void
clientReplyContext::purgeAllCached()
{
const char *url = urlCanonical(http->request);
-
- for (HttpRequestMethod m(METHOD_NONE); m != METHOD_ENUM_END; ++m) {
- if (m.isCacheble()) {
- if (StoreEntry *entry = storeGetPublic(url, m)) {
- debugs(88, 5, "purging " << RequestMethodStr(m) << ' ' << url);
- entry->release();
- }
- }
- }
+ purgeEntriesByUrl(url);
}
void
void
FtpStateData::haveParsedReplyHeaders()
{
+ ServerStateData::haveParsedReplyHeaders();
+
StoreEntry *e = entry;
e->timestampsSet();
assert(e != pe);
pe->release();
}
-
- if (forbidden)
- return;
-
- /// \todo AYJ: given the coment below + new behaviour of accepting METHOD_UNKNOWN, should we invert this test
- /// removing the object unless the method is nown to be safely kept?
- switch (e->mem_obj->method.id()) {
-
- case METHOD_PUT:
-
- case METHOD_DELETE:
-
- case METHOD_PROPPATCH:
-
- case METHOD_MKCOL:
-
- case METHOD_MOVE:
-
- case METHOD_BMOVE:
-
- case METHOD_BDELETE:
- /** \par
- * Remove any cached GET object if it is believed that the
- * object may have changed as a result of other methods
- */
-
- if (e->mem_obj->request)
- pe = storeGetPublicByRequestMethod(e->mem_obj->request, METHOD_GET);
- else
- pe = storeGetPublic(e->mem_obj->url, METHOD_GET);
-
- if (pe != NULL) {
- assert(e != pe);
- pe->release();
- }
-
- break;
-
- default:
- /* Keep GCC happy. The methods above are all mutating HTTP methods
- */
- break;
- }
}
void
void
HttpStateData::haveParsedReplyHeaders()
{
+ ServerStateData::haveParsedReplyHeaders();
+
Ctx ctx = ctx_enter(entry->mem_obj->url);
HttpReply *rep = finalReply();