/*
- * $Id: store_swapout.cc,v 1.102 2006/05/19 17:05:18 wessels Exp $
+ * $Id$
*
* DEBUG: section 20 Storage Manager Swapout Functions
* AUTHOR: Duane Wessels
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
*
*/
-#include "squid.h"
+#include "squid-old.h"
+#include "cbdata.h"
#include "StoreClient.h"
#include "Store.h"
/* FIXME: Abstract the use of this more */
#include "mem_node.h"
#include "MemObject.h"
#include "SwapDir.h"
+#include "StatCounters.h"
+#include "swap_log_op.h"
static void storeSwapOutStart(StoreEntry * e);
-static STIOCB storeSwapOutFileClosed;
-static STIOCB storeSwapOutFileNotify;
+static StoreIOState::STIOCB storeSwapOutFileClosed;
+static StoreIOState::STFNCB storeSwapOutFileNotify;
+
+/* Wrapper to cross the C/C++ ABI boundary: xfree is extern "C" for libraries.
+ * Forwards x to xfree(). In this file it is passed as the last argument of
+ * storeIOWrite() together with the serialised swap metadata buffer. */
+static void xfree_cppwrapper(void *x)
+{
+ xfree(x);
+}
/* start swapping object to disk */
static void
/* Build the swap metadata, so the filesystem will know how much
* metadata there is to store
*/
- debug(20, 5) ("storeSwapOutStart: Begin SwapOut '%s' to dirno %d, fileno %08X\n",
- storeUrl(e), e->swap_dirn, e->swap_filen);
+ debugs(20, 5, "storeSwapOutStart: Begin SwapOut '" << e->url() << "' to dirno " <<
+ e->swap_dirn << ", fileno " << std::hex << std::setw(8) << std::setfill('0') <<
+ std::uppercase << e->swap_filen);
e->swap_status = SWAPOUT_WRITING;
/* If we start swapping out objects with OutOfBand Metadata,
* then this code needs changing
*/
+
+ /* TODO: make some sort of data,size refcounted immutable buffer
+ * and stop fooling ourselves with "const char*" buffers.
+ */
+
+ // Create metadata now, possibly in vain: storeCreate needs swap_hdr_sz.
+ const char *buf = e->getSerialisedMetaData ();
+ assert(buf);
+
/* Create the swap file */
- generic_cbdata *c = cbdataAlloc(generic_cbdata);
- c->data = e;
+ generic_cbdata *c = new generic_cbdata(e);
sio = storeCreate(e, storeSwapOutFileNotify, storeSwapOutFileClosed, c);
- if (NULL == sio.getRaw()) {
+ if (sio == NULL) {
e->swap_status = SWAPOUT_NONE;
- cbdataFree(c);
+ mem->swapout.decision = MemObject::SwapOut::swImpossible;
+ delete c;
+ xfree((char*)buf);
storeLog(STORE_LOG_SWAPOUTFAIL, e);
return;
}
mem->swapout.sio = sio;
/* Don't lock until after create, or the replacement
* code might get confused */
- storeLockObject(e);
+
+ e->lock();
/* Pick up the file number if it was assigned immediately */
e->swap_filen = mem->swapout.sio->swap_filen;
+
e->swap_dirn = mem->swapout.sio->swap_dirn;
+
/* write out the swap metadata */
- /* TODO: make some sort of data,size refcounted immutable buffer
- * for use by this sort of function.
- */
- char const *buf = e->getSerialisedMetaData ();
- /* If we start swapping out with out of band metadata, this assert
- * will catch it - this code needs to be adjusted if that happens
- */
- assert (buf);
- storeIOWrite(mem->swapout.sio, buf, mem->swap_hdr_sz, 0, xfree);
+ storeIOWrite(mem->swapout.sio, buf, mem->swap_hdr_sz, 0, xfree_cppwrapper);
}
/*
 * Notification callback handed to storeCreate() when the swap file is
 * created. data is the generic_cbdata whose data member points at the
 * StoreEntry being swapped out; self must be that entry's current
 * swapout.sio. Copies the swap file and directory numbers from the sio into
 * the entry. All preconditions (SWAPOUT_WRITING status, a MemObject, a
 * matching sio, errflag == 0, no file number assigned yet) are enforced with
 * assert(). The generic_cbdata wrapper is not released here.
 */
static void
-storeSwapOutFileNotify(void *data, int errflag, storeIOState * sio)
+storeSwapOutFileNotify(void *data, int errflag, StoreIOState::Pointer self)
{
generic_cbdata *c = (generic_cbdata *)data;
StoreEntry *e = (StoreEntry *)c->data;
MemObject *mem = e->mem_obj;
assert(e->swap_status == SWAPOUT_WRITING);
assert(mem);
- assert(mem->swapout.sio == sio);
+ assert(mem->swapout.sio == self);
assert(errflag == 0);
+ assert(e->swap_filen < 0); // if this fails, call SwapDir::disconnect(e)
e->swap_filen = mem->swapout.sio->swap_filen;
e->swap_dirn = mem->swapout.sio->swap_dirn;
}
MemObject *mem = anEntry->mem_obj;
do {
- /*
- * Evil hack time.
- * We are paging out to disk in page size chunks. however, later on when
- * we update the queue position, we might not have a page (I *think*),
- * so we do the actual page update here.
- */
+ // find the page containing the first byte we have not swapped out yet
+ mem_node *page =
+ mem->data_hdr.getBlockContainingLocation(mem->swapout.queue_offset);
- if (mem->swapout.memnode == NULL) {
- /* We need to swap out the first page */
- mem->swapout.memnode = const_cast<mem_node *>(mem->data_hdr.start());
- } else {
- /* We need to swap out the next page */
- /* 20030636 RBC - we don't have ->next anymore.
- * But we do have the next location */
- mem->swapout.memnode = mem->data_hdr.getBlockContainingLocation (mem->swapout.memnode->end());
- }
+ if (!page)
+ return; // wait for more data to become available
+
+ // memNodeWriteComplete() and absence of buffer offset math below
+ // imply that we always write from the very beginning of the page
+ assert(page->start() == mem->swapout.queue_offset);
/*
* Get the length of this buffer. We are assuming(!) that the buffer
* but we can look at this at a later date or whenever the code results
* in bad swapouts, whichever happens first. :-)
*/
- ssize_t swap_buf_len = mem->swapout.memnode->nodeBuffer.length;
+ ssize_t swap_buf_len = page->nodeBuffer.length;
- debug(20, 3) ("storeSwapOut: swap_buf_len = %d\n", (int) swap_buf_len);
+ debugs(20, 3, "storeSwapOut: swap_buf_len = " << swap_buf_len);
assert(swap_buf_len > 0);
- debug(20, 3) ("storeSwapOut: swapping out %ld bytes from %ld\n",
- (long int) swap_buf_len, (long int) mem->swapout.queue_offset);
+ debugs(20, 3, "storeSwapOut: swapping out " << swap_buf_len << " bytes from " << mem->swapout.queue_offset);
mem->swapout.queue_offset += swap_buf_len;
- storeIOWrite(mem->swapout.sio, mem->data_hdr.NodeGet(mem->swapout.memnode), swap_buf_len, -1, memNodeWriteComplete);
+ storeIOWrite(mem->swapout.sio,
+ mem->data_hdr.NodeGet(page),
+ swap_buf_len,
+ -1,
+ memNodeWriteComplete);
/* the storeWrite() call might generate an error */
if (anEntry->swap_status != SWAPOUT_WRITING)
break;
- ssize_t swapout_size = (ssize_t) (mem->endOffset() - mem->swapout.queue_offset);
+ int64_t swapout_size = mem->endOffset() - mem->swapout.queue_offset;
if (anEntry->store_status == STORE_PENDING)
if (swapout_size < SM_PAGE_SIZE)
* Its overhead is therefore significant.
*/
void
-storeSwapOut(StoreEntry * e)
+StoreEntry::swapOut()
{
- if (!e->mem_obj)
+ if (!mem_obj)
return;
- if (!e->swapoutPossible())
+ // this flag may change so we must check even if we are swappingOut
+ if (EBIT_TEST(flags, ENTRY_ABORTED)) {
+ assert(EBIT_TEST(flags, RELEASE_REQUEST));
+ // StoreEntry::abort() already closed the swap out file, if any
+ // no trimming: data producer must stop production if ENTRY_ABORTED
return;
+ }
- MemObject *mem = e->mem_obj;
-
- debug(20, 7) ("storeSwapOut: mem->inmem_lo = %d\n",
- (int) mem->inmem_lo);
+ const bool weAreOrMayBeSwappingOut = swappingOut() || mayStartSwapOut();
- debug(20, 7) ("storeSwapOut: mem->endOffset() = %d\n",
- (int) mem->endOffset());
+ trimMemory(weAreOrMayBeSwappingOut); // called before the early return below, i.e. also when nothing will be swapped out
- debug(20, 7) ("storeSwapOut: swapout.queue_offset = %d\n",
- (int) mem->swapout.queue_offset);
+ if (!weAreOrMayBeSwappingOut)
+ return; // nothing else to do
- if (mem->swapout.sio.getRaw())
- debug(20, 7) ("storeSwapOut: storeOffset() = %d\n",
- (int) mem->swapout.sio->offset());
+ // Aborted entries have STORE_OK, but they already returned at the ENTRY_ABORTED check above.
+ // Thus, store_status == STORE_OK below means we got everything we wanted.
- ssize_t swapout_maxsize = (ssize_t) (mem->endOffset() - mem->swapout.queue_offset);
+ debugs(20, 7, HERE << "storeSwapOut: mem->inmem_lo = " << mem_obj->inmem_lo);
+ debugs(20, 7, HERE << "storeSwapOut: mem->endOffset() = " << mem_obj->endOffset());
+ debugs(20, 7, HERE << "storeSwapOut: swapout.queue_offset = " << mem_obj->swapout.queue_offset);
- assert(swapout_maxsize >= 0);
+ if (mem_obj->swapout.sio != NULL)
+ debugs(20, 7, "storeSwapOut: storeOffset() = " << mem_obj->swapout.sio->offset() );
- off_t const lowest_offset = mem->lowestMemReaderOffset();
+ int64_t const lowest_offset = mem_obj->lowestMemReaderOffset();
- debug(20, 7) ("storeSwapOut: lowest_offset = %d\n",
- (int) lowest_offset);
-
- /*
- * Grab the swapout_size and check to see whether we're going to defer
- * the swapout based upon size
- */
- if ((e->store_status != STORE_OK) && (swapout_maxsize < store_maxobjsize)) {
- /*
- * NOTE: the store_maxobjsize here is the max of optional
- * max-size values from 'cache_dir' lines. It is not the
- * same as 'maximum_object_size'. By default, store_maxobjsize
- * will be set to -1. However, I am worried that this
- * deferance may consume a lot of memory in some cases.
- * It would be good to make this decision based on reply
- * content-length, rather than wait to accumulate huge
- * amounts of object data in memory.
- */
- debug(20, 5) ("storeSwapOut: Deferring starting swapping out\n");
- return;
- }
+ debugs(20, 7, HERE << "storeSwapOut: lowest_offset = " << lowest_offset);
- e->trimMemory();
-#if SIZEOF_OFF_T == 4
+#if SIZEOF_OFF_T <= 4
- if (mem->endOffset() > 0x7FFF0000) {
- debug(20, 0) ("WARNING: preventing off_t overflow for %s\n", storeUrl(e));
- storeAbort(e);
+ if (mem_obj->endOffset() > 0x7FFF0000) {
+ debugs(20, 0, "WARNING: preventing off_t overflow for " << url());
+ abort(); // NOTE(review): replaces storeAbort(e); presumably resolves to StoreEntry::abort(), not ::abort() -- confirm
return;
}
#endif
- if (e->swap_status == SWAPOUT_WRITING)
- assert(mem->inmem_lo <= (off_t)mem->objectBytesOnDisk() );
-
- if (!storeSwapOutAble(e))
- return;
+ if (swap_status == SWAPOUT_WRITING)
+ assert(mem_obj->inmem_lo <= mem_obj->objectBytesOnDisk() );
- debug(20, 7) ("storeSwapOut: swapout_size = %d\n",
- (int) swapout_maxsize);
-
- if (swapout_maxsize == 0) {
- if (e->store_status == STORE_OK)
- storeSwapOutFileClose(e);
+ // buffered bytes we have not swapped out yet
+ const int64_t swapout_maxsize = mem_obj->availableForSwapOut();
+ assert(swapout_maxsize >= 0);
+ debugs(20, 7, "storeSwapOut: swapout_size = " << swapout_maxsize);
- return; /* Nevermore! */
+ if (swapout_maxsize == 0) { // swapped everything we got
+ if (store_status == STORE_OK) { // got everything we wanted
+ assert(mem_obj->object_sz >= 0);
+ swapOutFileClose(StoreIOState::wroteAll);
+ }
+ // else need more data to swap out
+ return;
}
- if (e->store_status == STORE_PENDING) {
+ if (store_status == STORE_PENDING) {
/* wait for a full block to write */
if (swapout_maxsize < SM_PAGE_SIZE)
* Wait until we are below the disk FD limit, only if the
* next server-side read won't be deferred.
*/
- if (storeTooManyDiskFilesOpen() && !e->checkDeferRead(-1))
+ if (storeTooManyDiskFilesOpen() && !checkDeferRead(-1))
return;
}
/* Ok, we have stuff to swap out. Is there a swapout.sio open? */
- if (e->swap_status == SWAPOUT_NONE) {
- assert(mem->swapout.sio == NULL);
- assert(mem->inmem_lo == 0);
-
- if (storeCheckCachable(e))
- storeSwapOutStart(e);
- else
- return;
-
- /* ENTRY_CACHABLE will be cleared and we'll never get here again */
+ if (swap_status == SWAPOUT_NONE) {
+ assert(mem_obj->swapout.sio == NULL);
+ assert(mem_obj->inmem_lo == 0);
+ storeSwapOutStart(this); // sets SwapOut::swImpossible on failures
}
- if (mem->swapout.sio == NULL)
+ if (mem_obj->swapout.sio == NULL)
return;
- doPages(e);
+ doPages(this);
- if (NULL == mem->swapout.sio.getRaw())
+ if (mem_obj->swapout.sio == NULL)
/* oops, we're not swapping out any more */
return;
- if (e->store_status == STORE_OK) {
+ if (store_status == STORE_OK) {
/*
* If the state is STORE_OK, then all data must have been given
* to the filesystem at this point because storeSwapOut() is
* not going to be called again for this entry.
*/
- assert(mem->endOffset() == mem->swapout.queue_offset);
- storeSwapOutFileClose(e);
+ assert(mem_obj->object_sz >= 0);
+ assert(mem_obj->endOffset() == mem_obj->swapout.queue_offset);
+ swapOutFileClose(StoreIOState::wroteAll);
}
}
/*
 * Asks the storage layer to close this entry's swap-out file, if one is open.
 * how is forwarded unchanged to storeClose(); the callers visible in this
 * file pass StoreIOState::wroteAll. Requires mem_obj to be set. Does nothing
 * beyond logging when mem_obj->swapout.sio is NULL.
 */
void
-storeSwapOutFileClose(StoreEntry * e)
+StoreEntry::swapOutFileClose(int how)
{
- MemObject *mem = e->mem_obj;
- assert(mem != NULL);
- debug(20, 3) ("storeSwapOutFileClose: %s\n", e->getMD5Text());
- debug(20, 3) ("storeSwapOutFileClose: sio = %p\n", mem->swapout.sio.getRaw());
+ assert(mem_obj != NULL);
+ debugs(20, 3, "storeSwapOutFileClose: " << getMD5Text() << " how=" << how);
+ debugs(20, 3, "storeSwapOutFileClose: sio = " << mem_obj->swapout.sio.getRaw());
- if (mem->swapout.sio == NULL)
+ if (mem_obj->swapout.sio == NULL)
return;
- storeClose(mem->swapout.sio);
+ storeClose(mem_obj->swapout.sio, how);
}
/*
 * Close callback handed to storeCreate() when the swap file is created.
 * data is the generic_cbdata whose data member points at the StoreEntry;
 * self must be the entry's current swapout.sio and the entry must still be
 * in SWAPOUT_WRITING state.
 *
 * On failure (non-zero errflag, or an object length that is still negative)
 * the swap file is unlinked if a file number was assigned and the entry is
 * marked for release. On success the on-disk size is recorded as object
 * length plus swap header size, the entry becomes SWAPOUT_DONE, the store is
 * told via swappedOut(), the swapout is logged if the entry is still
 * cachable, and the swap-out counter is incremented.
 *
 * In both cases mem->swapout.sio is cleared and the entry is unlocked.
 */
static void
-storeSwapOutFileClosed(void *data, int errflag, storeIOState * sio)
+storeSwapOutFileClosed(void *data, int errflag, StoreIOState::Pointer self)
{
generic_cbdata *c = (generic_cbdata *)data;
StoreEntry *e = (StoreEntry *)c->data;
MemObject *mem = e->mem_obj;
+ assert(mem->swapout.sio == self);
assert(e->swap_status == SWAPOUT_WRITING);
cbdataFree(c); // NOTE(review): c is created with new generic_cbdata(e) and the create-failure path uses delete; confirm cbdataFree() is the matching release
- if (errflag) {
- debug(20, 1) ("storeSwapOutFileClosed: dirno %d, swapfile %08X, errflag=%d\n\t%s\n",
- e->swap_dirn, e->swap_filen, errflag, xstrerror());
+ // if object_size is still unknown, the entry was probably aborted
+ if (errflag || e->objectLen() < 0) {
+ debugs(20, 2, "storeSwapOutFileClosed: dirno " << e->swap_dirn << ", swapfile " <<
+ std::hex << std::setw(8) << std::setfill('0') << std::uppercase <<
+ e->swap_filen << ", errflag=" << errflag);
if (errflag == DISK_NO_SPACE_LEFT) {
/* FIXME: this should be handle by the link from store IO to
storeConfigure();
}
- if (e->swap_filen > 0)
+ if (e->swap_filen >= 0)
e->unlink();
- e->swap_filen = -1;
-
- e->swap_dirn = -1;
+ assert(e->swap_status == SWAPOUT_NONE); // NOTE(review): presumably e->unlink() resets the swap state; if swap_filen < 0 it is not called and the status is still SWAPOUT_WRITING -- confirm
- e->swap_status = SWAPOUT_NONE;
-
- storeReleaseRequest(e);
+ e->releaseRequest();
} else {
/* swapping complete */
- debug(20, 3) ("storeSwapOutFileClosed: SwapOut complete: '%s' to %d, %08X\n",
- storeUrl(e), e->swap_dirn, e->swap_filen);
- e->swap_file_sz = objectLen(e) + mem->swap_hdr_sz;
+ debugs(20, 3, "storeSwapOutFileClosed: SwapOut complete: '" << e->url() << "' to " <<
+ e->swap_dirn << ", " << std::hex << std::setw(8) << std::setfill('0') <<
+ std::uppercase << e->swap_filen);
+ debugs(20, 5, HERE << "swap_file_sz = " <<
+ e->objectLen() << " + " << mem->swap_hdr_sz);
+
+ e->swap_file_sz = e->objectLen() + mem->swap_hdr_sz;
e->swap_status = SWAPOUT_DONE;
- e->store()->updateSize(e->swap_file_sz, 1);
+ e->store()->swappedOut(*e);
- if (storeCheckCachable(e)) {
+ // XXX: For some Stores, it is pointless to re-check cachability here
+ // and it leads to double counts in store_check_cachable_hist. We need
+ // another way to signal a completed but failed swapout. Or, better,
+ // each Store should handle its own logging and LOG state setting.
+ if (e->checkCachable()) {
storeLog(STORE_LOG_SWAPOUT, e);
storeDirSwapLog(e, SWAP_LOG_ADD);
}
- statCounter.swap.outs++;
+ ++statCounter.swap.outs;
}
- debug(20, 3) ("storeSwapOutFileClosed: %s:%d\n", __FILE__, __LINE__);
+ debugs(20, 3, "storeSwapOutFileClosed: " << __FILE__ << ":" << __LINE__);
mem->swapout.sio = NULL;
e->unlock();
}
-/*
- * Is this entry a candidate for writing to disk?
- */
-int
-storeSwapOutAble(const StoreEntry * e)
/*
 * Decides whether this entry may start (or keep) swapping out to disk.
 *
 * Definitive answers are cached in mem_obj->swapout.decision (swPossible or
 * swImpossible) and returned directly on later calls. Three outcomes return
 * false WITHOUT caching, so they are re-evaluated next time: no cache_dir is
 * configured, the expected reply size is still unknown while a max-size limit
 * applies, and the swapout start is being deferred.
 *
 * The caller must have checked ENTRY_ABORTED already; this is asserted.
 */
+bool
+StoreEntry::mayStartSwapOut()
{
dlink_node *node;
- if (e->mem_obj->swapout.sio.getRaw() != NULL)
- return 1;
+ // must be checked in the caller
+ assert(!EBIT_TEST(flags, ENTRY_ABORTED));
+
+ if (!Config.cacheSwap.n_configured)
+ return false;
+
+ assert(mem_obj);
+ MemObject::SwapOut::Decision &decision = mem_obj->swapout.decision;
+
+ // if we decided that swapout is not possible, do not repeat same checks
+ if (decision == MemObject::SwapOut::swImpossible) {
+ debugs(20, 3, HERE << " already rejected");
+ return false;
+ }
+
+ // if we decided that swapout is possible, do not repeat same checks
+ if (decision == MemObject::SwapOut::swPossible) {
+ debugs(20, 3, HERE << "already allowed");
+ return true;
+ }
+
+ // if we are swapping out already, do not repeat same checks
+ if (swap_status != SWAPOUT_NONE) {
+ debugs(20, 3, HERE << " already started");
+ decision = MemObject::SwapOut::swPossible;
+ return true;
+ }
+
+ if (!checkCachable()) {
+ debugs(20, 3, HERE << "not cachable");
+ decision = MemObject::SwapOut::swImpossible;
+ return false;
+ }
+
+ if (EBIT_TEST(flags, ENTRY_SPECIAL)) {
+ debugs(20, 3, HERE << url() << " SPECIAL");
+ decision = MemObject::SwapOut::swImpossible;
+ return false;
+ }
+
+ // check cache_dir max-size limit if all cache_dirs have it
+ if (store_maxobjsize >= 0) {
+ // TODO: add estimated store metadata size to be conservative
+
+ // use guaranteed maximum if it is known
+ const int64_t expectedEnd = mem_obj->expectedReplySize();
+ debugs(20, 7, HERE << "expectedEnd = " << expectedEnd);
+ if (expectedEnd > store_maxobjsize) {
+ debugs(20, 3, HERE << "will not fit: " << expectedEnd <<
+ " > " << store_maxobjsize);
+ decision = MemObject::SwapOut::swImpossible;
+ return false; // known to outgrow the limit eventually
+ }
+
+ // use current minimum (always known)
+ const int64_t currentEnd = mem_obj->endOffset();
+ if (currentEnd > store_maxobjsize) {
+ debugs(20, 3, HERE << "does not fit: " << currentEnd <<
+ " > " << store_maxobjsize);
+ decision = MemObject::SwapOut::swImpossible;
+ return false; // already does not fit and may only get bigger
+ }
+
+ // prevent default swPossible answer for yet unknown length
+ if (expectedEnd < 0) {
+ debugs(20, 3, HERE << "wait for more info: " <<
+ store_maxobjsize);
+ return false; // may fit later, but will be rejected now
+ }
- if (e->mem_obj->inmem_lo > 0)
- return 0;
+ if (store_status != STORE_OK) {
+ const int64_t maxKnownSize = expectedEnd < 0 ?
+ mem_obj->availableForSwapOut() : expectedEnd; // NOTE(review): expectedEnd < 0 already returned above, so this always selects expectedEnd
+ debugs(20, 7, HERE << "maxKnownSize= " << maxKnownSize);
+ if (maxKnownSize < store_maxobjsize) {
+ /*
+ * NOTE: the store_maxobjsize here is the max of optional
+ * max-size values from 'cache_dir' lines. It is not the
+ * same as 'maximum_object_size'. By default, store_maxobjsize
+ * will be set to -1. However, I am worried that this
+ * deferral may consume a lot of memory in some cases.
+ * Should we add an option to limit this memory consumption?
+ */
+ debugs(20, 5, HERE << "Deferring swapout start for " <<
+ (store_maxobjsize - maxKnownSize) << " bytes");
+ return false;
+ }
+ }
+ }
+
+ if (mem_obj->inmem_lo > 0) {
+ debugs(20, 3, "storeSwapOut: (inmem_lo > 0) imem_lo:" << mem_obj->inmem_lo);
+ decision = MemObject::SwapOut::swImpossible;
+ return false;
+ }
/*
* If there are DISK clients, we must write to disk
* therefore this should be an assert?
* RBC 20030708: We can use disk to avoid mem races, so this shouldn't be
* an assert.
+ *
+ * XXX: Not clear what "mem races" the above refers to, especially when
+ * dealing with non-cachable objects that cannot have multiple clients.
+ *
+ * XXX: If STORE_DISK_CLIENT needs SwapOut::swPossible, we have to check
+ * for that flag earlier, but forcing swapping may contradict max-size or
+ * other swapability restrictions. Change storeClientType() and/or its
+ * callers to take swap-in availability into account.
*/
- for (node = e->mem_obj->clients.head; node; node = node->next) {
- if (((store_client *) node->data)->getType() == STORE_DISK_CLIENT)
- return 1;
+ for (node = mem_obj->clients.head; node; node = node->next) {
+ if (((store_client *) node->data)->getType() == STORE_DISK_CLIENT) {
+ debugs(20, 3, HERE << "DISK client found");
+ decision = MemObject::SwapOut::swPossible;
+ return true;
+ }
}
- /* Don't pollute the disk with icons and other special entries */
- if (EBIT_TEST(e->flags, ENTRY_SPECIAL))
- return 0;
-
- if (!EBIT_TEST(e->flags, ENTRY_CACHABLE))
- return 0;
-
- if (!e->mem_obj->isContiguous())
- return 0;
+ if (!mem_obj->isContiguous()) {
+ debugs(20, 3, "storeSwapOut: not Contiguous");
+ decision = MemObject::SwapOut::swImpossible;
+ return false;
+ }
- return 1;
+ decision = MemObject::SwapOut::swPossible;
+ return true;
}