From 49769258cf5250d789ab2aca803830d92ceadaa5 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Tue, 9 Jul 2013 18:41:01 -0600 Subject: [PATCH] Use Rock::IoState::writeableAnchor_ to detect rock entries open for writing. Just e.mem_obj->swapout.sio presence is not reliable enough because we may switch from writing to reading while the [writing] sio is still around. More explicitly disabled on-disk collapsing of entries. The relevant code is unstable under load [at least when combined with memory caching]. We were not calling Ipc::StoreMap::startAppending() before so we probably did not fully disk-collapse entries before these temporary changes. Added an XXX to mark an assert() that may fail if we allow on-disk collapsing. --- src/fs/rock/RockIoState.cc | 6 +++--- src/fs/rock/RockSwapDir.cc | 13 +++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/fs/rock/RockIoState.cc b/src/fs/rock/RockIoState.cc index 5646d075fb..a202e3db88 100644 --- a/src/fs/rock/RockIoState.cc +++ b/src/fs/rock/RockIoState.cc @@ -177,7 +177,7 @@ Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff) const SlotId sidNext = reserveSlotForWriting(); // throws assert(sidNext >= 0); writeToDisk(sidNext); - } else if (Store::Root().transientReaders(*e)) { + } else if (false && Store::Root().transientReaders(*e)) { // write partial buffer for all remote hit readers to see writeBufToDisk(-1); } @@ -283,7 +283,7 @@ void Rock::IoState::finishedWriting(const int errFlag) { // we incremented offset_ while accumulating data in write() - writeableAnchor_ = NULL; + // we do not reset writeableAnchor_ here because we still keep the lock CollapsedForwarding::Broadcast(*e); callBack(errFlag); } @@ -297,7 +297,7 @@ Rock::IoState::close(int how) if (!theFile) { debugs(79, 3, "I/O already canceled"); assert(!callback); - assert(!writeableAnchor_); + // We keep writeableAnchor_ after callBack() on I/O errors. 
assert(!readableAnchor_); return; } diff --git a/src/fs/rock/RockSwapDir.cc b/src/fs/rock/RockSwapDir.cc index 7c0c1effdb..a81da20459 100644 --- a/src/fs/rock/RockSwapDir.cc +++ b/src/fs/rock/RockSwapDir.cc @@ -83,7 +83,7 @@ Rock::SwapDir::get(const cache_key *key) bool Rock::SwapDir::anchorCollapsed(StoreEntry &collapsed, bool &inSync) { - if (!map || !theFile || !theFile->canRead()) + if (true || !map || !theFile || !theFile->canRead()) return false; sfileno filen; @@ -161,13 +161,16 @@ void Rock::SwapDir::disconnect(StoreEntry &e) // before it switches from SWAPOUT_WRITING to SWAPOUT_DONE. // since e has swap_filen, its slot is locked for reading and/or writing - // but it is difficult to know whether THIS worker is reading or writing e - if (e.swap_status == SWAPOUT_WRITING || - (e.mem_obj && e.mem_obj->swapout.sio != NULL)) { + // but it is difficult to know whether THIS worker is reading or writing e, + // especially since we may switch from writing to reading. This code relies + // on Rock::IoState::writeableAnchor_ being set when we locked for writing. 
+ if (e.mem_obj && e.mem_obj->swapout.sio != NULL && + dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_) { map->abortWriting(e.swap_filen); e.swap_dirn = -1; e.swap_filen = -1; e.swap_status = SWAPOUT_NONE; + dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_ = NULL; Store::Root().transientsAbandon(e); // broadcasts after the change } else { map->closeForReading(e.swap_filen); @@ -750,6 +753,7 @@ Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOS assert(slot->sameKey(static_cast<const cache_key*>(e.key))); assert(slot->basics.swap_file_sz > 0); + // XXX: basics.swap_file_sz may grow for collapsed disk hits assert(slot->basics.swap_file_sz == e.swap_file_sz); return sio; @@ -822,6 +826,7 @@ Rock::SwapDir::writeCompleted(int errflag, size_t rlen, RefCount< ::WriteRequest if (request->sidNext < 0) { // close, the entry gets the read lock map->closeForWriting(sio.swap_filen, true); + sio.writeableAnchor_ = NULL; sio.finishedWriting(errflag); } } else { -- 2.39.5