dnl
dnl
dnl
-AC_INIT([Squid Web Proxy],[3.HEAD-BZR],[http://www.squid-cache.org/bugs/],[squid])
+AC_INIT([Squid Web Proxy],[3.2-rock-BZR],[http://www.squid-cache.org/bugs/],[squid])
AC_PREREQ(2.61)
AC_CONFIG_HEADERS([include/autoconf.h])
AC_CONFIG_AUX_DIR(cfgaux)
fi
;;
+ Mmapped)
+ AC_MSG_NOTICE([Enabling Mmapped DiskIO module])
+ DISK_LIBS="$DISK_LIBS libMmapped.a"
+ DISK_MODULES="$DISK_MODULES Mmapped"
+ DISK_LINKOBJS="$DISK_LINKOBJS DiskIO/Mmapped/MmappedDiskIOModule.o"
+ ;;
+
Blocking)
AC_MSG_NOTICE([Enabling Blocking DiskIO module])
DISK_LIBS="$DISK_LIBS libBlocking.a"
# for STORE_TESTS substitution
STORE_TESTS="$STORE_TESTS tests/testCoss$EXEEXT"
;;
+ rock)
+ if ! test "x$squid_disk_module_candidates_Mmapped" = "xyes"; then
+ AC_MSG_ERROR([Storage module Rock requires Mmapped DiskIO module])
+ fi
+ STORE_TESTS="$STORE_TESTS tests/testRock$EXEEXT"
+ ;;
ufs)
STORE_TESTS="$STORE_TESTS tests/testUfs$EXEEXT"
esac
AH_TEMPLATE(HAVE_FS_AUFS, "Define to 1 if aufs filesystem module is built")
AH_TEMPLATE(HAVE_FS_DISKD, "Define to 1 if diskd filesystem module is built")
AH_TEMPLATE(HAVE_FS_COSS, "Define to 1 if coss filesystem module is built")
+AH_TEMPLATE(HAVE_FS_ROCK, "Define to 1 if rock filesystem module is built")
dnl got final squid_storeio_module_candidates, build library lists
Vector &operator += (E item) {push_back(item); return *this;};
void insert (E);
+ const E &front() const;
+ E &front();
E &back();
E pop_back();
E shift(); // aka pop_front
return items[size() - 1];
}
+template<class E>
+const E &
+Vector<E>::front() const
+{
+ assert (size());
+ return items[0];
+}
+
+template<class E>
+E &
+Vector<E>::front()
+{
+ assert (size());
+ return items[0];
+}
+
template<class E>
void
Vector<E>::prune(E item)
assert (fd > -1);
assert (ioRequestor.getRaw());
readRequest = aRequest;
+ debugs(79, 3, HERE << aRequest->len << " for FD " << fd << " at " << aRequest->offset);
file_read(fd, aRequest->buf, aRequest->len, aRequest->offset, ReadDone, this);
}
void
BlockingFile::write(WriteRequest *aRequest)
{
- debugs(79, 3, "storeUfsWrite: FD " << fd);
+ debugs(79, 3, HERE << aRequest->len << " for FD " << fd << " at " << aRequest->offset);
writeRequest = aRequest;
file_write(fd,
aRequest->offset,
BlockingFile::writeDone(int rvfd, int errflag, size_t len)
{
assert (rvfd == fd);
- debugs(79, 3, "storeUfsWriteDone: FD " << fd << ", len " << len);
+ debugs(79,3, HERE << "FD " << fd << ", len " << len);
WriteRequest::Pointer result = writeRequest;
writeRequest = NULL;
--- /dev/null
+#include "squid.h"
+#include "MmappedDiskIOModule.h"
+#include "MmappedIOStrategy.h"
+
+MmappedDiskIOModule::MmappedDiskIOModule()
+{
+ ModuleAdd(*this);
+}
+
+MmappedDiskIOModule &
+MmappedDiskIOModule::GetInstance()
+{
+ return Instance;
+}
+
+void
+MmappedDiskIOModule::init()
+{}
+
+void
+MmappedDiskIOModule::shutdown()
+{}
+
+
+DiskIOStrategy*
+MmappedDiskIOModule::createStrategy()
+{
+ return new MmappedIOStrategy();
+}
+
+MmappedDiskIOModule MmappedDiskIOModule::Instance;
+
+char const *
+MmappedDiskIOModule::type () const
+{
+ return "Mmapped";
+}
--- /dev/null
+#ifndef SQUID_MMAPPEDDISKIOMODULE_H
+#define SQUID_MMAPPEDDISKIOMODULE_H
+
+#include "DiskIO/DiskIOModule.h"
+
+class MmappedDiskIOModule : public DiskIOModule
+{
+
+public:
+ static MmappedDiskIOModule &GetInstance();
+ MmappedDiskIOModule();
+ virtual void init();
+ virtual void shutdown();
+ virtual char const *type () const;
+ virtual DiskIOStrategy* createStrategy();
+
+private:
+ static MmappedDiskIOModule Instance;
+};
+
+#endif /* SQUID_MMAPPEDDISKIOMODULE_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "DiskIO/Mmapped/MmappedFile.h"
+#include <sys/mman.h>
+#include "DiskIO/IORequestor.h"
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+
+CBDATA_CLASS_INIT(MmappedFile);
+
+// helper class to deal with mmap(2) offset alignment and other low-level specs
+class Mmapping {
+public:
+ Mmapping(int fd, size_t length, int prot, int flags, off_t offset);
+ ~Mmapping();
+
+ void *map(); ///< calls mmap(2); returns usable buffer or nil on failure
+ bool unmap(); ///< unmaps previously mapped buffer, if any
+
+private:
+ const int fd; ///< descriptor of the mmapped file
+ const size_t length; ///< user-requested data length, needed for munmap
+ const int prot; ///< mmap(2) "protection" flags
+ const int flags; ///< other mmap(2) flags
+ const off_t offset; ///< user-requested data offset
+
+ off_t delta; ///< mapped buffer increment to hit user offset
+ void *buf; ///< buffer returned by mmap, needed for munmap
+};
+
+
+void *
+MmappedFile::operator new(size_t sz)
+{
+ CBDATA_INIT_TYPE(MmappedFile);
+ MmappedFile *result = cbdataAlloc(MmappedFile);
+ /* Mark result as being owned - we want the refcounter to do the delete
+ * call */
+ return result;
+}
+
+void
+MmappedFile::operator delete(void *address)
+{
+ MmappedFile *t = static_cast<MmappedFile *>(address);
+ cbdataFree(t);
+}
+
+MmappedFile::MmappedFile(char const *aPath): fd(-1),
+ minOffset(0), maxOffset(-1), error_(false)
+{
+ assert(aPath);
+ path_ = xstrdup(aPath);
+ debugs(79,5, HERE << this << ' ' << path_);
+}
+
+MmappedFile::~MmappedFile()
+{
+ safe_free(path_);
+ doClose();
+}
+
+// XXX: almost a copy of BlockingFile::open
+void
+MmappedFile::open(int flags, mode_t mode, RefCount<IORequestor> callback)
+{
+ assert(fd < 0);
+
+ /* Simulate async calls */
+ fd = file_open(path_ , flags);
+ ioRequestor = callback;
+
+ if (fd < 0) {
+ debugs(79,3, HERE << "open error: " << xstrerror());
+ error_ = true;
+ } else {
+ store_open_disk_fd++;
+ debugs(79,3, HERE << "FD " << fd);
+
+ // setup mapping boundaries
+ struct stat sb;
+ if (fstat(fd, &sb) == 0)
+ maxOffset = sb.st_size; // we do not expect it to change
+ }
+
+ callback->ioCompletedNotification();
+}
+
+/**
+ * Alias for MmappedFile::open(...)
+ * \copydoc MmappedFile::open(int flags, mode_t mode, RefCount<IORequestor> callback)
+ */
+void
+MmappedFile::create(int flags, mode_t mode, RefCount<IORequestor> callback)
+{
+ /* We use the same logic path for open */
+ open(flags, mode, callback);
+}
+
+void MmappedFile::doClose()
+{
+ if (fd >= 0) {
+ file_close(fd);
+ fd = -1;
+ store_open_disk_fd--;
+ }
+}
+
+void
+MmappedFile::close()
+{
+ debugs(79, 3, HERE << this << " closing for " << ioRequestor);
+ doClose();
+ assert(ioRequestor != NULL);
+ ioRequestor->closeCompleted();
+}
+
+bool
+MmappedFile::canRead() const
+{
+ return fd >= 0;
+}
+
+bool
+MmappedFile::canWrite() const
+{
+ return fd >= 0;
+}
+
+bool
+MmappedFile::error() const
+{
+ return error_;
+}
+
+void
+MmappedFile::read(ReadRequest *aRequest)
+{
+ debugs(79,3, HERE << "(FD " << fd << ", " << aRequest->len << ", " <<
+ aRequest->offset << ")");
+
+ assert(fd >= 0);
+ assert(ioRequestor != NULL);
+ assert(aRequest->len > 0); // TODO: work around mmap failures on zero-len?
+ assert(aRequest->offset >= 0);
+ assert(!error_); // TODO: propagate instead?
+
+ assert(minOffset < 0 || minOffset <= aRequest->offset);
+ assert(maxOffset < 0 || aRequest->offset + aRequest->len <= (uint64_t)maxOffset);
+
+ Mmapping mapping(fd, aRequest->len, PROT_READ, MAP_PRIVATE | MAP_NORESERVE,
+ aRequest->offset);
+
+ bool done = false;
+ if (void *buf = mapping.map()) {
+ memcpy(aRequest->buf, buf, aRequest->len);
+ done = mapping.unmap();
+ }
+ error_ = !done;
+
+ const ssize_t rlen = error_ ? -1 : (ssize_t)aRequest->len;
+ const int errflag = error_ ? DISK_ERROR : DISK_OK;
+ ioRequestor->readCompleted(aRequest->buf, rlen, errflag, aRequest);
+}
+
+void
+MmappedFile::write(WriteRequest *aRequest)
+{
+ debugs(79,3, HERE << "(FD " << fd << ", " << aRequest->len << ", " <<
+ aRequest->offset << ")");
+
+ assert(fd >= 0);
+ assert(ioRequestor != NULL);
+ assert(aRequest->len > 0); // TODO: work around mmap failures on zero-len?
+ assert(aRequest->offset >= 0);
+ assert(!error_); // TODO: propagate instead?
+
+ assert(minOffset < 0 || minOffset <= aRequest->offset);
+ assert(maxOffset < 0 || aRequest->offset + aRequest->len <= (uint64_t)maxOffset);
+
+ const ssize_t written =
+ pwrite(fd, aRequest->buf, aRequest->len, aRequest->offset);
+ if (written < 0) {
+ debugs(79,1, HERE << "error: " << xstrerr(errno));
+ error_ = true;
+ } else
+ if (static_cast<size_t>(written) != aRequest->len) {
+ debugs(79,1, HERE << "problem: " << written << " < " << aRequest->len);
+ error_ = true;
+ }
+
+ if (aRequest->free_func)
+ (aRequest->free_func)(const_cast<char*>(aRequest->buf)); // broken API?
+
+ if (!error_) {
+ debugs(79,5, HERE << "wrote " << aRequest->len << " to FD " << fd << " at " << aRequest->offset);
+ } else {
+ doClose();
+ }
+
+ const ssize_t rlen = error_ ? 0 : (ssize_t)aRequest->len;
+ const int errflag = error_ ? DISK_ERROR : DISK_OK;
+ ioRequestor->writeCompleted(errflag, rlen, aRequest);
+}
+
+/// we only support blocking I/O
+bool
+MmappedFile::ioInProgress() const
+{
+ return false;
+}
+
+Mmapping::Mmapping(int aFd, size_t aLength, int aProt, int aFlags, off_t anOffset):
+ fd(aFd), length(aLength), prot(aProt), flags(aFlags), offset(anOffset),
+ delta(-1), buf(NULL)
+{
+}
+
+Mmapping::~Mmapping()
+{
+ if (buf)
+ unmap();
+}
+
+void *
+Mmapping::map()
+{
+ // mmap(2) requires that offset is a multiple of the page size
+ static const int pageSize = getpagesize();
+ delta = offset % pageSize;
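+ // e.g., offset=12345 with pageSize=4096 gives delta=57: we map
+ // length+57 bytes starting at page-aligned offset 12288 and return
+ // the buffer shifted by 57 bytes to hit the user-requested offset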
+
+ buf = mmap(NULL, length + delta, prot, flags, fd, offset - delta);
+
+ if (buf == MAP_FAILED) {
+ const int errNo = errno;
+ debugs(79,3, HERE << "error FD " << fd << "mmap(" << length << '+' <<
+ delta << ", " << offset << '-' << delta << "): " << xstrerr(errNo));
+ buf = NULL;
+ return NULL;
+ }
+
+ return static_cast<char*>(buf) + delta;
+}
+
+bool
+Mmapping::unmap()
+{
+ debugs(79,9, HERE << "FD " << fd <<
+ " munmap(" << buf << ", " << length << '+' << delta << ')');
+
+ if (!buf) // forgot or failed to map
+ return false;
+
+ const bool error = munmap(buf, length + delta) != 0;
+ if (error) {
+ const int errNo = errno;
+ debugs(79,3, HERE << "error FD " << fd <<
+ " munmap(" << buf << ", " << length << '+' << delta << "): " <<
+ "): " << xstrerr(errNo));
+ }
+ buf = NULL;
+ return !error;
+}
+
+// TODO: check MAP_NORESERVE, consider MAP_POPULATE and MAP_FIXED
--- /dev/null
+#ifndef SQUID_MMAPPEDFILE_H
+#define SQUID_MMAPPEDFILE_H
+
+#include "cbdata.h"
+#include "DiskIO/DiskFile.h"
+#include "DiskIO/IORequestor.h"
+
+class MmappedFile : public DiskFile
+{
+
+public:
+ void *operator new(size_t);
+ void operator delete(void *);
+ MmappedFile(char const *path);
+ ~MmappedFile();
+ virtual void open(int flags, mode_t mode, RefCount<IORequestor> callback);
+ virtual void create(int flags, mode_t mode, RefCount<IORequestor> callback);
+ virtual void read(ReadRequest *);
+ virtual void write(WriteRequest *);
+ virtual void close();
+ virtual bool error() const;
+ virtual int getFD() const { return fd;}
+
+ virtual bool canRead() const;
+ virtual bool canWrite() const;
+ virtual bool ioInProgress() const;
+
+private:
+ CBDATA_CLASS(MmappedFile);
+
+ char const *path_;
+ RefCount<IORequestor> ioRequestor;
+ //RefCount<ReadRequest> readRequest;
+ //RefCount<WriteRequest> writeRequest;
+ int fd;
+
+ // mmapped memory leads to SEGV and bus errors if it maps beyond file
+ int64_t minOffset; ///< enforced if not negative (to preserve file headers)
+ int64_t maxOffset; ///< enforced if not negative (to avoid crashes)
+
+ bool error_;
+
+ void doClose();
+};
+
+#endif /* SQUID_MMAPPEDFILE_H */
--- /dev/null
+
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "MmappedIOStrategy.h"
+#include "MmappedFile.h"
+bool
+MmappedIOStrategy::shedLoad()
+{
+ return false;
+}
+
+int
+MmappedIOStrategy::load()
+{
+ /* Return 999 (99.9%) constant load */
+ return 999;
+}
+
+DiskFile::Pointer
+MmappedIOStrategy::newFile (char const *path)
+{
+ return new MmappedFile (path);
+}
+
+void
+MmappedIOStrategy::unlinkFile(char const *path)
+{
+#if USE_UNLINKD
+ unlinkdUnlink(path);
+#else
+ ::unlink(path);
+#endif
+}
--- /dev/null
+#ifndef SQUID_MMAPPEDIOSTRATEGY_H
+#define SQUID_MMAPPEDIOSTRATEGY_H
+#include "DiskIO/DiskIOStrategy.h"
+
+class MmappedIOStrategy : public DiskIOStrategy
+{
+
+public:
+ virtual bool shedLoad();
+ virtual int load();
+ virtual RefCount<DiskFile> newFile(char const *path);
+ virtual void unlinkFile (char const *);
+};
+
+#endif /* SQUID_MMAPPEDIOSTRATEGY_H */
AIOPS_SOURCE = DiskIO/DiskThreads/aiops.cc
endif
-EXTRA_LIBRARIES = libAIO.a libBlocking.a libDiskDaemon.a libDiskThreads.a
+EXTRA_LIBRARIES = libAIO.a libBlocking.a libDiskDaemon.a libDiskThreads.a \
+ libMmapped.a
noinst_LIBRARIES = $(DISK_LIBS)
noinst_LTLIBRARIES = libsquid.la
recv-announce \
tests/testUfs \
tests/testCoss \
+ tests/testRock \
tests/testNull \
ufsdump
DiskIO/Blocking/BlockingDiskIOModule.cc \
DiskIO/Blocking/BlockingDiskIOModule.h
+libMmapped_a_SOURCES = \
+ DiskIO/Mmapped/MmappedFile.cc \
+ DiskIO/Mmapped/MmappedFile.h \
+ DiskIO/Mmapped/MmappedIOStrategy.cc \
+ DiskIO/Mmapped/MmappedIOStrategy.h \
+ DiskIO/Mmapped/MmappedDiskIOModule.cc \
+ DiskIO/Mmapped/MmappedDiskIOModule.h
+
libDiskDaemon_a_SOURCES = \
DiskIO/DiskDaemon/DiskdFile.cc \
DiskIO/DiskDaemon/DiskdFile.h \
tests_testUfs_DEPENDENCIES = \
$(SWAP_TEST_DS)
+tests_testRock_SOURCES = \
+ tests/testRock.cc \
+ tests/testMain.cc \
+ tests/testRock.h \
+ tests/stub_cache_manager.cc \
+ tests/stub_HelperChildConfig.cc \
+ tests/stub_Port.cc \
+ tests/stub_TypedMsgHdr.cc \
+ tests/stub_UdsOp.cc \
+ $(SWAP_TEST_SOURCES)
+nodist_tests_testRock_SOURCES = \
+ swap_log_op.cc \
+ $(SWAP_TEST_GEN_SOURCES) \
+ SquidMath.cc \
+ SquidMath.h
+tests_testRock_LDADD = \
+ $(COMMON_LIBS) \
+ $(REPL_OBJS) \
+ $(DISK_LIBS) \
+ $(DISK_OS_LIBS) \
+ acl/libapi.la \
+ $(top_builddir)/lib/libmisccontainers.la \
+ $(top_builddir)/lib/libmiscencoding.la \
+ $(top_builddir)/lib/libmiscutil.la \
+ $(REGEXLIB) \
+ $(SQUID_CPPUNIT_LIBS) \
+ $(SSLLIB) \
+ $(COMPAT_LIB) \
+ $(XTRA_LIBS)
+tests_testRock_LDFLAGS = $(LIBADD_DL)
+tests_testRock_DEPENDENCIES = \
+ $(SWAP_TEST_DS)
+
tests_testCoss_SOURCES = \
tests/testCoss.cc \
tests/testMain.cc \
/// \ingroup SwapStoreAPI
SQUIDCEXTERN char *storeSwapMetaPack(tlv * tlv_list, int *length);
/// \ingroup SwapStoreAPI
+SQUIDCEXTERN size_t storeSwapMetaSize(const StoreEntry * e);
+/// \ingroup SwapStoreAPI
SQUIDCEXTERN tlv *storeSwapMetaBuild(StoreEntry * e);
/// \ingroup SwapStoreAPI
SQUIDCEXTERN void storeSwapTLVFree(tlv * n);
int const StoreMetaUnpacker::MinimumBufferLength = sizeof(char) + sizeof(int);
+/// useful for meta stored in pre-initialized (with zeros) db files
+bool
+StoreMetaUnpacker::isBufferZero()
+{
+ // We could memcmp the entire buffer, but it is probably safe enough
+ // to test a few bytes because if we do not detect a corrupted entry
+ // it is not a big deal. Empty entries are not isBufferSane anyway.
+ const int depth = 10;
+ if (buflen < depth)
+ return false; // cannot be sure enough
+
+ for (int i = 0; i < depth; ++i) {
+ if (buf[i])
+ return false;
+ }
+ return true;
+}
+
bool
StoreMetaUnpacker::isBufferSane()
{
public:
StoreMetaUnpacker (const char *buf, ssize_t bufferLength, int *hdrlen);
StoreMeta *createStoreMeta();
+ bool isBufferZero(); ///< all-zeros buffer, implies !isBufferSane
bool isBufferSane();
private:
#include "StoreFileSystem.h"
#include "ConfigOption.h"
+SwapDir::SwapDir(char const *aType): theType(aType),
+ cur_size (0), max_size(0),
+ path(NULL), index(-1), min_objsize(0), max_objsize (-1),
+ repl(NULL), removals(0), scanned(0),
+ cleanLog(NULL)
+{
+ fs.blksize = 1024;
+}
+
SwapDir::~SwapDir()
{
+ // TODO: should we delete repl?
xfree(path);
}
{
public:
- SwapDir(char const *aType) : theType (aType), cur_size(0), max_size(0), min_objsize(0), max_objsize(-1), cleanLog(NULL) {
- fs.blksize = 1024;
- path = NULL;
- }
-
+ SwapDir(char const *aType);
virtual ~SwapDir();
virtual void reconfigure(int, char *) = 0;
char const *type() const;
higher hit ratio at the expense of an increase in response
time.
+ The rock store type:
+
+ cache_dir rock Directory-Name Mbytes <max-size=bytes>
+
+ The Rock Store type is a database-style storage. All cached
+ entries are stored in a single "database" file, using fixed-size
+ slots, one entry per slot. The database size is specified in
+ Mbytes. The slot size is specified in bytes, using the max-size
+ option. See below for more information on max-size.
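+
+ For example, a hypothetical cache_dir keeping up to 512 MB of
+ cached entries in 8192-byte slots:
+
+ cache_dir rock /var/cache/squid-rock 512 max-size=8192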
+
The coss store type:
NP: COSS filesystem in Squid-3 has been deemed too unstable for
' output as-is
- left aligned
- width field width. If starting with 0 the
- output is zero padded
+ width minimum and/or maximum field width: [min][.max]
+ When minimum starts with 0, the field is zero-padded.
+ String values exceeding maximum width are truncated.
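+ For example, a width of "08.8" zero-pads shorter
+ values to 8 characters and truncates longer ones.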
{arg} argument such as header name etc
Format codes:
returning a chain of services to be used next. The services
are specified using the X-Next-Services ICAP response header
value, formatted as a comma-separated list of service names.
- Each named service should be configured in squid.conf and
- should have the same method and vectoring point as the current
- ICAP transaction. Services violating these rules are ignored.
- An empty X-Next-Services value results in an empty plan which
- ends the current adaptation.
+ Each named service should be configured in squid.conf. Other
+ services are ignored. An empty X-Next-Services value results
+ in an empty plan which ends the current adaptation.
+
+ A dynamic adaptation plan may cross or cover multiple
+ supported vectoring points in their natural processing order.
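+
+ For example, an ICAP server could plan two more adaptation
+ steps (service names here are hypothetical):
+
+ X-Next-Services: salvageService, logService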
Routing is not allowed by default: the ICAP X-Next-Services
response header is ignored.
assert(fdd->write_q->len > fdd->write_q->buf_offset);
- debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " << (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes");
+ debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
+ (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
+ fdd->write_q->file_offset);
errno = 0;
if (fdd->write_q->file_offset != -1)
- lseek(fd, fdd->write_q->file_offset, SEEK_SET);
+ lseek(fd, fdd->write_q->file_offset, SEEK_SET); /* XXX ignore return? */
len = FD_WRITE_METHOD(fd,
fdd->write_q->buf + fdd->write_q->buf_offset,
include $(top_srcdir)/src/Common.am
-EXTRA_LTLIBRARIES = libaufs.la libdiskd.la libcoss.la libufs.la
+EXTRA_LTLIBRARIES = libaufs.la libdiskd.la libcoss.la libufs.la librock.la
noinst_LTLIBRARIES = $(STORE_LIBS_TO_BUILD) libfs.la
# aufs is a "fake" legacy store
ufs/ufscommon.cc \
ufs/ufscommon.h
+librock_la_SOURCES = \
+ rock/RockCommon.cc \
+ rock/RockCommon.h \
+ rock/RockDirMap.cc \
+ rock/RockDirMap.h \
+ rock/RockFile.cc \
+ rock/RockFile.h \
+ rock/RockIoState.cc \
+ rock/RockIoState.h \
+ rock/RockIoRequests.cc \
+ rock/RockIoRequests.h \
+ rock/RockRebuild.cc \
+ rock/RockRebuild.h \
+ rock/RockStoreFileSystem.cc \
+ rock/RockStoreFileSystem.h \
+ rock/RockSwapDir.cc \
+ rock/RockSwapDir.h
+
libfs_la_SOURCES = Module.cc Module.h
libfs_la_LIBADD = $(STORE_LIBS_TO_BUILD)
libfs_la_DEPENDENCIES = $(STORE_LIBS_TO_BUILD)
coss/clean: clean
ufs/all: libufs.la
ufs/clean: clean
+rock/all: librock.la
+rock/clean: clean
TESTS += testHeaders
## Special Universal .h dependency test script
## aborts if error encountered
-testHeaders: $(srcdir)/ufs/*.h $(srcdir)/coss/*.h
+testHeaders: $(srcdir)/ufs/*.h $(srcdir)/coss/*.h $(srcdir)/rock/*.h
$(SHELL) $(top_srcdir)/test-suite/testheaders.sh "$(CXXCOMPILE)" $^ || exit 1
## diskd/ has no .h files
## aufs/ has no .h files
static StoreFSufs<UFSSwapDir> *DiskdInstance;
#endif
+#ifdef HAVE_FS_ROCK
+#include "fs/rock/RockStoreFileSystem.h"
+static Rock::StoreFileSystem *RockInstance = NULL;
+#endif
+
+
/* TODO: Modify coss code to:
* (a) remove the StoreFScoss::GetInstance method,
* (b) declare the StoreFScoss::stats as static and
DiskdInstance = new StoreFSufs<UFSSwapDir>("DiskDaemon", "diskd");;
#endif
+#ifdef HAVE_FS_ROCK
+ RockInstance = new Rock::StoreFileSystem();
+#endif
+
}
delete DiskdInstance;
#endif
+#ifdef HAVE_FS_ROCK
+ delete RockInstance;
+#endif
+
}
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "squid.h"
+#include "fs/rock/RockDirMap.h"
+
+Rock::DirMap::DirMap(int roughLimit): hintPast(-1), hintNext(0),
+ bitLimit(roundLimit(roughLimit)), bitCount(0), words(NULL), wordCount(0)
+{
+ syncWordCount();
+ allocate();
+}
+
+Rock::DirMap::DirMap(const DirMap &m):
+ hintPast(m.hintPast), hintNext(m.hintNext),
+ bitLimit(m.bitLimit), bitCount(m.bitCount),
+ words(NULL), wordCount(m.wordCount)
+{
+ syncWordCount();
+ copyFrom(m);
+}
+
+Rock::DirMap::~DirMap()
+{
+ deallocate();
+}
+
+Rock::DirMap &Rock::DirMap::operator =(const DirMap &m)
+{
+ deallocate();
+
+ hintPast = m.hintPast;
+ hintNext = m.hintNext;
+ bitLimit = m.bitLimit;
+ bitCount = m.bitCount;
+
+ wordCount = m.wordCount;
+ copyFrom(m);
+ return *this;
+}
+
+void
+Rock::DirMap::resize(const int roughLimit)
+{
+ const int newLimit = roundLimit(roughLimit);
+ // TODO: optimize?
+ if (newLimit != bitLimit) {
+ DirMap old(*this);
+ deallocate();
+ bitLimit = newLimit;
+ syncWordCount();
+ copyFrom(old);
+ }
+}
+
+int
+Rock::DirMap::entryLimit() const
+{
+ return bitLimit;
+}
+
+int
+Rock::DirMap::entryCount() const
+{
+ return bitCount;
+}
+
+bool
+Rock::DirMap::full() const
+{
+ return bitCount >= bitLimit;
+}
+
+bool
+Rock::DirMap::valid(const int pos) const
+{
+ return 0 <= pos && pos < bitLimit;
+}
+
+int
+Rock::DirMap::useNext()
+{
+ assert(!full());
+ const int next = findNext();
+ assert(valid(next)); // because we were not full
+ use(next);
+ return next;
+}
+
+/// low-level allocation, assumes wordCount is set
+void
+Rock::DirMap::allocate()
+{
+ assert(!words);
+ words = new unsigned long[wordCount];
+ memset(words, 0, ramSize());
+}
+
+/// low-level deallocation; may place the object in an inconsistent state
+void
+Rock::DirMap::deallocate()
+{
+ delete[] words;
+ words = NULL;
+}
+
+/// low-level copy; assumes all counts have been setup
+void
+Rock::DirMap::copyFrom(const DirMap &m)
+{
+ allocate();
+ if (m.wordCount)
+ memcpy(words, m.words, min(ramSize(), m.ramSize()));
+}
+
+/// low-level ram size calculation for mem*() calls
+int
+Rock::DirMap::ramSize() const
+{
+ return sizeof(*words) * wordCount;
+}
+
+/* XXX: Number of bits in a long and other constants from filemap.cc */
+#if SIZEOF_LONG == 8
+#define LONG_BIT_SHIFT 6
+#define BITS_IN_A_LONG 0x40
+#define LONG_BIT_MASK 0x3F
+#define ALL_ONES (unsigned long) 0xFFFFFFFFFFFFFFFF
+#elif SIZEOF_LONG == 4
+#define LONG_BIT_SHIFT 5
+#define BITS_IN_A_LONG 0x20
+#define LONG_BIT_MASK 0x1F
+#define ALL_ONES (unsigned long) 0xFFFFFFFF
+#else
+#define LONG_BIT_SHIFT 5
+#define BITS_IN_A_LONG 0x20
+#define LONG_BIT_MASK 0x1F
+#define ALL_ONES (unsigned long) 0xFFFFFFFF
+#endif
+
+#define FM_INITIAL_NUMBER (1<<14)
+
+int
+Rock::DirMap::AbsoluteEntryLimit()
+{
+ const int sfilenoMax = 0xFFFFFF; // Core sfileno maximum
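+ // sfilenoMax+1 is 16777216, a multiple of both 32 and 64,
+ // so the word-size rounding below loses no entries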
+ return ((sfilenoMax+1) >> LONG_BIT_SHIFT) << LONG_BIT_SHIFT;
+}
+
+/// Adjust limit so that there are no "extra" bits in the last word
+/// that are above the limit but still found by findNext.
+int
+Rock::DirMap::roundLimit(const int roughLimit) const
+{
+ const int allowedLimit = min(roughLimit, AbsoluteEntryLimit());
+ const int newLimit = (allowedLimit >> LONG_BIT_SHIFT) << LONG_BIT_SHIFT;
+ debugs(8, 3, HERE << "adjusted map limit from " << roughLimit << " to " <<
+ newLimit);
+ return newLimit;
+}
+
+/// calculate wordCount for the number of entries (bitLimit)
+void
+Rock::DirMap::syncWordCount()
+{
+ wordCount = bitLimit >> LONG_BIT_SHIFT;
+ debugs(8, 3, HERE << wordCount << ' ' << BITS_IN_A_LONG <<
+ "-bit long words for " << bitLimit << " bits");
+}
+
+void
+Rock::DirMap::use(const int pos)
+{
+ if (!has(pos)) {
+ assert(valid(pos));
+
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ words[pos >> LONG_BIT_SHIFT] |= bitmask;
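+ // e.g., with 64-bit longs, pos=70 sets bit 6 of words[1]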
+
+ ++bitCount;
+ debugs(8, 6, HERE << pos);
+ } else {
+ debugs(8, 3, HERE << pos << " in vain");
+ }
+}
+
+void
+Rock::DirMap::clear(const int pos)
+{
+ if (has(pos)) {
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ words[pos >> LONG_BIT_SHIFT] &= ~bitmask;
+ --bitCount;
+ debugs(8, 6, HERE << pos);
+ } else {
+ debugs(8, 3, HERE << pos << " in vain");
+ assert(valid(pos));
+ }
+ if (hintPast < 0)
+ hintPast = pos; // remember cleared slot
+}
+
+bool
+Rock::DirMap::has(const int pos) const
+{
+ if (!valid(pos)) // the only place where we are forgiving
+ return false;
+
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ return words[pos >> LONG_BIT_SHIFT] & bitmask;
+}
+
+/// low-level empty-slot search routine, uses and updates hints
+int
+Rock::DirMap::findNext() const
+{
+ // try the clear-based hint, if any
+ if (hintPast >= 0) {
+ const int result = hintPast;
+ hintPast = -1; // assume used; or we could update it in use()
+ if (valid(result) && !has(result))
+ return result;
+ }
+
+ // adjust and try the scan-based hint
+ if (!valid(hintNext))
+ hintNext = 0;
+ if (valid(hintNext) && !has(hintNext))
+ return hintNext++;
+
+ // start scan with the scan-based hint
+ int wordPos = hintNext >> LONG_BIT_SHIFT;
+
+ for (int i = 0; i < wordCount; ++i) {
+ if (words[wordPos] != ALL_ONES)
+ break;
+
+ wordPos = (wordPos + 1) % wordCount;
+ }
+
+ for (int bitPos = 0; bitPos < BITS_IN_A_LONG; ++bitPos) {
+ hintNext = ((unsigned long) wordPos << LONG_BIT_SHIFT) | bitPos;
+
+ if (hintNext < bitLimit && !has(hintNext))
+ return hintNext++;
+ }
+
+ // the map is full
+ return -1;
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_DIR_MAP_H
+#define SQUID_FS_ROCK_DIR_MAP_H
+
+namespace Rock {
+
+/// \ingroup Rock
+/// bitmap of used db slots indexed by sfileno
+class DirMap
+{
+public:
+ // the map may adjust the limit down a little; see roundLimit()
+ DirMap(const int roughLimit = 0);
+ DirMap(const DirMap &map);
+ ~DirMap();
+
+ DirMap &operator =(const DirMap &map);
+ void resize(const int newLimit); ///< forgets higher bits or appends zeros
+
+ bool full() const; ///< there are no empty slots left
+ bool has(int n) const; ///< whether slot n is occupied
+ bool valid(int n) const; ///< whether n is a valid slot coordinate
+ int entryCount() const; ///< number of bits turned on
+ int entryLimit() const; ///< maximum number of bits that can be turned on
+
+ void use(int n); ///< mark slot n as used
+ void clear(int n); ///< mark slot n as unused
+ int useNext(); ///< finds and uses an empty slot, returning its coordinate
+
+ static int AbsoluteEntryLimit(); ///< maximum entryLimit() possible
+
+private:
+ /// unreliable next empty slot suggestion #1 (clear based)
+ mutable int hintPast;
+ /// unreliable next empty slot suggestion #2 (scan based)
+ mutable int hintNext;
+
+ int bitLimit; ///< maximum number of map entries
+ int bitCount; ///< current number of map entries
+
+ unsigned long *words; ///< low level storage
+ int wordCount; ///< number of words allocated
+
+ int roundLimit(const int roughLimit) const;
+ void syncWordCount();
+ int ramSize() const;
+ void allocate();
+ void deallocate();
+ void copyFrom(const DirMap &map);
+ int findNext() const;
+};
+
+} // namespace Rock
+
+// We do not reuse struct _fileMap because we cannot control its size,
+// resulting in sfilenos that point beyond the database.
+
+// TODO: Consider using std::bitset. Is it really slower for findNext()?
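+
+// A minimal usage sketch (sizes are illustrative):
+//
+// Rock::DirMap map(1000); // limit rounds down to 960 with 64-bit longs
+// const int slot = map.useNext(); // claim the first empty slot
+// assert(map.has(slot));
+// map.clear(slot); // release it, e.g., when the entry is unlinked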
+
+
+#endif /* SQUID_FS_ROCK_DIR_MAP_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "fs/rock/RockIoRequests.h"
+
+CBDATA_NAMESPACED_CLASS_INIT(Rock, ReadRequest);
+CBDATA_NAMESPACED_CLASS_INIT(Rock, WriteRequest);
+
+Rock::ReadRequest::ReadRequest(const ::ReadRequest &base,
+ const IoState::Pointer &anSio):
+ ::ReadRequest(base),
+ sio(anSio)
+{
+}
+
+Rock::WriteRequest::WriteRequest(const ::WriteRequest &base,
+ const IoState::Pointer &anSio):
+ ::WriteRequest(base),
+ sio(anSio)
+{
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_IO_REQUESTS_H
+#define SQUID_FS_ROCK_IO_REQUESTS_H
+
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockIoState.h"
+
+class DiskFile;
+
+namespace Rock {
+
+/// \ingroup Rock
+class ReadRequest: public ::ReadRequest
+{
+public:
+ ReadRequest(const ::ReadRequest &base, const IoState::Pointer &anSio);
+ IoState::Pointer sio;
+
+private:
+ CBDATA_CLASS2(ReadRequest);
+};
+
+
+/// \ingroup Rock
+class WriteRequest: public ::WriteRequest
+{
+public:
+ WriteRequest(const ::WriteRequest &base, const IoState::Pointer &anSio);
+ IoState::Pointer sio;
+
+private:
+ CBDATA_CLASS2(WriteRequest);
+};
+
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_IO_REQUESTS_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "Parsing.h"
+#include "DiskIO/DiskIOModule.h"
+#include "DiskIO/DiskIOStrategy.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockIoState.h"
+#include "fs/rock/RockIoRequests.h"
+#include "fs/rock/RockSwapDir.h"
+
+Rock::IoState::IoState(SwapDir *dir,
+ StoreEntry *anEntry,
+ StoreIOState::STFNCB *cbFile,
+ StoreIOState::STIOCB *cbIo,
+ void *data):
+ slotSize(0),
+ entrySize(0)
+{
+ e = anEntry;
+ swap_filen = e->swap_filen;
+ swap_dirn = dir->index;
+ slotSize = dir->max_objsize;
+ file_callback = cbFile;
+ callback = cbIo;
+ callback_data = cbdataReference(data);
+ ++store_open_disk_fd; // TODO: use a dedicated counter?
+ //theFile is set by SwapDir because it depends on DiskIOStrategy
+}
+
+Rock::IoState::~IoState()
+{
+ --store_open_disk_fd;
+ if (callback_data)
+ cbdataReferenceDone(callback_data);
+ theFile = NULL;
+}
+
+void
+Rock::IoState::file(const RefCount<DiskFile> &aFile)
+{
+ assert(!theFile);
+ assert(aFile != NULL);
+ theFile = aFile;
+}
+
+void
+Rock::IoState::read_(char *buf, size_t len, off_t off, STRCB *cb, void *data)
+{
+ assert(theFile != NULL);
+ assert(theFile->canRead());
+
+ // Core specifies buffer length, but we must not exceed stored entry size
+ assert(off >= 0);
+ assert(entrySize >= 0);
+ const int64_t offset = static_cast<int64_t>(off);
+ assert(offset <= entrySize);
+ if (offset + (int64_t)len > entrySize)
+ len = entrySize - offset;
+
+ assert(read.callback == NULL);
+ assert(read.callback_data == NULL);
+ read.callback = cb;
+ read.callback_data = cbdataReference(data);
+
+ theFile->read(new ReadRequest(::ReadRequest(buf, offset_ + offset, len), this));
+}
+
+// We only buffer data here; we actually write when close() is called.
+// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions
+// of the slot when the write does not end at the page or sector boundary.
+void
+Rock::IoState::write(char const *buf, size_t size, off_t offset, FREE *dtor)
+{
+ // TODO: move to create?
+ if (!offset) {
+ assert(theBuf.isNull());
+ assert(entrySize >= 0);
+ theBuf.init(min(entrySize, slotSize), slotSize);
+ } else {
+ // Core uses -1 offset as "append". Sigh.
+ assert(offset == -1);
+ assert(!theBuf.isNull());
+ }
+
+ theBuf.append(buf, size);
+
+ if (dtor)
+ (dtor)(const_cast<char*>(buf)); // cast due to a broken API?
+}
+
+// write what was buffered during write() calls
+void
+Rock::IoState::startWriting()
+{
+ assert(theFile != NULL);
+ assert(theFile->canWrite());
+ assert(!theBuf.isNull());
+
+ static const int ps = getpagesize();
+ const size_t tail = (ps - (theBuf.contentSize() % ps)) % ps;
+ const int64_t writeSize = theBuf.contentSize() + tail;
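+ // e.g., 5000 bytes of content with 4096-byte pages yields
+ // tail=3192 and writeSize=8192 (illustrative numbers)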
+ debugs(79, 5, HERE << swap_filen << " at " << offset_ << '+' <<
+ theBuf.contentSize() << '+' << tail);
+
+ assert(theBuf.contentSize() <= slotSize);
+ assert(writeSize <= slotSize);
+ // theFile->write may call writeCompleted immediately
+ theFile->write(new WriteRequest(::WriteRequest(theBuf.content(), offset_,
+ writeSize, theBuf.freeFunc()), this));
+}
+
+/// called by SwapDir::writeCompleted() when the write is done
+void
+Rock::IoState::finishedWriting(const int errFlag)
+{
+ callBack(errFlag);
+}
+
+void
+Rock::IoState::close()
+{
+ debugs(79, 3, HERE << swap_filen << " at " << offset_);
+ if (!theBuf.isNull())
+ startWriting();
+ else
+ callBack(0);
+}
+
+/// close callback (STIOCB) dialer: breaks dependencies and
+/// counts IOState concurrency level
+class StoreIOStateCb: public CallDialer
+{
+public:
+ StoreIOStateCb(StoreIOState::STIOCB *cb, void *data, int err, const Rock::IoState::Pointer &anSio):
+ callback(NULL),
+ callback_data(NULL),
+ errflag(err),
+ sio(anSio) {
+
+ callback = cb;
+ callback_data = cbdataReference(data);
+ }
+
+ StoreIOStateCb(const StoreIOStateCb &cb):
+ callback(NULL),
+ callback_data(NULL),
+ errflag(cb.errflag),
+ sio(cb.sio) {
+
+ callback = cb.callback;
+ callback_data = cbdataReference(cb.callback_data);
+ }
+
+ virtual ~StoreIOStateCb() {
+ cbdataReferenceDone(callback_data); // may be nil already
+ }
+
+ void dial(AsyncCall &call) {
+ void *cbd;
+ if (cbdataReferenceValidDone(callback_data, &cbd) && callback)
+ callback(cbd, errflag, sio.getRaw());
+ }
+
+ bool canDial(AsyncCall &call) const {
+ return cbdataReferenceValid(callback_data) && callback;
+ }
+
+ virtual void print(std::ostream &os) const {
+ os << '(' << callback_data << ", err=" << errflag << ')';
+ }
+
+private:
+ StoreIOStateCb &operator =(const StoreIOStateCb &cb); // not defined
+
+ StoreIOState::STIOCB *callback;
+ void *callback_data;
+ int errflag;
+ Rock::IoState::Pointer sio;
+};
+
+
+void
+Rock::IoState::callBack(int errflag)
+{
+ debugs(79,3, HERE << "errflag=" << errflag);
+ theFile = NULL;
+
+ AsyncCall::Pointer call = asyncCall(79,3, "SomeIoStateCloseCb",
+ StoreIOStateCb(callback, callback_data, errflag, this));
+ ScheduleCallHere(call);
+
+ callback = NULL;
+ cbdataReferenceDone(callback_data);
+}
+
--- /dev/null
+#ifndef SQUID_FS_ROCK_IO_STATE_H
+#define SQUID_FS_ROCK_IO_STATE_H
+
+#include "MemBuf.h"
+#include "SwapDir.h"
+
+class DiskFile;
+
+namespace Rock {
+
+class SwapDir;
+
+/// \ingroup Rock
+class IoState: public ::StoreIOState
+{
+public:
+ typedef RefCount<IoState> Pointer;
+
+ IoState(SwapDir *dir, StoreEntry *e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data);
+ virtual ~IoState();
+
+ void file(const RefCount<DiskFile> &aFile);
+
+ // ::StoreIOState API
+ virtual void read_(char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data);
+ virtual void write(char const *buf, size_t size, off_t offset, FREE * free_func);
+ virtual void close();
+
+ /// called by SwapDir when writing is done
+ void finishedWriting(int errFlag);
+
+ int64_t slotSize; ///< db cell size
+ int64_t entrySize; ///< planned or actual stored size for the entry
+
+ MEMPROXY_CLASS(IoState);
+
+private:
+ void startWriting();
+ void callBack(int errflag);
+
+ RefCount<DiskFile> theFile; // "file" responsible for this I/O
+ MemBuf theBuf; // use for write content accumulation only
+};
+
+MEMPROXY_CLASS_INLINE(IoState);
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_IO_STATE_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "fs/rock/RockRebuild.h"
+#include "fs/rock/RockSwapDir.h"
+
+CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);
+
+Rock::Rebuild::Rebuild(SwapDir *dir):
+ sd(dir),
+ dbSize(0),
+ dbEntrySize(0),
+ dbEntryLimit(0),
+ fd(-1),
+ dbOffset(0),
+ fileno(0)
+{
+ assert(sd);
+ memset(&counts, 0, sizeof(counts));
+ dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
+ dbEntrySize = sd->max_objsize;
+ dbEntryLimit = sd->entryLimit();
+}
+
+Rock::Rebuild::~Rebuild()
+{
+ if (fd >= 0)
+ file_close(fd);
+}
+
+/// prepares and initiates entry loading sequence
+void
+Rock::Rebuild::start() {
+ debugs(47,2, HERE << sd->index);
+
+ fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ failure("cannot open db", errno);
+
+ char buf[SwapDir::HeaderSize];
+ if (read(fd, buf, sizeof(buf)) != SwapDir::HeaderSize)
+ failure("cannot read db header", errno);
+
+ dbOffset = SwapDir::HeaderSize;
+ fileno = 0;
+
+ checkpoint();
+}
+
+/// quits if done; otherwise continues after a pause
+void
+Rock::Rebuild::checkpoint()
+{
+ if (dbOffset < dbSize)
+ eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
+ else
+ complete();
+}
+
+void
+Rock::Rebuild::Steps(void *data)
+{
+ static_cast<Rebuild*>(data)->steps();
+}
+
+void
+Rock::Rebuild::steps() {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ const int maxCount = dbEntryLimit;
+ const int wantedCount = opt_foreground_rebuild ? maxCount : 50;
+ const int stepCount = min(wantedCount, maxCount);
+ for (int i = 0; i < stepCount && dbOffset < dbSize; ++i, ++fileno) {
+ doOneEntry();
+ dbOffset += dbEntrySize;
+
+ if (counts.scancount % 1000 == 0)
+ storeRebuildProgress(sd->index, maxCount, counts.scancount);
+ }
+
+ checkpoint();
+}
+
+void
+Rock::Rebuild::doOneEntry() {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ if (lseek(fd, dbOffset, SEEK_SET) < 0)
+ failure("cannot seek to db entry", errno);
+
+ cache_key key[SQUID_MD5_DIGEST_LENGTH];
+ StoreEntry loadedE;
+ if (!storeRebuildLoadEntry(fd, loadedE, key, counts, 0)) {
+ // skip empty slots
+ if (loadedE.swap_filen > 0 || loadedE.swap_file_sz > 0) {
+ counts.invalid++;
+ sd->unlink(fileno);
+ }
+ return;
+ }
+
+ assert(loadedE.swap_filen < dbEntryLimit);
+ if (!storeRebuildKeepEntry(loadedE, key, counts))
+ return;
+
+ counts.objcount++;
+ // loadedE->dump(5);
+
+ //StoreEntry *e =
+ sd->addEntry(fileno, loadedE);
+ //storeDirSwapLog(e, SWAP_LOG_ADD);
+}
+
+void
+Rock::Rebuild::complete() {
+ debugs(47,3, HERE << sd->index);
+ file_close(fd); // the fd came from file_open()
+ fd = -1; // prevent a second close in the destructor
+ StoreController::store_dirs_rebuilding--;
+ storeRebuildComplete(&counts);
+ delete this; // same as cbdataFree
+}
+
+void
+Rock::Rebuild::failure(const char *msg, int errNo) {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ if (errNo)
+ debugs(47,0, "Rock cache_dir rebuild failure: " << xstrerr(errNo));
+ debugs(47,0, "Do you need to run 'squid -z' to initialize storage?");
+
+ assert(sd);
+ fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
+ sd->index, sd->filePath, msg);
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_REBUILD_H
+#define SQUID_FS_ROCK_REBUILD_H
+
+#include "config.h"
+#include "structs.h"
+
+namespace Rock {
+
+class SwapDir;
+
+/// \ingroup Rock
+/// manages store rebuild process: loading meta information from db on disk
+class Rebuild {
+public:
+ Rebuild(SwapDir *dir);
+ ~Rebuild();
+ void start();
+
+private:
+ CBDATA_CLASS2(Rebuild);
+
+ void checkpoint();
+ void steps();
+ void doOneEntry();
+ void complete();
+ void failure(const char *msg, int errNo = 0);
+
+ SwapDir *sd;
+
+ int64_t dbSize;
+ int dbEntrySize;
+ int dbEntryLimit;
+
+ int fd; // store db file descriptor
+ int64_t dbOffset;
+ int fileno;
+
+ struct _store_rebuild_data counts;
+
+ static void Steps(void *data);
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_REBUILD_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 92 Storage File System
+ */
+
+#include "fs/rock/RockStoreFileSystem.h"
+#include "fs/rock/RockSwapDir.h"
+
+
+Rock::StoreFileSystem::StoreFileSystem()
+{
+ FsAdd(*this);
+}
+
+Rock::StoreFileSystem::~StoreFileSystem()
+{
+}
+
+char const *
+Rock::StoreFileSystem::type() const
+{
+ return "rock";
+}
+
+SwapDir *
+Rock::StoreFileSystem::createSwapDir()
+{
+ return new SwapDir();
+}
+
+void
+Rock::StoreFileSystem::done()
+{
+}
+
+void
+Rock::StoreFileSystem::registerWithCacheManager()
+{
+ assert(false); // XXX: implement
+}
+
+void
+Rock::StoreFileSystem::setup()
+{
+ debugs(92,2, HERE << "Will use Rock FS");
+}
+
+void
+Rock::StoreFileSystem::Stats(StoreEntry *sentry)
+{
+ assert(false); // XXX: implement
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_FS_H
+#define SQUID_FS_ROCK_FS_H
+
+#include "StoreFileSystem.h"
+
+namespace Rock {
+
+/// \ingroup Rock, FileSystems
+class StoreFileSystem: public ::StoreFileSystem
+{
+
+public:
+ static void Stats(StoreEntry * sentry);
+
+ StoreFileSystem();
+ virtual ~StoreFileSystem();
+
+ virtual char const *type() const;
+ virtual SwapDir *createSwapDir();
+ virtual void done();
+ virtual void registerWithCacheManager();
+ virtual void setup();
+
+private:
+ //static Stats Stats_;
+
+ StoreFileSystem(const StoreFileSystem &); // not implemented
+ StoreFileSystem &operator=(const StoreFileSystem &); // not implemented
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_FS_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "config.h"
+#include "Parsing.h"
+#include <iomanip>
+#include "MemObject.h"
+#include "DiskIO/DiskIOModule.h"
+#include "DiskIO/DiskIOStrategy.h"
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockSwapDir.h"
+#include "fs/rock/RockIoState.h"
+#include "fs/rock/RockIoRequests.h"
+#include "fs/rock/RockRebuild.h"
+
+// must be divisible by 1024 due to cur_size and max_size KB madness
+const int64_t Rock::SwapDir::HeaderSize = 16*1024;
+
+Rock::SwapDir::SwapDir(): ::SwapDir("rock"), filePath(NULL), io(NULL)
+{
+}
+
+Rock::SwapDir::~SwapDir()
+{
+ delete io;
+ safe_free(filePath);
+}
+
+StoreSearch *
+Rock::SwapDir::search(String const url, HttpRequest *)
+{
+ assert(false); return NULL; // XXX: implement
+}
+
+// TODO: encapsulate as a tool; identical to CossSwapDir::create()
+void
+Rock::SwapDir::create()
+{
+ assert(path);
+ assert(filePath);
+
+ debugs (47,3, HERE << "creating in " << path);
+
+ struct stat swap_sb;
+ if (::stat(path, &swap_sb) < 0) {
+ debugs (47, 1, "Creating Rock db directory: " << path);
+#ifdef _SQUID_MSWIN_
+ const int res = mkdir(path);
+#else
+ const int res = mkdir(path, 0700);
+#endif
+ if (res != 0) {
+ debugs(47,0, "Failed to create Rock db dir " << path <<
+ ": " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+ }
+
+#if SLOWLY_FILL_WITH_ZEROS
+ /* TODO just set the file size */
+ char block[1024]; // max_size is in KB so this is one unit of max_size
+ memset(block, '\0', sizeof(block));
+
+ const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ for (off_t offset = 0; offset < max_size; ++offset) {
+ if (write(swap, block, sizeof(block)) != sizeof(block)) {
+ debugs(47,0, "Failed to create Rock Store db in " << filePath <<
+ ": " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+ }
+ close(swap);
+#else
+ const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ if (swap < 0) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; create error: " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+
+ if (ftruncate(swap, maximumSize()) != 0) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; truncate error: " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+
+ char header[HeaderSize];
+ memset(header, '\0', sizeof(header));
+ if (write(swap, header, sizeof(header)) != sizeof(header)) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; write error: " << xstrerror());
+ fatal("Rock Store db initialization error");
+ }
+ close(swap);
+#endif
+
+}
+
+void
+Rock::SwapDir::init()
+{
+ debugs(47,2, HERE);
+
+ // XXX: SwapDirs aren't refcounted. We make IORequestor calls, which
+ // are refcounted. We up our count once to avoid implicit delete's.
+ RefCountReference();
+
+ DiskIOModule *m = DiskIOModule::Find("Mmapped"); // TODO: configurable?
+ assert(m);
+ io = m->createStrategy();
+ io->init();
+
+ theFile = io->newFile(filePath);
+ theFile->open(O_RDWR, 0644, this);
+
+ rebuild();
+}
+
+void
+Rock::SwapDir::parse(int anIndex, char *aPath)
+{
+ index = anIndex;
+
+ path = xstrdup(aPath);
+
+ // cache store is located at path/db
+ String fname(path);
+ fname.append("/rock");
+ filePath = xstrdup(fname.termedBuf());
+
+ parseSize();
+ parseOptions(0);
+
+ repl = createRemovalPolicy(Config.replPolicy);
+
+ validateOptions();
+}
+
+void
+Rock::SwapDir::reconfigure(int, char *)
+{
+ parseSize();
+ parseOptions(1);
+ // TODO: can we reconfigure the replacement policy (repl)?
+ validateOptions();
+}
+
+/// parse maximum db disk size
+void
+Rock::SwapDir::parseSize()
+{
+ const int64_t sizeInMB = GetInteger();
+ if (sizeInMB < 0)
+ fatal("negative Rock cache_dir size value");
+ max_size = sizeInMB << 10; // MBytes to KBytes
+}
+
+/// check the results of the configuration; only level-0 debugging works here
+void
+Rock::SwapDir::validateOptions()
+{
+ if (max_objsize <= 0)
+ fatal("Rock store requires a positive max-size");
+
+ const int64_t eLimitHi = 0xFFFFFF; // Core sfileno maximum
+ const int64_t eLimitLo = map.entryLimit(); // dynamic shrinking unsupported
+ const int64_t eWanted = (maximumSize() - HeaderSize)/max_objsize;
+ const int64_t eAllowed = min(max(eLimitLo, eWanted), eLimitHi);
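+ // e.g., a hypothetical 100 MB db with 16 KB slots:
+ // eWanted = (104857600 - 16384)/16384 = 6399 entries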
+
+ map.resize(eAllowed); // the map may decide to use an even lower limit
+
+ // Note: We could try to shrink max_size now. It is stored in KB so we
+ // may not be able to make it match the end of the last entry exactly.
+ const int64_t mapRoundWasteMx = max_objsize*sizeof(long)*8;
+ const int64_t sizeRoundWasteMx = 1024; // max_size stored in KB
+ const int64_t roundingWasteMx = max(mapRoundWasteMx, sizeRoundWasteMx);
+ const int64_t totalWaste = maximumSize() - diskOffsetLimit();
+ assert(diskOffsetLimit() <= maximumSize());
+
+ // warn if maximum db size is not reachable due to sfileno limit
+ if (map.entryLimit() == map.AbsoluteEntryLimit() &&
+ totalWaste > roundingWasteMx) {
+ debugs(47, 0, "Rock store cache_dir[" << index << "]:");
+ debugs(47, 0, "\tmaximum number of entries: " << map.entryLimit());
+ debugs(47, 0, "\tmaximum entry size: " << max_objsize << " bytes");
+ debugs(47, 0, "\tmaximum db size: " << maximumSize() << " bytes");
+ debugs(47, 0, "\tusable db size: " << diskOffsetLimit() << " bytes");
+ debugs(47, 0, "\tdisk space waste: " << totalWaste << " bytes");
+ debugs(47, 0, "WARNING: Rock store config wastes space.");
+ }
+
+ if (!repl) {
+ debugs(47,0, "ERROR: Rock cache_dir[" << index << "] " <<
+ "lacks replacement policy and will overflow.");
+ // not fatal because it can be added later
+ }
+
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+}
+
+void
+Rock::SwapDir::rebuild() {
+ ++StoreController::store_dirs_rebuilding;
+ Rebuild *r = new Rebuild(this);
+ r->start(); // will delete self when done
+}
+
+/* Add a new object to the cache with empty memory copy and pointer to disk
+ * use to rebuild store from disk. XXX: dupes UFSSwapDir::addDiskRestore */
+StoreEntry *
+Rock::SwapDir::addEntry(int fileno, const StoreEntry &from)
+{
+ /* if you call this you'd better be sure file_number is not
+ * already in use! */
+ StoreEntry *e = new StoreEntry(); // TODO: optimize by reusing "from"?
+ debugs(47, 5, HERE << e << ' ' << storeKeyText((const cache_key*)from.key)
+ << ", fileno="<< std::setfill('0') << std::hex << std::uppercase <<
+ std::setw(8) << fileno);
+ e->store_status = STORE_OK;
+ e->setMemStatus(NOT_IN_MEMORY);
+ e->swap_status = SWAPOUT_DONE;
+ e->swap_filen = fileno;
+ e->swap_dirn = index;
+ e->swap_file_sz = from.swap_file_sz;
+ e->lock_count = 0;
+ e->lastref = from.lastref;
+ e->timestamp = from.timestamp;
+ e->expires = from.expires;
+ e->lastmod = from.lastmod;
+ e->refcount = from.refcount;
+ e->flags = from.flags;
+ EBIT_SET(e->flags, ENTRY_CACHABLE);
+ EBIT_CLR(e->flags, RELEASE_REQUEST);
+ EBIT_CLR(e->flags, KEY_PRIVATE);
+ e->ping_status = PING_NONE;
+ EBIT_CLR(e->flags, ENTRY_VALIDATED);
+ map.use(e->swap_filen);
+ e->hashInsert((const cache_key*)from.key); /* do it after we clear KEY_PRIVATE */
+ trackReferences(*e);
+ return e;
+}
+
+
+int
+Rock::SwapDir::canStore(const StoreEntry &e) const
+{
+ debugs(47,8, HERE << e.swap_file_sz << " ? " << max_objsize);
+
+ if (EBIT_TEST(e.flags, ENTRY_SPECIAL))
+ return -1;
+
+ if (io->shedLoad())
+ return -1;
+
+ return io->load();
+}
+
+StoreIOState::Pointer
+Rock::SwapDir::createStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
+{
+ if (!theFile || theFile->error()) {
+ debugs(47,4, HERE << theFile);
+ return NULL;
+ }
+
+ if (full()) {
+ maintain();
+ if (full()) // maintain() above warns when it fails
+ return NULL;
+ }
+
+ IoState *sio = new IoState(this, &e, cbFile, cbIo, data);
+
+ sio->swap_dirn = index;
+ sio->swap_filen = map.useNext();
+ sio->offset_ = diskOffset(sio->swap_filen);
+ sio->entrySize = e.objectLen() + e.mem_obj->swap_hdr_sz;
+
+ debugs(47,5, HERE << "dir " << index << " created new fileno " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen << std::dec << " at " << sio->offset_ << " size: " <<
+ sio->entrySize << " (" << e.objectLen() << '+' <<
+ e.mem_obj->swap_hdr_sz << ")");
+
+ assert(sio->offset_ + sio->entrySize <= diskOffsetLimit());
+
+ sio->file(theFile);
+
+ trackReferences(e);
+ return sio;
+}
+
+int64_t
+Rock::SwapDir::diskOffset(int filen) const
+{
+ return HeaderSize + max_objsize*filen;
+}
+
+int64_t
+Rock::SwapDir::diskOffsetLimit() const
+{
+ return diskOffset(map.entryLimit());
+}
+
+StoreIOState::Pointer
+Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
+{
+ if (!theFile || theFile->error()) {
+ debugs(47,4, HERE << theFile);
+ return NULL;
+ }
+
+ IoState *sio = new IoState(this, &e, cbFile, cbIo, data);
+
+ sio->swap_dirn = index;
+ sio->swap_filen = e.swap_filen;
+ debugs(47,5, HERE << "dir " << index << " has old fileno: " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen);
+
+ assert(map.valid(sio->swap_filen));
+ sio->offset_ = diskOffset(sio->swap_filen);
+ sio->entrySize = e.swap_file_sz;
+ assert(sio->entrySize <= max_objsize);
+
+ if (!map.has(sio->swap_filen)) {
+ debugs(47,1, HERE << "bug: dir " << index << " lost fileno: " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen);
+ return NULL;
+ }
+
+ assert(sio->offset_ + sio->entrySize <= diskOffsetLimit());
+
+ sio->file(theFile);
+ return sio;
+}
+
+void
+Rock::SwapDir::ioCompletedNotification()
+{
+ if (!theFile) {
+ debugs(47, 1, HERE << filePath << ": initialization failure or " <<
+ "premature close of rock db file");
+ fatalf("Rock cache_dir failed to initialize db file: %s", filePath);
+ }
+
+ if (theFile->error()) {
+ debugs(47, 1, HERE << filePath << ": " << xstrerror());
+ fatalf("Rock cache_dir failed to open db file: %s", filePath);
+ }
+
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+
+ // TODO: lower debugging level
+ debugs(47,1, "Rock cache_dir[" << index << "] limits: " <<
+ std::setw(12) << maximumSize() << " disk bytes and " <<
+ std::setw(7) << map.entryLimit() << " entries");
+}
+
+void
+Rock::SwapDir::closeCompleted()
+{
+ theFile = NULL;
+}
+
+void
+Rock::SwapDir::readCompleted(const char *buf, int rlen, int errflag, RefCount< ::ReadRequest> r)
+{
+ ReadRequest *request = dynamic_cast<Rock::ReadRequest*>(r.getRaw());
+ assert(request);
+ IoState::Pointer sio = request->sio;
+
+ // do not increment sio->offset_: callers always supply relative offset
+
+ StoreIOState::STRCB *callback = sio->read.callback;
+ assert(callback);
+ sio->read.callback = NULL;
+ void *cbdata;
+ if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata))
+ callback(cbdata, r->buf, rlen, sio.getRaw());
+}
+
+void
+Rock::SwapDir::writeCompleted(int errflag, size_t rlen, RefCount< ::WriteRequest> r)
+{
+ Rock::WriteRequest *request = dynamic_cast<Rock::WriteRequest*>(r.getRaw());
+ assert(request);
+ assert(request->sio != NULL);
+ IoState &sio = *request->sio;
+ if (errflag != DISK_OK)
+ map.clear(sio.swap_filen); // TODO: test by forcing failure
+ // else sio.offset_ += rlen;
+
+ // TODO: always compute cur_size based on map, do not store it
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ assert(sio.offset_ <= diskOffsetLimit()); // post-factum check
+
+ sio.finishedWriting(errflag);
+}
+
+bool
+Rock::SwapDir::full() const
+{
+ return map.full();
+}
+
+void
+Rock::SwapDir::updateSize(int64_t size, int sign)
+{
+ // it is not clear what store_swap_size really is; TODO: move low-level
+ // size maintenance to individual store dir types
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ store_swap_size = cur_size;
+
+ if (sign > 0)
+ ++n_disk_objects;
+ else if (sign < 0)
+ --n_disk_objects;
+}
+
+// storeSwapOutFileClosed calls this method on DISK_NO_SPACE_LEFT,
+// but that should not happen for us
+void
+Rock::SwapDir::diskFull() {
+ debugs(20,1, "Internal ERROR: No space left error with rock cache_dir: " <<
+ filePath);
+}
+
+/// purge while full(); it should be sufficient to purge just one
+void
+Rock::SwapDir::maintain()
+{
+ debugs(47,3, HERE << "cache_dir[" << index << "] guards: " <<
+ StoreController::store_dirs_rebuilding << !repl << !full());
+
+ // XXX: UFSSwapDir::maintain says we must quit during rebuild
+ if (StoreController::store_dirs_rebuilding)
+ return;
+
+ if (!repl)
+ return; // no means (cannot find a victim)
+
+ if (!full())
+ return; // no need (to find a victim)
+
+ debugs(47,3, HERE << "cache_dir[" << index << "] state: " <<
+ map.full() << ' ' << currentSize() << " < " << diskOffsetLimit());
+
+ // Hopefully, we find a removable entry much sooner (TODO: use time?)
+ const int maxProbed = 10000;
+ RemovalPurgeWalker *walker = repl->PurgeInit(repl, maxProbed);
+
+ // It really should not take that long, but this will stop "infinite" loops
+ const int maxFreed = 1000;
+ int freed = 0;
+ // TODO: should we purge more than needed to minimize overheads?
+ for (; freed < maxFreed && full(); ++freed) {
+ if (StoreEntry *e = walker->Next(walker))
+ e->release(); // will call our unlink() method
+ else
+ break; // no more objects
+ }
+
+ debugs(47,2, HERE << "Rock cache_dir[" << index << "] freed " << freed <<
+ " scanned " << walker->scanned << '/' << walker->locked);
+
+ walker->Done(walker);
+
+ if (full()) {
+ debugs(47,0, "ERROR: Rock cache_dir[" << index << "] " <<
+ "is still full after freeing " << freed << " entries. A bug?");
+ }
+}
+
+void
+Rock::SwapDir::reference(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ if (repl->Referenced)
+ repl->Referenced(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::dereference(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ if (repl->Dereferenced)
+ repl->Dereferenced(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::unlink(int fileno)
+{
+ debugs(47,5, HERE << index << ' ' << fileno);
+ if (map.has(fileno)) {
+ map.clear(fileno);
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ // XXX: update store
+ }
+}
+
+void
+Rock::SwapDir::unlink(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ ignoreReferences(e);
+ unlink(e.swap_filen);
+}
+
+void
+Rock::SwapDir::trackReferences(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ repl->Add(repl, &e, &e.repl);
+}
+
+
+void
+Rock::SwapDir::ignoreReferences(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ repl->Remove(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::statfs(StoreEntry &e) const
+{
+ storeAppendPrintf(&e, "\n");
+ storeAppendPrintf(&e, "Maximum Size: %"PRIu64" KB\n", max_size);
+ storeAppendPrintf(&e, "Current Size: %"PRIu64" KB %.2f%%\n", cur_size,
+ 100.0 * cur_size / max_size);
+
+ storeAppendPrintf(&e, "Maximum entries: %9d\n", map.entryLimit());
+ storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n",
+ map.entryCount(), (100.0 * map.entryCount() / map.entryLimit()));
+
+ storeAppendPrintf(&e, "Pending operations: %d out of %d\n",
+ store_open_disk_fd, Config.max_open_disk_fds);
+
+ storeAppendPrintf(&e, "Flags:");
+
+ if (flags.selected)
+ storeAppendPrintf(&e, " SELECTED");
+
+ if (flags.read_only)
+ storeAppendPrintf(&e, " READ-ONLY");
+
+ storeAppendPrintf(&e, "\n");
+
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_SWAP_DIR_H
+#define SQUID_FS_ROCK_SWAP_DIR_H
+
+#include "SwapDir.h"
+#include "DiskIO/IORequestor.h"
+#include "rock/RockDirMap.h"
+
+class DiskIOStrategy;
+class DiskFile;
+class ReadRequest;
+class WriteRequest;
+
+namespace Rock {
+
+class Rebuild;
+
+/// \ingroup Rock
+class SwapDir: public ::SwapDir, public IORequestor
+{
+public:
+ SwapDir();
+ virtual ~SwapDir();
+
+ /* public ::SwapDir API */
+ virtual void reconfigure(int, char *);
+ virtual StoreSearch *search(String const url, HttpRequest *);
+
+protected:
+ /* protected ::SwapDir API */
+ virtual void create();
+ virtual void init();
+ virtual int canStore(StoreEntry const &) const;
+ virtual StoreIOState::Pointer createStoreIO(StoreEntry &, StoreIOState::STFNCB *, StoreIOState::STIOCB *, void *);
+ virtual StoreIOState::Pointer openStoreIO(StoreEntry &, StoreIOState::STFNCB *, StoreIOState::STIOCB *, void *);
+ virtual void maintain();
+ virtual void updateSize(int64_t size, int sign);
+ virtual void diskFull();
+ virtual void reference(StoreEntry &e);
+ virtual void dereference(StoreEntry &e);
+ virtual void unlink(StoreEntry &e);
+ virtual void statfs(StoreEntry &e) const;
+
+ /* IORequestor API */
+ virtual void ioCompletedNotification();
+ virtual void closeCompleted();
+ virtual void readCompleted(const char *buf, int len, int errflag, RefCount< ::ReadRequest>);
+ virtual void writeCompleted(int errflag, size_t len, RefCount< ::WriteRequest>);
+
+ virtual void parse(int index, char *path);
+ void parseSize(); ///< parses anonymous cache_dir size option
+ void validateOptions(); ///< warns of configuration problems; may quit
+
+ void rebuild(); ///< starts loading and validating stored entry metadata
+ void unlink(int fileno); ///< removes entries that failed to load during rebuild
+ /// adds entries successfully loaded during rebuild
+ StoreEntry *addEntry(int fileno, const StoreEntry &from);
+
+ bool full() const; ///< no more entries can be stored without purging
+ void trackReferences(StoreEntry &e); ///< add to replacement policy scope
+ void ignoreReferences(StoreEntry &e); ///< delete from repl policy scope
+
+ // TODO: change cur_size and max_size type to stop this madness
+ int64_t currentSize() const { return static_cast<int64_t>(cur_size) << 10;}
+ int64_t maximumSize() const { return static_cast<int64_t>(max_size) << 10;}
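+ // cur_size and max_size are in KB; "<< 10" converts to the bytes callers expect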
+ int64_t diskOffset(int filen) const;
+ int64_t diskOffsetLimit() const;
+ int entryLimit() const { return map.entryLimit(); }
+
+ friend class Rebuild;
+ const char *filePath; ///< location of cache storage file inside path/
+
+private:
+ DiskIOStrategy *io;
+ RefCount<DiskFile> theFile; ///< cache storage for this cache_dir
+ DirMap map;
+
+ static const int64_t HeaderSize; ///< on-disk db header size
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_SWAP_DIR_H */
rebuildFromDirectory();
}
-struct InitStoreEntry : public unary_function<StoreMeta, void> {
- InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}
-
- void operator()(StoreMeta const &x) {
- switch (x.getType()) {
-
- case STORE_META_KEY:
- assert(x.length == SQUID_MD5_DIGEST_LENGTH);
- memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
- break;
-
- case STORE_META_STD:
- struct old_metahdr {
- time_t timestamp;
- time_t lastref;
- time_t expires;
- time_t lastmod;
- size_t swap_file_sz;
- u_short refcount;
- u_short flags;
- } *tmp;
- tmp = (struct old_metahdr *)x.value;
- assert(x.length == STORE_HDR_METASIZE_OLD);
- what->timestamp = tmp->timestamp;
- what->lastref = tmp->lastref;
- what->expires = tmp->expires;
- what->lastmod = tmp->lastmod;
- what->swap_file_sz = tmp->swap_file_sz;
- what->refcount = tmp->refcount;
- what->flags = tmp->flags;
- break;
-
- case STORE_META_STD_LFS:
- assert(x.length == STORE_HDR_METASIZE);
- memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
- break;
-
- default:
- break;
- }
- }
-
- StoreEntry *what;
- cache_key *index;
-};
-
void
RebuildState::rebuildFromDirectory()
{
- LOCAL_ARRAY(char, hdr_buf, SM_PAGE_SIZE);
currentEntry(NULL);
cache_key key[SQUID_MD5_DIGEST_LENGTH];
struct stat sb;
- int swap_hdr_len;
int fd = -1;
- StoreMeta *tlv_list;
assert(this != NULL);
debugs(47, 3, "commonUfsDirRebuildFromDirectory: DIR #" << sd->index);
continue;
}
- if ((++counts.scancount & 0xFFFF) == 0)
- debugs(47, 3, " " << sd->path << " " << std::setw(7) << counts.scancount << " files opened so far.");
- debugs(47, 9, "file_in: fd=" << fd << " "<< std::setfill('0') << std::hex << std::uppercase << std::setw(8) << filn);
-
-
- statCounter.syscalls.disk.reads++;
-
- int len;
-
- if ((len = FD_READ_METHOD(fd, hdr_buf, SM_PAGE_SIZE)) < 0) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: read(FD " << fd << "): " << xstrerror());
- file_close(fd);
- store_open_disk_fd--;
- fd = -1;
- continue;
- }
+ StoreEntry tmpe;
+ const bool loaded = storeRebuildLoadEntry(fd, tmpe, key, counts,
+ (uint64_t)sb.st_size);
file_close(fd);
store_open_disk_fd--;
fd = -1;
- swap_hdr_len = 0;
-
- StoreMetaUnpacker aBuilder(hdr_buf, len, &swap_hdr_len);
-
- if (!aBuilder.isBufferSane()) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: Swap data buffer length is not sane.");
- /* XXX shouldn't this be a call to commonUfsUnlink ? */
- sd->unlinkFile ( filn);
- continue;
- }
-
- tlv_list = aBuilder.createStoreMeta ();
- if (tlv_list == NULL) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: failed to get meta data");
- /* XXX shouldn't this be a call to commonUfsUnlink ? */
- sd->unlinkFile (filn);
+ if (!loaded) {
+ // XXX: shouldn't this be a call to commonUfsUnlink?
+ sd->unlinkFile(filn); // should we unlink in all failure cases?
continue;
}
- debugs(47, 3, "commonUfsDirRebuildFromDirectory: successful swap meta unpacking");
- memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);
-
- StoreEntry tmpe;
- InitStoreEntry visitor(&tmpe, key);
- for_each(*tlv_list, visitor);
- storeSwapTLVFree(tlv_list);
- tlv_list = NULL;
-
- if (storeKeyNull(key)) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: NULL key");
- sd->unlinkFile(filn);
+ if (!storeRebuildKeepEntry(tmpe, key, counts))
continue;
- }
-
- tmpe.key = key;
- /* check sizes */
-
- if (tmpe.swap_file_sz == 0) {
- tmpe.swap_file_sz = (uint64_t) sb.st_size;
- } else if (tmpe.swap_file_sz == (uint64_t)(sb.st_size - swap_hdr_len)) {
- tmpe.swap_file_sz = (uint64_t) sb.st_size;
- } else if (tmpe.swap_file_sz != (uint64_t)sb.st_size) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: SIZE MISMATCH " <<
- tmpe.swap_file_sz << "!=" <<
- sb.st_size);
-
- sd->unlinkFile(filn);
- continue;
- }
-
- if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
- sd->unlinkFile(filn);
- counts.badflags++;
- continue;
- }
-
- /* this needs to become
- * 1) unpack url
- * 2) make synthetic request with headers ?? or otherwise search
- * for a matching object in the store
- * TODO FIXME change to new async api
- * TODO FIXME I think there is a race condition here with the
- * async api :
- * store A reads in object foo, searchs for it, and finds nothing.
- * store B reads in object foo, searchs for it, finds nothing.
- * store A gets called back with nothing, so registers the object
- * store B gets called back with nothing, so registers the object,
- * which will conflict when the in core index gets around to scanning
- * store B.
- *
- * this suggests that rather than searching for duplicates, the
- * index rebuild should just assume its the most recent accurate
- * store entry and whoever indexes the stores handles duplicates.
- */
- e = Store::Root().get(key);
-
- if (e && e->lastref >= tmpe.lastref) {
- /* key already exists, current entry is newer */
- /* keep old, ignore new */
- counts.dupcount++;
- continue;
- } else if (NULL != e) {
- /* URL already exists, this swapfile not being used */
- /* junk old, load new */
- e->release(); /* release old entry */
- counts.dupcount++;
- }
counts.objcount++;
// tmpe.dump(5);
SQUIDCEXTERN void storeRebuildComplete(struct _store_rebuild_data *);
SQUIDCEXTERN void storeRebuildProgress(int sd_index, int total, int sofar);
+/// tries to load and validate entry metadata; fills e and key on success
+SQUIDCEXTERN bool storeRebuildLoadEntry(int fd, StoreEntry &e, cache_key *key, struct _store_rebuild_data &counts, uint64_t expectedSize);
+/// checks whether the loaded entry should be kept; updates counters
+SQUIDCEXTERN bool storeRebuildKeepEntry(const StoreEntry &e, const cache_key *key, struct _store_rebuild_data &counts);
+
/*
* store_swapin.c
*/
{
nodes.destroy(SplayNode<mem_node *>::DefaultFree);
inmem_hi = 0;
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
}
bool
memcpy(aNode->nodeBuffer.data + aNode->nodeBuffer.length, source, copyLen);
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
if (inmem_hi <= location)
inmem_hi = location + copyLen;
/* Adjust the ptr and len according to what was deposited in the page */
aNode->nodeBuffer.length += copyLen;
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
+ debugs(19, 9, HERE << this << " hi: " << endOffset());
return copyLen;
}
void
mem_hdr::internalAppend(const char *data, int len)
{
- debugs(19, 6, "memInternalAppend: len " << len);
+ debugs(19, 6, "memInternalAppend: " << this << " len " << len);
while (len > 0) {
makeAppendSpace();
{
assert(target.range().end > target.range().start);
- debugs(19, 6, "memCopy: " << target.range());
+ debugs(19, 6, "memCopy: " << this << " " << target.range());
/* we shouldn't ever ask for absent offsets */
mem_hdr::write (StoreIOBuffer const &writeBuffer)
{
PROF_start(mem_hdr_write);
- debugs(19, 6, "mem_hdr::write: " << writeBuffer.range() << " object end " << endOffset());
+ debugs(19, 6, "mem_hdr::write: " << this << " " << writeBuffer.range() << " object end " << endOffset());
if (unionNotEmpty(writeBuffer)) {
debugs(19,0,"mem_hdr::write: writeBuffer: " << writeBuffer.range());
}
mem_hdr::mem_hdr() : inmem_hi(0)
-{}
+{
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
+}
mem_hdr::~mem_hdr()
{
if (mem_obj->inmem_lo == 0 && !isEmpty()) {
if (swap_status == SWAPOUT_DONE) {
+ debugs(20,7, HERE << mem_obj << " lo: " << mem_obj->inmem_lo << " hi: " << mem_obj->endOffset() << " size: " << mem_obj->object_sz);
if (mem_obj->endOffset() == mem_obj->object_sz) {
/* hot object fully swapped in */
return STORE_MEM_CLIENT;
return STORE_DISK_CLIENT;
}
-StoreEntry::StoreEntry()
+StoreEntry::StoreEntry():
+ swap_file_sz(0)
{
debugs(20, 3, HERE << "new StoreEntry " << this);
mem_obj = NULL;
swap_dirn = -1;
}
-StoreEntry::StoreEntry(const char *aUrl, const char *aLogUrl)
+StoreEntry::StoreEntry(const char *aUrl, const char *aLogUrl):
+ swap_file_sz(0)
{
debugs(20, 3, HERE << "new StoreEntry " << this);
mem_obj = new MemObject(aUrl, aLogUrl);
debugs(20, 4, "StoreEntry::setMemStatus: not inserting special " << mem_obj->url << " into policy");
} else {
mem_policy->Add(mem_policy, this, &mem_obj->repl);
- debugs(20, 4, "StoreEntry::setMemStatus: inserted mem node " << mem_obj->url);
+ debugs(20, 4, "StoreEntry::setMemStatus: inserted mem node " << mem_obj->url << " key: " << getMD5Text());
}
hot_obj_count++;
char const *
StoreEntry::getSerialisedMetaData()
{
+ const size_t swap_hdr_sz0 = storeSwapMetaSize(this);
+ assert (swap_hdr_sz0 > 0); // size_t cannot be negative; a valid header is never empty
+ mem_obj->swap_hdr_sz = swap_hdr_sz0;
+ // now we can use swap_hdr_sz to calculate swap_file_sz
+ // so that storeSwapMetaBuild/Pack can pack the correct swap_file_sz
+ swap_file_sz = objectLen() + mem_obj->swap_hdr_sz;
+
StoreMeta *tlv_list = storeSwapMetaBuild(this);
int swap_hdr_sz;
char *result = storeSwapMetaPack(tlv_list, &swap_hdr_sz);
+ assert(static_cast<int>(swap_hdr_sz0) == swap_hdr_sz);
storeSwapTLVFree(tlv_list);
- assert (swap_hdr_sz >= 0);
- mem_obj->swap_hdr_sz = (size_t) swap_hdr_sz;
return result;
}
+/*
+ * Calculate TLV list size for a StoreEntry
+ * XXX: Must match the actual storeSwapMetaBuild result size
+ */
+size_t
+storeSwapMetaSize(const StoreEntry * e)
+{
+ size_t size = 0;
+ ++size; // STORE_META_OK
+ size += sizeof(int); // size of header to follow
+
+ const size_t pfx = sizeof(char) + sizeof(int); // type byte and length int at the start of each list entry
+
+ size += pfx + SQUID_MD5_DIGEST_LENGTH;
+ size += pfx + STORE_HDR_METASIZE;
+ size += pfx + strlen(e->url()) + 1;
+
+ if (const char *vary = e->mem_obj->vary_headers)
+ size += pfx + strlen(vary) + 1;
+
+ debugs(20, 3, "storeSwapMetaSize(" << e->url() << "): " << size);
+ return size;
+}
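+
+// Worked example (a sketch, assuming a 4-byte int and a 1-byte type tag):
+// a 20-byte URL with no Vary headers packs as
+//   1 (STORE_META_OK) + 4 (length of the list that follows)
+//   + 5 + SQUID_MD5_DIGEST_LENGTH (key)
+//   + 5 + STORE_HDR_METASIZE (standard header)
+//   + 5 + 21 (URL plus terminating NUL)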
+
bool
StoreEntry::swapoutPossible()
{
size_t statCount = 500;
while (statCount-- && !currentSearch->isDone() && currentSearch->next()) {
- ++validated;
StoreEntry *e;
e = currentSearch->currentItem();
* Only set the file bit if we know its a valid entry
* otherwise, set it in the validation procedure
*/
- e->store()->updateSize(e->swap_file_sz, 1);
+
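+ // count only entries that finished swapping out; others have no final on-disk size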
+ if (e->swap_status == SWAPOUT_DONE)
+ e->store()->updateSize(e->swap_file_sz, 1);
if ((++validated & 0x3FFFF) == 0)
/* TODO format the int with with a stream operator */
debugs(20, 1, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete");
last_report = squid_curtime;
}
+
+#include "fde.h"
+#include "StoreMetaUnpacker.h"
+#include "StoreMeta.h"
+#include "Generic.h"
+
+struct InitStoreEntry : public unary_function<StoreMeta, void> {
+ InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}
+
+ void operator()(StoreMeta const &x) {
+ switch (x.getType()) {
+
+ case STORE_META_KEY:
+ assert(x.length == SQUID_MD5_DIGEST_LENGTH);
+ memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
+ break;
+
+ case STORE_META_STD:
+ struct old_metahdr {
+ time_t timestamp;
+ time_t lastref;
+ time_t expires;
+ time_t lastmod;
+ size_t swap_file_sz;
+ u_short refcount;
+ u_short flags;
+ } *tmp;
+ tmp = (struct old_metahdr *)x.value;
+ assert(x.length == STORE_HDR_METASIZE_OLD);
+ what->timestamp = tmp->timestamp;
+ what->lastref = tmp->lastref;
+ what->expires = tmp->expires;
+ what->lastmod = tmp->lastmod;
+ what->swap_file_sz = tmp->swap_file_sz;
+ what->refcount = tmp->refcount;
+ what->flags = tmp->flags;
+ break;
+
+ case STORE_META_STD_LFS:
+ assert(x.length == STORE_HDR_METASIZE);
+ memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ StoreEntry *what;
+ cache_key *index;
+};
+
+bool
+storeRebuildLoadEntry(int fd, StoreEntry &tmpe, cache_key *key,
+ struct _store_rebuild_data &counts, uint64_t expectedSize)
+{
+ if (fd < 0)
+ return false;
+
+ char hdr_buf[SM_PAGE_SIZE];
+
+ ++counts.scancount;
+ statCounter.syscalls.disk.reads++;
+ int len;
+ if ((len = FD_READ_METHOD(fd, hdr_buf, SM_PAGE_SIZE)) < 0) {
+ debugs(47, 1, HERE << "failed to read swap entry meta data: " << xstrerror());
+ return false;
+ }
+
+ int swap_hdr_len = 0;
+ StoreMetaUnpacker aBuilder(hdr_buf, len, &swap_hdr_len);
+ if (aBuilder.isBufferZero()) {
+ debugs(47,5, HERE << "skipping empty record.");
+ return false;
+ }
+
+ if (!aBuilder.isBufferSane()) {
+ debugs(47,1, "Warning: Ignoring malformed cache entry.");
+ return false;
+ }
+
+ StoreMeta *tlv_list = aBuilder.createStoreMeta();
+ if (!tlv_list) {
+ debugs(47, 1, HERE << "failed to get swap entry meta data list");
+ return false;
+ }
+
+ debugs(47,7, HERE << "successful swap meta unpacking");
+ memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);
+
+ InitStoreEntry visitor(&tmpe, key);
+ for_each(*tlv_list, visitor);
+ storeSwapTLVFree(tlv_list);
+ tlv_list = NULL;
+
+ if (storeKeyNull(key)) {
+ debugs(47,1, HERE << "NULL swap entry key");
+ return false;
+ }
+
+ tmpe.key = key;
+ /* check sizes */
+
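+ // swap_file_sz on disk may be zero (never set) or may exclude the swap
+ // header; both conventions are accepted and normalized to the actual file
+ // size here. Any other mismatch with the file size marks the entry as corrupt.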
+ if (expectedSize > 0) {
+ if (tmpe.swap_file_sz == 0) {
+ tmpe.swap_file_sz = expectedSize;
+ } else if (tmpe.swap_file_sz == (uint64_t)(expectedSize - swap_hdr_len)) {
+ tmpe.swap_file_sz = expectedSize;
+ } else if (tmpe.swap_file_sz != expectedSize) {
+ debugs(47, 1, HERE << "swap entry SIZE MISMATCH " <<
+ tmpe.swap_file_sz << "!=" << expectedSize);
+ return false;
+ }
+ }
+
+ if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
+ counts.badflags++;
+ return false;
+ }
+
+ return true;
+}
+
+bool
+storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key,
+ struct _store_rebuild_data &counts)
+{
+ /* this needs to become
+ * 1) unpack url
+ * 2) make synthetic request with headers ?? or otherwise search
+ * for a matching object in the store
+ * TODO FIXME change to new async api
+ * TODO FIXME I think there is a race condition here with the
+ * async api :
+ * store A reads in object foo, searchs for it, and finds nothing.
+ * store B reads in object foo, searchs for it, finds nothing.
+ * store A gets called back with nothing, so registers the object
+ * store B gets called back with nothing, so registers the object,
+ * which will conflict when the in core index gets around to scanning
+ * store B.
+ *
+ * this suggests that rather than searching for duplicates, the
+ * index rebuild should just assume its the most recent accurate
+ * store entry and whoever indexes the stores handles duplicates.
+ */
+ if (StoreEntry *e = Store::Root().get(key)) {
+
+ if (e->lastref >= tmpe.lastref) {
+ /* key already exists, existing entry is at least as fresh */
+ /* keep old, ignore new */
+ counts.dupcount++;
+ return false;
+ } else {
+ /* URL already exists, this swapfile not being used */
+ /* junk old, load new */
+ e->release(); /* release old entry */
+ counts.dupcount++;
+ }
+ }
+
+ return true;
+}
return;
}
+ if (e->mem_status != NOT_IN_MEMORY)
+ debugs(20, 3, HERE << "already IN_MEMORY");
+
debugs(20, 3, "storeSwapInStart: called for : " << e->swap_dirn << " " <<
std::hex << std::setw(8) << std::setfill('0') << std::uppercase <<
e->swap_filen << " " << e->getMD5Text());
debugs(20, 3, "storeSwapOutFileClosed: SwapOut complete: '" << e->url() << "' to " <<
e->swap_dirn << ", " << std::hex << std::setw(8) << std::setfill('0') <<
std::uppercase << e->swap_filen);
+ debugs(20, 3, "storeSwapOutFileClosed: Should be:" <<
+ e->swap_file_sz << " = " << e->objectLen() << " + " <<
+ mem->swap_hdr_sz);
e->swap_file_sz = e->objectLen() + mem->swap_hdr_sz;
e->swap_status = SWAPOUT_DONE;
e->store()->updateSize(e->swap_file_sz, 1);
storeRebuildComplete(struct _store_rebuild_data *dc)
{}
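+/* stubs for linking tests: report that no entry could be loaded or kept */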
+bool
+storeRebuildLoadEntry(int fd, StoreEntry &tmpe, cache_key *key,
+ struct _store_rebuild_data &counts, uint64_t expectedSize)
+{
+ return false;
+}
+
+bool
+storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key,
+ struct _store_rebuild_data &counts)
+{
+ return false;
+}