dnl
dnl
dnl
-AC_INIT([Squid Web Proxy],[3.HEAD-BZR],[http://www.squid-cache.org/bugs/],[squid])
+AC_INIT([Squid Web Proxy],[3.2-rock-BZR],[http://www.squid-cache.org/bugs/],[squid])
AC_PREREQ(2.61)
AC_CONFIG_HEADERS([include/autoconf.h])
AC_CONFIG_AUX_DIR(cfgaux)
fi
;;
+ Mmapped)
+ AC_MSG_NOTICE([Enabling Mmapped DiskIO module])
+ DISK_LIBS="$DISK_LIBS libMmapped.a"
+ DISK_MODULES="$DISK_MODULES Mmapped"
+ DISK_LINKOBJS="$DISK_LINKOBJS DiskIO/Mmapped/MmappedDiskIOModule.o"
+ ;;
+
Blocking)
AC_MSG_NOTICE([Enabling Blocking DiskIO module])
DISK_LIBS="$DISK_LIBS libBlocking.a"
# for STORE_TESTS substitution
STORE_TESTS="$STORE_TESTS tests/testCoss$EXEEXT"
;;
+ rock)
+ if ! test "x$squid_disk_module_candidates_Mmapped" = "xyes"; then
+ AC_MSG_ERROR([Storage module Rock requires Mmapped DiskIO module])
+ fi
+ STORE_TESTS="$STORE_TESTS tests/testRock$EXEEXT"
+ ;;
ufs)
STORE_TESTS="$STORE_TESTS tests/testUfs$EXEEXT"
esac
AH_TEMPLATE(HAVE_FS_AUFS, "Define to 1 if aufs filesystem module is built")
AH_TEMPLATE(HAVE_FS_DISKD, "Define to 1 if diskd filesystem module is built")
AH_TEMPLATE(HAVE_FS_COSS, "Define to 1 if coss filesystem module is built")
+AH_TEMPLATE(HAVE_FS_ROCK, "Define to 1 if rock filesystem module is built")
dnl got final squid_storeio_module_candidates, build library lists
Vector &operator += (E item) {push_back(item); return *this;};
void insert (E);
+ const E &front() const;
+ E &front();
E &back();
E pop_back();
E shift(); // aka pop_front
return items[size() - 1];
}
+template<class E>
+const E &
+Vector<E>::front() const
+{
+ assert (size());
+ return items[0];
+}
+
+template<class E>
+E &
+Vector<E>::front()
+{
+ assert (size());
+ return items[0];
+}
+
template<class E>
void
Vector<E>::prune(E item)
assert (fd > -1);
assert (ioRequestor.getRaw());
readRequest = aRequest;
+ debugs(79, 3, HERE << aRequest->len << " for FD " << fd << " at " << aRequest->offset);
file_read(fd, aRequest->buf, aRequest->len, aRequest->offset, ReadDone, this);
}
void
BlockingFile::write(WriteRequest *aRequest)
{
- debugs(79, 3, "storeUfsWrite: FD " << fd);
+ debugs(79, 3, HERE << aRequest->len << " for FD " << fd << " at " << aRequest->offset);
writeRequest = aRequest;
file_write(fd,
aRequest->offset,
BlockingFile::writeDone(int rvfd, int errflag, size_t len)
{
assert (rvfd == fd);
- debugs(79, 3, "storeUfsWriteDone: FD " << fd << ", len " << len);
+ debugs(79,3, HERE << "FD " << fd << ", len " << len);
WriteRequest::Pointer result = writeRequest;
writeRequest = NULL;
--- /dev/null
+#include "squid.h"
+#include "MmappedDiskIOModule.h"
+#include "MmappedIOStrategy.h"
+
+MmappedDiskIOModule::MmappedDiskIOModule()
+{
+ ModuleAdd(*this);
+}
+
+MmappedDiskIOModule &
+MmappedDiskIOModule::GetInstance()
+{
+ return Instance;
+}
+
+void
+MmappedDiskIOModule::init()
+{}
+
+void
+MmappedDiskIOModule::shutdown()
+{}
+
+
+DiskIOStrategy*
+MmappedDiskIOModule::createStrategy()
+{
+ return new MmappedIOStrategy();
+}
+
+MmappedDiskIOModule MmappedDiskIOModule::Instance;
+
+char const *
+MmappedDiskIOModule::type () const
+{
+ return "Mmapped";
+}
--- /dev/null
+#ifndef SQUID_MMAPPEDDISKIOMODULE_H
+#define SQUID_MMAPPEDDISKIOMODULE_H
+
+#include "DiskIO/DiskIOModule.h"
+
+class MmappedDiskIOModule : public DiskIOModule
+{
+
+public:
+ static MmappedDiskIOModule &GetInstance();
+ MmappedDiskIOModule();
+ virtual void init();
+ virtual void shutdown();
+ virtual char const *type () const;
+ virtual DiskIOStrategy* createStrategy();
+
+private:
+ static MmappedDiskIOModule Instance;
+};
+
+#endif /* SQUID_MMAPPEDDISKIOMODULE_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "DiskIO/Mmapped/MmappedFile.h"
+#include <sys/mman.h>
+#include "DiskIO/IORequestor.h"
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+
+CBDATA_CLASS_INIT(MmappedFile);
+
+// helper class to deal with mmap(2) offset alignment and other low-level specs
+class Mmapping {
+public:
+ Mmapping(int fd, size_t length, int prot, int flags, off_t offset);
+ ~Mmapping();
+
+ void *map(); ///< calls mmap(2); returns usable buffer or nil on failure
+ bool unmap(); ///< unmaps previously mapped buffer, if any
+
+private:
+ const int fd; ///< descriptor of the mmapped file
+ const size_t length; ///< user-requested data length, needed for munmap
+ const int prot; ///< mmap(2) "protection" flags
+ const int flags; ///< other mmap(2) flags
+ const off_t offset; ///< user-requested data offset
+
+ off_t delta; ///< mapped buffer increment to hit user offset
+ void *buf; ///< buffer returned by mmap, needed for munmap
+};
+
+
+void *
+MmappedFile::operator new(size_t sz)
+{
+ CBDATA_INIT_TYPE(MmappedFile);
+ MmappedFile *result = cbdataAlloc(MmappedFile);
+ /* Mark result as being owned - we want the refcounter to do the delete
+ * call */
+ return result;
+}
+
+void
+MmappedFile::operator delete(void *address)
+{
+ MmappedFile *t = static_cast<MmappedFile *>(address);
+ cbdataFree(t);
+}
+
+MmappedFile::MmappedFile(char const *aPath): fd(-1),
+ minOffset(0), maxOffset(-1), error_(false)
+{
+ assert(aPath);
+ path_ = xstrdup(aPath);
+ debugs(79,5, HERE << this << ' ' << path_);
+}
+
+MmappedFile::~MmappedFile()
+{
+ safe_free(path_);
+ doClose();
+}
+
+// XXX: almost a copy of BlockingFile::open
+void
+MmappedFile::open(int flags, mode_t mode, RefCount<IORequestor> callback)
+{
+ assert(fd < 0);
+
+ /* Simulate async calls */
+ fd = file_open(path_ , flags);
+ ioRequestor = callback;
+
+ if (fd < 0) {
+ debugs(79,3, HERE << "open error: " << xstrerror());
+ error_ = true;
+ } else {
+ store_open_disk_fd++;
+ debugs(79,3, HERE << "FD " << fd);
+
+ // setup mapping boundaries
+ struct stat sb;
+ if (fstat(fd, &sb) == 0)
+ maxOffset = sb.st_size; // we do not expect it to change
+ }
+
+ callback->ioCompletedNotification();
+}
+
+/**
+ * Alias for MmappedFile::open(...)
+ * \copydoc MmappedFile::open(int flags, mode_t mode, RefCount<IORequestor> callback)
+ */
+void
+MmappedFile::create(int flags, mode_t mode, RefCount<IORequestor> callback)
+{
+ /* We use the same logic path for open */
+ open(flags, mode, callback);
+}
+
+void MmappedFile::doClose()
+{
+ if (fd >= 0) {
+ file_close(fd);
+ fd = -1;
+ store_open_disk_fd--;
+ }
+}
+
+void
+MmappedFile::close()
+{
+ debugs(79, 3, HERE << this << " closing for " << ioRequestor);
+ doClose();
+ assert(ioRequestor != NULL);
+ ioRequestor->closeCompleted();
+}
+
+bool
+MmappedFile::canRead() const
+{
+ return fd >= 0;
+}
+
+bool
+MmappedFile::canWrite() const
+{
+ return fd >= 0;
+}
+
+bool
+MmappedFile::error() const
+{
+ return error_;
+}
+
+void
+MmappedFile::read(ReadRequest *aRequest)
+{
+ debugs(79,3, HERE << "(FD " << fd << ", " << aRequest->len << ", " <<
+ aRequest->offset << ")");
+
+ assert(fd >= 0);
+ assert(ioRequestor != NULL);
+ assert(aRequest->len > 0); // TODO: work around mmap failures on zero-len?
+ assert(aRequest->offset >= 0);
+ assert(!error_); // TODO: propagate instead?
+
+ assert(minOffset < 0 || minOffset <= aRequest->offset);
+ assert(maxOffset < 0 || aRequest->offset + aRequest->len <= (uint64_t)maxOffset);
+
+ Mmapping mapping(fd, aRequest->len, PROT_READ, MAP_PRIVATE | MAP_NORESERVE,
+ aRequest->offset);
+
+ bool done = false;
+ if (void *buf = mapping.map()) {
+ memcpy(aRequest->buf, buf, aRequest->len);
+ done = mapping.unmap();
+ }
+ error_ = !done;
+
+ const ssize_t rlen = error_ ? -1 : (ssize_t)aRequest->len;
+ const int errflag = error_ ? DISK_ERROR : DISK_OK;
+ ioRequestor->readCompleted(aRequest->buf, rlen, errflag, aRequest);
+}
+
+void
+MmappedFile::write(WriteRequest *aRequest)
+{
+ debugs(79,3, HERE << "(FD " << fd << ", " << aRequest->len << ", " <<
+ aRequest->offset << ")");
+
+ assert(fd >= 0);
+ assert(ioRequestor != NULL);
+ assert(aRequest->len > 0); // TODO: work around mmap failures on zero-len?
+ assert(aRequest->offset >= 0);
+ assert(!error_); // TODO: propagate instead?
+
+ assert(minOffset < 0 || minOffset <= aRequest->offset);
+ assert(maxOffset < 0 || aRequest->offset + aRequest->len <= (uint64_t)maxOffset);
+
+ const ssize_t written =
+ pwrite(fd, aRequest->buf, aRequest->len, aRequest->offset);
+ if (written < 0) {
+ debugs(79,1, HERE << "error: " << xstrerr(errno));
+ error_ = true;
+ } else
+ if (static_cast<size_t>(written) != aRequest->len) {
+ debugs(79,1, HERE << "problem: " << written << " < " << aRequest->len);
+ error_ = true;
+ }
+
+ if (aRequest->free_func)
+ (aRequest->free_func)(const_cast<char*>(aRequest->buf)); // broken API?
+
+ if (!error_) {
+ debugs(79,5, HERE << "wrote " << aRequest->len << " to FD " << fd << " at " << aRequest->offset);
+ } else {
+ doClose();
+ }
+
+ const ssize_t rlen = error_ ? 0 : (ssize_t)aRequest->len;
+ const int errflag = error_ ? DISK_ERROR : DISK_OK;
+ ioRequestor->writeCompleted(errflag, rlen, aRequest);
+}
+
+/// we only support blocking I/O
+bool
+MmappedFile::ioInProgress() const
+{
+ return false;
+}
+
+Mmapping::Mmapping(int aFd, size_t aLength, int aProt, int aFlags, off_t anOffset):
+ fd(aFd), length(aLength), prot(aProt), flags(aFlags), offset(anOffset),
+ delta(-1), buf(NULL)
+{
+}
+
+Mmapping::~Mmapping()
+{
+ if (buf)
+ unmap();
+}
+
+void *
+Mmapping::map()
+{
+ // mmap(2) requires that offset is a multiple of the page size
+ static const int pageSize = getpagesize();
+ delta = offset % pageSize;
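+ // e.g., offset=12345 with pageSize=4096 gives delta=57: we map
+ // length+57 bytes starting at page-aligned offset 12288 and return
+ // the buffer shifted by 57 bytes to hit the user-requested offset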
+
+ buf = mmap(NULL, length + delta, prot, flags, fd, offset - delta);
+
+ if (buf == MAP_FAILED) {
+ const int errNo = errno;
+ debugs(79,3, HERE << "error FD " << fd << "mmap(" << length << '+' <<
+ delta << ", " << offset << '-' << delta << "): " << xstrerr(errNo));
+ buf = NULL;
+ return NULL;
+ }
+
+ return static_cast<char*>(buf) + delta;
+}
+
+bool
+Mmapping::unmap()
+{
+ debugs(79,9, HERE << "FD " << fd <<
+ " munmap(" << buf << ", " << length << '+' << delta << ')');
+
+ if (!buf) // forgot or failed to map
+ return false;
+
+ const bool error = munmap(buf, length + delta) != 0;
+ if (error) {
+ const int errNo = errno;
+ debugs(79,3, HERE << "error FD " << fd <<
+ " munmap(" << buf << ", " << length << '+' << delta << "): " <<
+ "): " << xstrerr(errNo));
+ }
+ buf = NULL;
+ return !error;
+}
+
+// TODO: check MAP_NORESERVE, consider MAP_POPULATE and MAP_FIXED
--- /dev/null
+#ifndef SQUID_MMAPPEDFILE_H
+#define SQUID_MMAPPEDFILE_H
+
+#include "cbdata.h"
+#include "DiskIO/DiskFile.h"
+#include "DiskIO/IORequestor.h"
+
+class MmappedFile : public DiskFile
+{
+
+public:
+ void *operator new(size_t);
+ void operator delete(void *);
+ MmappedFile(char const *path);
+ ~MmappedFile();
+ virtual void open(int flags, mode_t mode, RefCount<IORequestor> callback);
+ virtual void create(int flags, mode_t mode, RefCount<IORequestor> callback);
+ virtual void read(ReadRequest *);
+ virtual void write(WriteRequest *);
+ virtual void close();
+ virtual bool error() const;
+ virtual int getFD() const { return fd;}
+
+ virtual bool canRead() const;
+ virtual bool canWrite() const;
+ virtual bool ioInProgress() const;
+
+private:
+ CBDATA_CLASS(MmappedFile);
+
+ char const *path_;
+ RefCount<IORequestor> ioRequestor;
+ //RefCount<ReadRequest> readRequest;
+ //RefCount<WriteRequest> writeRequest;
+ int fd;
+
+ // mmapped memory leads to SEGV and bus errors if it maps beyond file
+ int64_t minOffset; ///< enforced if not negative (to preserve file headers)
+ int64_t maxOffset; ///< enforced if not negative (to avoid crashes)
+
+ bool error_;
+
+ void doClose();
+};
+
+#endif /* SQUID_MMAPPEDFILE_H */
--- /dev/null
+
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "MmappedIOStrategy.h"
+#include "MmappedFile.h"
+bool
+MmappedIOStrategy::shedLoad()
+{
+ return false;
+}
+
+int
+MmappedIOStrategy::load()
+{
+ /* Return 999 (99.9%) constant load */
+ return 999;
+}
+
+DiskFile::Pointer
+MmappedIOStrategy::newFile (char const *path)
+{
+ return new MmappedFile (path);
+}
+
+void
+MmappedIOStrategy::unlinkFile(char const *path)
+{
+#if USE_UNLINKD
+ unlinkdUnlink(path);
+#else
+ ::unlink(path);
+#endif
+}
--- /dev/null
+#ifndef SQUID_MMAPPEDIOSTRATEGY_H
+#define SQUID_MMAPPEDIOSTRATEGY_H
+#include "DiskIO/DiskIOStrategy.h"
+
+class MmappedIOStrategy : public DiskIOStrategy
+{
+
+public:
+ virtual bool shedLoad();
+ virtual int load();
+ virtual RefCount<DiskFile> newFile(char const *path);
+ virtual void unlinkFile (char const *);
+};
+
+#endif /* SQUID_MMAPPEDIOSTRATEGY_H */
AIOPS_SOURCE = DiskIO/DiskThreads/aiops.cc
endif
-EXTRA_LIBRARIES = libAIO.a libBlocking.a libDiskDaemon.a libDiskThreads.a
+EXTRA_LIBRARIES = libAIO.a libBlocking.a libDiskDaemon.a libDiskThreads.a \
+ libMmapped.a
noinst_LIBRARIES = $(DISK_LIBS)
noinst_LTLIBRARIES = libsquid.la
recv-announce \
tests/testUfs \
tests/testCoss \
+ tests/testRock \
tests/testNull \
ufsdump
DiskIO/Blocking/BlockingDiskIOModule.cc \
DiskIO/Blocking/BlockingDiskIOModule.h
+libMmapped_a_SOURCES = \
+ DiskIO/Mmapped/MmappedFile.cc \
+ DiskIO/Mmapped/MmappedFile.h \
+ DiskIO/Mmapped/MmappedIOStrategy.cc \
+ DiskIO/Mmapped/MmappedIOStrategy.h \
+ DiskIO/Mmapped/MmappedDiskIOModule.cc \
+ DiskIO/Mmapped/MmappedDiskIOModule.h
+
libDiskDaemon_a_SOURCES = \
DiskIO/DiskDaemon/DiskdFile.cc \
DiskIO/DiskDaemon/DiskdFile.h \
tests_testUfs_DEPENDENCIES = \
$(SWAP_TEST_DS)
+tests_testRock_SOURCES = \
+ tests/testRock.cc \
+ tests/testMain.cc \
+ tests/testRock.h \
+ tests/stub_cache_manager.cc \
+ tests/stub_HelperChildConfig.cc \
+ tests/stub_Port.cc \
+ tests/stub_TypedMsgHdr.cc \
+ tests/stub_UdsOp.cc \
+ $(SWAP_TEST_SOURCES)
+nodist_tests_testRock_SOURCES = \
+ swap_log_op.cc \
+ $(SWAP_TEST_GEN_SOURCES) \
+ SquidMath.cc \
+ SquidMath.h
+tests_testRock_LDADD = \
+ $(COMMON_LIBS) \
+ $(REPL_OBJS) \
+ $(DISK_LIBS) \
+ $(DISK_OS_LIBS) \
+ acl/libapi.la \
+ $(top_builddir)/lib/libmisccontainers.la \
+ $(top_builddir)/lib/libmiscencoding.la \
+ $(top_builddir)/lib/libmiscutil.la \
+ $(REGEXLIB) \
+ $(SQUID_CPPUNIT_LIBS) \
+ $(SSLLIB) \
+ $(COMPAT_LIB) \
+ $(XTRA_LIBS)
+tests_testRock_LDFLAGS = $(LIBADD_DL)
+tests_testRock_DEPENDENCIES = \
+ $(SWAP_TEST_DS)
+
tests_testCoss_SOURCES = \
tests/testCoss.cc \
tests/testMain.cc \
/// \ingroup SwapStoreAPI
SQUIDCEXTERN char *storeSwapMetaPack(tlv * tlv_list, int *length);
/// \ingroup SwapStoreAPI
+SQUIDCEXTERN size_t storeSwapMetaSize(const StoreEntry * e);
+/// \ingroup SwapStoreAPI
SQUIDCEXTERN tlv *storeSwapMetaBuild(StoreEntry * e);
/// \ingroup SwapStoreAPI
SQUIDCEXTERN void storeSwapTLVFree(tlv * n);
int const StoreMetaUnpacker::MinimumBufferLength = sizeof(char) + sizeof(int);
+/// useful for meta stored in pre-initialized (with zeros) db files
+bool
+StoreMetaUnpacker::isBufferZero()
+{
+ // We could memcmp the entire buffer, but it is probably safe enough
+ // to test a few bytes because if we do not detect a corrupted entry
+ // it is not a big deal. Empty entries are not isBufferSane anyway.
+ const int depth = 10;
+ if (buflen < depth)
+ return false; // cannot be sure enough
+
+ for (int i = 0; i < depth; ++i) {
+ if (buf[i])
+ return false;
+ }
+ return true;
+}
+
bool
StoreMetaUnpacker::isBufferSane()
{
public:
StoreMetaUnpacker (const char *buf, ssize_t bufferLength, int *hdrlen);
StoreMeta *createStoreMeta();
+ bool isBufferZero(); ///< all-zeros buffer, implies !isBufferSane
bool isBufferSane();
private:
#include "StoreFileSystem.h"
#include "ConfigOption.h"
+SwapDir::SwapDir(char const *aType): theType(aType),
+ cur_size (0), max_size(0),
+ path(NULL), index(-1), min_objsize(0), max_objsize (-1),
+ repl(NULL), removals(0), scanned(0),
+ cleanLog(NULL)
+{
+ fs.blksize = 1024;
+}
+
SwapDir::~SwapDir()
{
+ // TODO: should we delete repl?
xfree(path);
}
{
public:
- SwapDir(char const *aType) : theType (aType), cur_size(0), max_size(0), min_objsize(0), max_objsize(-1), cleanLog(NULL) {
- fs.blksize = 1024;
- path = NULL;
- }
-
+ SwapDir(char const *aType);
virtual ~SwapDir();
virtual void reconfigure(int, char *) = 0;
char const *type() const;
higher hit ratio at the expense of an increase in response
time.
+ The rock store type:
+
+ cache_dir rock Directory-Name Mbytes <max-size=bytes>
+
+ The Rock Store type is a database-style storage. All cached
+ entries are stored in a single "database" file, using fixed-size
+ slots, one entry per slot. The database size is specified in
+ Mbytes. The slot size is specified in bytes, using the max-size
+ option. See below for more information on max-size.
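+
+ For example, a hypothetical cache_dir keeping up to 512 MB of
+ cached entries in 8192-byte slots:
+
+ cache_dir rock /var/cache/squid-rock 512 max-size=8192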
+
The coss store type:
NP: COSS filesystem in Squid-3 has been deemed too unstable for
' output as-is
- left aligned
- width field width. If starting with 0 the
- output is zero padded
+ width minimum and/or maximum field width: [min][.max]
+ When minimum starts with 0, the field is zero-padded.
+ String values exceeding maximum width are truncated.
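+ For example, a width of "08.8" zero-pads shorter
+ values to 8 characters and truncates longer ones.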
{arg} argument such as header name etc
Format codes:
returning a chain of services to be used next. The services
are specified using the X-Next-Services ICAP response header
value, formatted as a comma-separated list of service names.
- Each named service should be configured in squid.conf and
- should have the same method and vectoring point as the current
- ICAP transaction. Services violating these rules are ignored.
- An empty X-Next-Services value results in an empty plan which
- ends the current adaptation.
+ Each named service should be configured in squid.conf. Other
+ services are ignored. An empty X-Next-Services value results
+ in an empty plan which ends the current adaptation.
+
+ A dynamic adaptation plan may cross or cover multiple
+ supported vectoring points in their natural processing order.
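+
+ For example, an ICAP server could plan two more adaptation
+ steps (service names here are hypothetical):
+
+ X-Next-Services: salvageService, logService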
Routing is not allowed by default: the ICAP X-Next-Services
response header is ignored.
assert(fdd->write_q->len > fdd->write_q->buf_offset);
- debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " << (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes");
+ debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
+ (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
+ fdd->write_q->file_offset);
errno = 0;
if (fdd->write_q->file_offset != -1)
- lseek(fd, fdd->write_q->file_offset, SEEK_SET);
+ lseek(fd, fdd->write_q->file_offset, SEEK_SET); /* XXX ignore return? */
len = FD_WRITE_METHOD(fd,
fdd->write_q->buf + fdd->write_q->buf_offset,
include $(top_srcdir)/src/Common.am
-EXTRA_LTLIBRARIES = libaufs.la libdiskd.la libcoss.la libufs.la
+EXTRA_LTLIBRARIES = libaufs.la libdiskd.la libcoss.la libufs.la librock.la
noinst_LTLIBRARIES = $(STORE_LIBS_TO_BUILD) libfs.la
# aufs is a "fake" legacy store
ufs/ufscommon.cc \
ufs/ufscommon.h
+librock_la_SOURCES = \
+ rock/RockCommon.cc \
+ rock/RockCommon.h \
+ rock/RockDirMap.cc \
+ rock/RockDirMap.h \
+ rock/RockFile.cc \
+ rock/RockFile.h \
+ rock/RockIoState.cc \
+ rock/RockIoState.h \
+ rock/RockIoRequests.cc \
+ rock/RockIoRequests.h \
+ rock/RockRebuild.cc \
+ rock/RockRebuild.h \
+ rock/RockStoreFileSystem.cc \
+ rock/RockStoreFileSystem.h \
+ rock/RockSwapDir.cc \
+ rock/RockSwapDir.h
+
libfs_la_SOURCES = Module.cc Module.h
libfs_la_LIBADD = $(STORE_LIBS_TO_BUILD)
libfs_la_DEPENDENCIES = $(STORE_LIBS_TO_BUILD)
coss/clean: clean
ufs/all: libufs.la
ufs/clean: clean
+rock/all: librock.la
+rock/clean: clean
TESTS += testHeaders
## Special Universal .h dependency test script
## aborts if error encountered
-testHeaders: $(srcdir)/ufs/*.h $(srcdir)/coss/*.h
+testHeaders: $(srcdir)/ufs/*.h $(srcdir)/coss/*.h $(srcdir)/rock/*.h
$(SHELL) $(top_srcdir)/test-suite/testheaders.sh "$(CXXCOMPILE)" $^ || exit 1
## diskd/ has no .h files
## aufs/ has no .h files
static StoreFSufs<UFSSwapDir> *DiskdInstance;
#endif
+#ifdef HAVE_FS_ROCK
+#include "fs/rock/RockStoreFileSystem.h"
+static Rock::StoreFileSystem *RockInstance = NULL;
+#endif
+
+
/* TODO: Modify coss code to:
* (a) remove the StoreFScoss::GetInstance method,
* (b) declare the StoreFScoss::stats as static and
DiskdInstance = new StoreFSufs<UFSSwapDir>("DiskDaemon", "diskd");;
#endif
+#ifdef HAVE_FS_ROCK
+ RockInstance = new Rock::StoreFileSystem();
+#endif
+
}
delete DiskdInstance;
#endif
+#ifdef HAVE_FS_ROCK
+ delete RockInstance;
+#endif
+
}
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "squid.h"
+#include "fs/rock/RockDirMap.h"
+
+Rock::DirMap::DirMap(int roughLimit): hintPast(-1), hintNext(0),
+ bitLimit(roundLimit(roughLimit)), bitCount(0), words(NULL), wordCount(0)
+{
+ syncWordCount();
+ allocate();
+}
+
+Rock::DirMap::DirMap(const DirMap &m):
+ hintPast(m.hintPast), hintNext(m.hintNext),
+ bitLimit(m.bitLimit), bitCount(m.bitCount),
+ words(NULL), wordCount(m.wordCount)
+{
+ syncWordCount();
+ copyFrom(m);
+}
+
+Rock::DirMap::~DirMap()
+{
+ deallocate();
+}
+
+Rock::DirMap &Rock::DirMap::operator =(const DirMap &m)
+{
+ deallocate();
+
+ hintPast = m.hintPast;
+ hintNext = m.hintNext;
+ bitLimit = m.bitLimit;
+ bitCount = m.bitCount;
+
+ wordCount = m.wordCount;
+ copyFrom(m);
+ return *this;
+}
+
+void
+Rock::DirMap::resize(const int roughLimit)
+{
+ const int newLimit = roundLimit(roughLimit);
+ // TODO: optimize?
+ if (newLimit != bitLimit) {
+ DirMap old(*this);
+ deallocate();
+ bitLimit = newLimit;
+ syncWordCount();
+ copyFrom(old);
+ }
+}
+
+int
+Rock::DirMap::entryLimit() const
+{
+ return bitLimit;
+}
+
+int
+Rock::DirMap::entryCount() const
+{
+ return bitCount;
+}
+
+bool
+Rock::DirMap::full() const
+{
+ return bitCount >= bitLimit;
+}
+
+bool
+Rock::DirMap::valid(const int pos) const
+{
+ return 0 <= pos && pos < bitLimit;
+}
+
+int
+Rock::DirMap::useNext()
+{
+ assert(!full());
+ const int next = findNext();
+ assert(valid(next)); // because we were not full
+ use(next);
+ return next;
+}
+
+/// low-level allocation, assumes wordCount is set
+void
+Rock::DirMap::allocate()
+{
+ assert(!words);
+ words = new unsigned long[wordCount];
+ memset(words, 0, ramSize());
+}
+
+/// low-level deallocation; may place the object in an inconsistent state
+void
+Rock::DirMap::deallocate()
+{
+ delete[] words;
+ words = NULL;
+}
+
+/// low-level copy; assumes all counts have been setup
+void
+Rock::DirMap::copyFrom(const DirMap &m)
+{
+ allocate();
+ if (m.wordCount)
+ memcpy(words, m.words, min(ramSize(), m.ramSize()));
+}
+
+/// low-level ram size calculation for mem*() calls
+int
+Rock::DirMap::ramSize() const
+{
+ return sizeof(*words) * wordCount;
+}
+
+/* XXX: Number of bits in a long and other constants from filemap.cc */
+#if SIZEOF_LONG == 8
+#define LONG_BIT_SHIFT 6
+#define BITS_IN_A_LONG 0x40
+#define LONG_BIT_MASK 0x3F
+#define ALL_ONES (unsigned long) 0xFFFFFFFFFFFFFFFF
+#elif SIZEOF_LONG == 4
+#define LONG_BIT_SHIFT 5
+#define BITS_IN_A_LONG 0x20
+#define LONG_BIT_MASK 0x1F
+#define ALL_ONES (unsigned long) 0xFFFFFFFF
+#else
+#define LONG_BIT_SHIFT 5
+#define BITS_IN_A_LONG 0x20
+#define LONG_BIT_MASK 0x1F
+#define ALL_ONES (unsigned long) 0xFFFFFFFF
+#endif
+
+#define FM_INITIAL_NUMBER (1<<14)
+
+int
+Rock::DirMap::AbsoluteEntryLimit()
+{
+ const int sfilenoMax = 0xFFFFFF; // Core sfileno maximum
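+ // sfilenoMax+1 is 16777216, a multiple of both 32 and 64,
+ // so the word-size rounding below loses no entries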
+ return ((sfilenoMax+1) >> LONG_BIT_SHIFT) << LONG_BIT_SHIFT;
+}
+
+/// Adjust limit so that there are no "extra" bits in the last word
+/// that are above the limit but still found by findNext.
+int
+Rock::DirMap::roundLimit(const int roughLimit) const
+{
+ const int allowedLimit = min(roughLimit, AbsoluteEntryLimit());
+ const int newLimit = (allowedLimit >> LONG_BIT_SHIFT) << LONG_BIT_SHIFT;
+ debugs(8, 3, HERE << "adjusted map limit from " << roughLimit << " to " <<
+ newLimit);
+ return newLimit;
+}
+
+/// calculate wordCount for the number of entries (bitLimit)
+void
+Rock::DirMap::syncWordCount()
+{
+ wordCount = bitLimit >> LONG_BIT_SHIFT;
+ debugs(8, 3, HERE << wordCount << ' ' << BITS_IN_A_LONG <<
+ "-bit long words for " << bitLimit << " bits");
+}
+
+void
+Rock::DirMap::use(const int pos)
+{
+ if (!has(pos)) {
+ assert(valid(pos));
+
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ words[pos >> LONG_BIT_SHIFT] |= bitmask;
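+ // e.g., with 64-bit longs, pos=70 sets bit 6 of words[1]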
+
+ ++bitCount;
+ debugs(8, 6, HERE << pos);
+ } else {
+ debugs(8, 3, HERE << pos << " in vain");
+ }
+}
+
+void
+Rock::DirMap::clear(const int pos)
+{
+ if (has(pos)) {
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ words[pos >> LONG_BIT_SHIFT] &= ~bitmask;
+ --bitCount;
+ debugs(8, 6, HERE << pos);
+ } else {
+ debugs(8, 3, HERE << pos << " in vain");
+ assert(valid(pos));
+ }
+ if (hintPast < 0)
+ hintPast = pos; // remember cleared slot
+}
+
+bool
+Rock::DirMap::has(const int pos) const
+{
+ if (!valid(pos)) // the only place where we are forgiving
+ return false;
+
+ const unsigned long bitmask = (1L << (pos & LONG_BIT_MASK));
+ return words[pos >> LONG_BIT_SHIFT] & bitmask;
+}
+
+/// low-level empty-slot search routine, uses and updates hints
+int
+Rock::DirMap::findNext() const
+{
+ // try the clear-based hint, if any
+ if (hintPast >= 0) {
+ const int result = hintPast;
+ hintPast = -1; // assume used; or we could update it in use()
+ if (valid(result) && !has(result))
+ return result;
+ }
+
+ // adjust and try the scan-based hint
+ if (!valid(hintNext))
+ hintNext = 0;
+ if (valid(hintNext) && !has(hintNext))
+ return hintNext++;
+
+ // start scan with the scan-based hint
+ int wordPos = hintNext >> LONG_BIT_SHIFT;
+
+ for (int i = 0; i < wordCount; ++i) {
+ if (words[wordPos] != ALL_ONES)
+ break;
+
+ wordPos = (wordPos + 1) % wordCount;
+ }
+
+ for (int bitPos = 0; bitPos < BITS_IN_A_LONG; ++bitPos) {
+ hintNext = ((unsigned long) wordPos << LONG_BIT_SHIFT) | bitPos;
+
+ if (hintNext < bitLimit && !has(hintNext))
+ return hintNext++;
+ }
+
+ // the map is full
+ return -1;
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_DIR_MAP_H
+#define SQUID_FS_ROCK_DIR_MAP_H
+
+namespace Rock {
+
+/// \ingroup Rock
+/// bitmap of used db slots indexed by sfileno
+class DirMap
+{
+public:
+ // the map may adjust the limit down a little; see roundLimit()
+ DirMap(const int roughLimit = 0);
+ DirMap(const DirMap &map);
+ ~DirMap();
+
+ DirMap &operator =(const DirMap &map);
+ void resize(const int newLimit); ///< forgets higher bits or appends zeros
+
+ bool full() const; ///< there are no empty slots left
+ bool has(int n) const; ///< whether slot n is occupied
+ bool valid(int n) const; ///< whether n is a valid slot coordinate
+ int entryCount() const; ///< number of bits turned on
+ int entryLimit() const; ///< maximum number of bits that can be turned on
+
+ void use(int n); ///< mark slot n as used
+ void clear(int n); ///< mark slot n as unused
+ int useNext(); ///< finds and uses an empty slot, returning its coordinate
+
+ static int AbsoluteEntryLimit(); ///< maximum entryLimit() possible
+
+private:
+ /// unreliable next empty slot suggestion #1 (clear based)
+ mutable int hintPast;
+ /// unreliable next empty slot suggestion #2 (scan based)
+ mutable int hintNext;
+
+ int bitLimit; ///< maximum number of map entries
+ int bitCount; ///< current number of map entries
+
+ unsigned long *words; ///< low level storage
+ int wordCount; ///< number of words allocated
+
+ int roundLimit(const int roughLimit) const;
+ void syncWordCount();
+ int ramSize() const;
+ void allocate();
+ void deallocate();
+ void copyFrom(const DirMap &map);
+ int findNext() const;
+};
+
+} // namespace Rock
+
+// We do not reuse struct _fileMap because we cannot control its size,
+// resulting in sfilenos that point beyond the database.
+
+// TODO: Consider using std::bitset. Is it really slower for findNext()?
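+
+// A minimal usage sketch (sizes are illustrative):
+//
+// Rock::DirMap map(1000); // limit rounds down to 960 with 64-bit longs
+// const int slot = map.useNext(); // claim the first empty slot
+// assert(map.has(slot));
+// map.clear(slot); // release it, e.g., when the entry is unlinked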
+
+
+#endif /* SQUID_FS_ROCK_DIR_MAP_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "fs/rock/RockIoRequests.h"
+
+CBDATA_NAMESPACED_CLASS_INIT(Rock, ReadRequest);
+CBDATA_NAMESPACED_CLASS_INIT(Rock, WriteRequest);
+
+Rock::ReadRequest::ReadRequest(const ::ReadRequest &base,
+ const IoState::Pointer &anSio):
+ ::ReadRequest(base),
+ sio(anSio)
+{
+}
+
+Rock::WriteRequest::WriteRequest(const ::WriteRequest &base,
+ const IoState::Pointer &anSio):
+ ::WriteRequest(base),
+ sio(anSio)
+{
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_IO_REQUESTS_H
+#define SQUID_FS_ROCK_IO_REQUESTS_H
+
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockIoState.h"
+
+class DiskFile;
+
+namespace Rock {
+
+/// \ingroup Rock
+class ReadRequest: public ::ReadRequest
+{
+public:
+ ReadRequest(const ::ReadRequest &base, const IoState::Pointer &anSio);
+ IoState::Pointer sio;
+
+private:
+ CBDATA_CLASS2(ReadRequest);
+};
+
+
+/// \ingroup Rock
+class WriteRequest: public ::WriteRequest
+{
+public:
+ WriteRequest(const ::WriteRequest &base, const IoState::Pointer &anSio);
+ IoState::Pointer sio;
+
+private:
+ CBDATA_CLASS2(WriteRequest);
+};
+
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_IO_REQUESTS_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "Parsing.h"
+#include "DiskIO/DiskIOModule.h"
+#include "DiskIO/DiskIOStrategy.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockIoState.h"
+#include "fs/rock/RockIoRequests.h"
+#include "fs/rock/RockSwapDir.h"
+
+Rock::IoState::IoState(SwapDir *dir,
+ StoreEntry *anEntry,
+ StoreIOState::STFNCB *cbFile,
+ StoreIOState::STIOCB *cbIo,
+ void *data):
+ slotSize(0),
+ entrySize(0)
+{
+ e = anEntry;
+ swap_filen = e->swap_filen;
+ swap_dirn = dir->index;
+ slotSize = dir->max_objsize;
+ file_callback = cbFile;
+ callback = cbIo;
+ callback_data = cbdataReference(data);
+ ++store_open_disk_fd; // TODO: use a dedicated counter?
+ //theFile is set by SwapDir because it depends on DiskIOStrategy
+}
+
+Rock::IoState::~IoState()
+{
+ --store_open_disk_fd;
+ if (callback_data)
+ cbdataReferenceDone(callback_data);
+ theFile = NULL;
+}
+
+void
+Rock::IoState::file(const RefCount<DiskFile> &aFile)
+{
+ assert(!theFile);
+ assert(aFile != NULL);
+ theFile = aFile;
+}
+
+void
+Rock::IoState::read_(char *buf, size_t len, off_t off, STRCB *cb, void *data)
+{
+ assert(theFile != NULL);
+ assert(theFile->canRead());
+
+ // Core specifies buffer length, but we must not exceed stored entry size
+ assert(off >= 0);
+ assert(entrySize >= 0);
+ const int64_t offset = static_cast<int64_t>(off);
+ assert(offset <= entrySize);
+ if (offset + (int64_t)len > entrySize)
+ len = entrySize - offset;
+
+ assert(read.callback == NULL);
+ assert(read.callback_data == NULL);
+ read.callback = cb;
+ read.callback_data = cbdataReference(data);
+
+ theFile->read(new ReadRequest(::ReadRequest(buf, offset_ + offset, len), this));
+}
+
+// We only buffer data here; we actually write when close() is called.
+// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions
+// of the slot when the write does not end at the page or sector boundary.
+void
+Rock::IoState::write(char const *buf, size_t size, off_t offset, FREE *dtor)
+{
+ // TODO: move to create?
+ if (!offset) {
+ assert(theBuf.isNull());
+ assert(entrySize >= 0);
+ theBuf.init(min(entrySize, slotSize), slotSize);
+ } else {
+ // Core uses -1 offset as "append". Sigh.
+ assert(offset == -1);
+ assert(!theBuf.isNull());
+ }
+
+ theBuf.append(buf, size);
+
+ if (dtor)
+ (dtor)(const_cast<char*>(buf)); // cast due to a broken API?
+}
+
+// write what was buffered during write() calls
+void
+Rock::IoState::startWriting()
+{
+ assert(theFile != NULL);
+ assert(theFile->canWrite());
+ assert(!theBuf.isNull());
+
+ static const int ps = getpagesize();
+ const size_t tail = (ps - (theBuf.contentSize() % ps)) % ps;
+ const int64_t writeSize = theBuf.contentSize() + tail;
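+ // e.g., 5000 bytes of content with 4096-byte pages yields
+ // tail=3192 and writeSize=8192 (illustrative numbers)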
+ debugs(79, 5, HERE << swap_filen << " at " << offset_ << '+' <<
+ theBuf.contentSize() << '+' << tail);
+
+ assert(theBuf.contentSize() <= slotSize);
+ assert(writeSize <= slotSize);
+ // theFile->write may call writeCompleted immediately
+ theFile->write(new WriteRequest(::WriteRequest(theBuf.content(), offset_,
+ writeSize, theBuf.freeFunc()), this));
+}
+
+/// called by SwapDir::writeCompleted() when the write is done
+void
+Rock::IoState::finishedWriting(const int errFlag)
+{
+ callBack(errFlag);
+}
+
+void
+Rock::IoState::close()
+{
+ debugs(79, 3, HERE << swap_filen << " at " << offset_);
+ if (!theBuf.isNull())
+ startWriting();
+ else
+ callBack(0);
+}
+
+/// close callback (STIOCB) dialer: breaks dependencies and
+/// counts IOState concurrency level
+class StoreIOStateCb: public CallDialer
+{
+public:
+ StoreIOStateCb(StoreIOState::STIOCB *cb, void *data, int err, const Rock::IoState::Pointer &anSio):
+ callback(NULL),
+ callback_data(NULL),
+ errflag(err),
+ sio(anSio) {
+
+ callback = cb;
+ callback_data = cbdataReference(data);
+ }
+
+ StoreIOStateCb(const StoreIOStateCb &cb):
+ callback(NULL),
+ callback_data(NULL),
+ errflag(cb.errflag),
+ sio(cb.sio) {
+
+ callback = cb.callback;
+ callback_data = cbdataReference(cb.callback_data);
+ }
+
+ virtual ~StoreIOStateCb() {
+ cbdataReferenceDone(callback_data); // may be nil already
+ }
+
+ void dial(AsyncCall &call) {
+ void *cbd;
+ if (cbdataReferenceValidDone(callback_data, &cbd) && callback)
+ callback(cbd, errflag, sio.getRaw());
+ }
+
+ bool canDial(AsyncCall &call) const {
+ return cbdataReferenceValid(callback_data) && callback;
+ }
+
+ virtual void print(std::ostream &os) const {
+ os << '(' << callback_data << ", err=" << errflag << ')';
+ }
+
+private:
+ StoreIOStateCb &operator =(const StoreIOStateCb &cb); // not defined
+
+ StoreIOState::STIOCB *callback;
+ void *callback_data;
+ int errflag;
+ Rock::IoState::Pointer sio;
+};
+
+
+void
+Rock::IoState::callBack(int errflag)
+{
+ debugs(79,3, HERE << "errflag=" << errflag);
+ theFile = NULL;
+
+ AsyncCall::Pointer call = asyncCall(79,3, "SomeIoStateCloseCb",
+ StoreIOStateCb(callback, callback_data, errflag, this));
+ ScheduleCallHere(call);
+
+ callback = NULL;
+ cbdataReferenceDone(callback_data);
+}
+
--- /dev/null
+#ifndef SQUID_FS_ROCK_IO_STATE_H
+#define SQUID_FS_ROCK_IO_STATE_H
+
+#include "MemBuf.h"
+#include "SwapDir.h"
+
+class DiskFile;
+
+namespace Rock {
+
+class SwapDir;
+
+/// \ingroup Rock
+class IoState: public ::StoreIOState
+{
+public:
+ typedef RefCount<IoState> Pointer;
+
+ IoState(SwapDir *dir, StoreEntry *e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data);
+ virtual ~IoState();
+
+ void file(const RefCount<DiskFile> &aFile);
+
+ // ::StoreIOState API
+ virtual void read_(char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data);
+ virtual void write(char const *buf, size_t size, off_t offset, FREE * free_func);
+ virtual void close();
+
+ /// called by SwapDir when writing is done
+ void finishedWriting(int errFlag);
+
+ int64_t slotSize; ///< db cell size
+ int64_t entrySize; ///< planned or actual stored size for the entry
+
+ MEMPROXY_CLASS(IoState);
+
+private:
+ void startWriting();
+ void callBack(int errflag);
+
+ RefCount<DiskFile> theFile; // "file" responsible for this I/O
+ MemBuf theBuf; // use for write content accumulation only
+};
+
+MEMPROXY_CLASS_INLINE(IoState);
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_IO_STATE_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 79 Disk IO Routines
+ */
+
+#include "fs/rock/RockRebuild.h"
+#include "fs/rock/RockSwapDir.h"
+
+CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);
+
+Rock::Rebuild::Rebuild(SwapDir *dir):
+ sd(dir),
+ dbSize(0),
+ dbEntrySize(0),
+ dbEntryLimit(0),
+ fd(-1),
+ dbOffset(0),
+ fileno(0)
+{
+ assert(sd);
+ memset(&counts, 0, sizeof(counts));
+ dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
+ dbEntrySize = sd->max_objsize;
+ dbEntryLimit = sd->entryLimit();
+}
+
+Rock::Rebuild::~Rebuild()
+{
+ if (fd >= 0)
+ file_close(fd);
+}
+
+/// prepares and initiates entry loading sequence
+void
+Rock::Rebuild::start() {
+ debugs(47,2, HERE << sd->index);
+
+ fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ failure("cannot open db", errno);
+
+ char buf[SwapDir::HeaderSize];
+ if (read(fd, buf, sizeof(buf)) != SwapDir::HeaderSize)
+ failure("cannot read db header", errno);
+
+ dbOffset = SwapDir::HeaderSize;
+ fileno = 0;
+
+ checkpoint();
+}
+
+/// quits if done; otherwise continues after a pause
+void
+Rock::Rebuild::checkpoint()
+{
+ if (dbOffset < dbSize)
+ eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
+ else
+ complete();
+}
+
+void
+Rock::Rebuild::Steps(void *data)
+{
+ static_cast<Rebuild*>(data)->steps();
+}
+
+void
+Rock::Rebuild::steps() {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ const int maxCount = dbEntryLimit;
+ const int wantedCount = opt_foreground_rebuild ? maxCount : 50;
+ const int stepCount = min(wantedCount, maxCount);
+ for (int i = 0; i < stepCount && dbOffset < dbSize; ++i, ++fileno) {
+ doOneEntry();
+ dbOffset += dbEntrySize;
+
+ if (counts.scancount % 1000 == 0)
+ storeRebuildProgress(sd->index, maxCount, counts.scancount);
+ }
+
+ checkpoint();
+}
+
+void
+Rock::Rebuild::doOneEntry() {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ if (lseek(fd, dbOffset, SEEK_SET) < 0)
+ failure("cannot seek to db entry", errno);
+
+ cache_key key[SQUID_MD5_DIGEST_LENGTH];
+ StoreEntry loadedE;
+ if (!storeRebuildLoadEntry(fd, loadedE, key, counts, 0)) {
+ // skip empty slots
+ if (loadedE.swap_filen > 0 || loadedE.swap_file_sz > 0) {
+ counts.invalid++;
+ sd->unlink(fileno);
+ }
+ return;
+ }
+
+ assert(loadedE.swap_filen < dbEntryLimit);
+ if (!storeRebuildKeepEntry(loadedE, key, counts))
+ return;
+
+ counts.objcount++;
+ // loadedE->dump(5);
+
+ //StoreEntry *e =
+ sd->addEntry(fileno, loadedE);
+ //storeDirSwapLog(e, SWAP_LOG_ADD);
+}
+
+void
+Rock::Rebuild::complete() {
+ debugs(47,3, HERE << sd->index);
+ file_close(fd); // the fd came from file_open()
+ fd = -1; // prevent a second close in the destructor
+ StoreController::store_dirs_rebuilding--;
+ storeRebuildComplete(&counts);
+ delete this; // same as cbdataFree
+}
+
+void
+Rock::Rebuild::failure(const char *msg, int errNo) {
+ debugs(47,5, HERE << sd->index << " fileno " << fileno << " at " <<
+ dbOffset << " <= " << dbSize);
+
+ if (errNo)
+ debugs(47,0, "Rock cache_dir rebuild failure: " << xstrerr(errNo));
+ debugs(47,0, "Do you need to run 'squid -z' to initialize storage?");
+
+ assert(sd);
+ fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
+ sd->index, sd->filePath, msg);
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_REBUILD_H
+#define SQUID_FS_ROCK_REBUILD_H
+
+#include "config.h"
+#include "structs.h"
+
+namespace Rock {
+
+class SwapDir;
+
+/// \ingroup Rock
+/// manages store rebuild process: loading meta information from db on disk
+class Rebuild {
+public:
+ Rebuild(SwapDir *dir);
+ ~Rebuild();
+ void start();
+
+private:
+ CBDATA_CLASS2(Rebuild);
+
+ void checkpoint();
+ void steps();
+ void doOneEntry();
+ void complete();
+ void failure(const char *msg, int errNo = 0);
+
+ SwapDir *sd;
+
+ int64_t dbSize;
+ int dbEntrySize;
+ int dbEntryLimit;
+
+ int fd; // store db file descriptor
+ int64_t dbOffset;
+ int fileno;
+
+ struct _store_rebuild_data counts;
+
+ static void Steps(void *data);
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_REBUILD_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 92 Storage File System
+ */
+
+#include "fs/rock/RockStoreFileSystem.h"
+#include "fs/rock/RockSwapDir.h"
+
+
+Rock::StoreFileSystem::StoreFileSystem()
+{
+ FsAdd(*this);
+}
+
+Rock::StoreFileSystem::~StoreFileSystem()
+{
+}
+
+char const *
+Rock::StoreFileSystem::type() const
+{
+ return "rock";
+}
+
+SwapDir *
+Rock::StoreFileSystem::createSwapDir()
+{
+ return new SwapDir();
+}
+
+void
+Rock::StoreFileSystem::done()
+{
+}
+
+void
+Rock::StoreFileSystem::registerWithCacheManager()
+{
+ assert(false); // XXX: implement
+}
+
+void
+Rock::StoreFileSystem::setup()
+{
+ debugs(92,2, HERE << "Will use Rock FS");
+}
+
+void
+Rock::StoreFileSystem::Stats(StoreEntry *sentry)
+{
+ assert(false); // XXX: implement
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_FS_H
+#define SQUID_FS_ROCK_FS_H
+
+#include "StoreFileSystem.h"
+
+namespace Rock {
+
+/// \ingroup Rock, FileSystems
+class StoreFileSystem: public ::StoreFileSystem
+{
+
+public:
+ static void Stats(StoreEntry * sentry);
+
+ StoreFileSystem();
+ virtual ~StoreFileSystem();
+
+ virtual char const *type() const;
+ virtual SwapDir *createSwapDir();
+ virtual void done();
+ virtual void registerWithCacheManager();
+ virtual void setup();
+
+private:
+ //static Stats Stats_;
+
+ StoreFileSystem(const StoreFileSystem &); // not implemented
+ StoreFileSystem &operator=(const StoreFileSystem &); // not implemented
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_FS_H */
--- /dev/null
+/*
+ * $Id$
+ *
+ * DEBUG: section 47 Store Directory Routines
+ */
+
+#include "config.h"
+#include "Parsing.h"
+#include <iomanip>
+#include "MemObject.h"
+#include "DiskIO/DiskIOModule.h"
+#include "DiskIO/DiskIOStrategy.h"
+#include "DiskIO/ReadRequest.h"
+#include "DiskIO/WriteRequest.h"
+#include "fs/rock/RockSwapDir.h"
+#include "fs/rock/RockIoState.h"
+#include "fs/rock/RockIoRequests.h"
+#include "fs/rock/RockRebuild.h"
+
+// must be divisible by 1024 due to cur_size and max_size KB madness
+const int64_t Rock::SwapDir::HeaderSize = 16*1024;
+
+Rock::SwapDir::SwapDir(): ::SwapDir("rock"), filePath(NULL), io(NULL)
+{
+}
+
+Rock::SwapDir::~SwapDir()
+{
+ delete io;
+ safe_free(filePath);
+}
+
+StoreSearch *
+Rock::SwapDir::search(String const url, HttpRequest *)
+{
+ assert(false); return NULL; // XXX: implement
+}
+
+// TODO: encapsulate as a tool; identical to CossSwapDir::create()
+void
+Rock::SwapDir::create()
+{
+ assert(path);
+ assert(filePath);
+
+ debugs (47,3, HERE << "creating in " << path);
+
+ struct stat swap_sb;
+ if (::stat(path, &swap_sb) < 0) {
+ debugs (47, 1, "Creating Rock db directory: " << path);
+#ifdef _SQUID_MSWIN_
+ const int res = mkdir(path);
+#else
+ const int res = mkdir(path, 0700);
+#endif
+ if (res != 0) {
+ debugs(47,0, "Failed to create Rock db dir " << path <<
+ ": " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+ }
+
+#if SLOWLY_FILL_WITH_ZEROS
+ /* TODO just set the file size */
+ char block[1024]; // max_size is in KB so this is one unit of max_size
+ memset(block, '\0', sizeof(block));
+
+ const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ for (off_t offset = 0; offset < max_size; ++offset) {
+ if (write(swap, block, sizeof(block)) != sizeof(block)) {
+ debugs(47,0, "Failed to create Rock Store db in " << filePath <<
+ ": " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+ }
+ close(swap);
+#else
+ const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ if (swap < 0) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; create error: " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+
+ if (ftruncate(swap, maximumSize()) != 0) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; truncate error: " << xstrerror());
+ fatal("Rock Store db creation error");
+ }
+
+ char header[HeaderSize];
+ memset(header, '\0', sizeof(header));
+ if (write(swap, header, sizeof(header)) != sizeof(header)) {
+ debugs(47,0, "Failed to initialize Rock Store db in " << filePath <<
+ "; write error: " << xstrerror());
+ fatal("Rock Store db initialization error");
+ }
+ close(swap);
+#endif
+
+}
+
+void
+Rock::SwapDir::init()
+{
+ debugs(47,2, HERE);
+
+ // XXX: SwapDirs aren't refcounted. We make IORequestor calls, which
+ // are refcounted. We up our count once to avoid implicit delete's.
+ RefCountReference();
+
+ DiskIOModule *m = DiskIOModule::Find("Mmapped"); // TODO: configurable?
+ assert(m);
+ io = m->createStrategy();
+ io->init();
+
+ theFile = io->newFile(filePath);
+ theFile->open(O_RDWR, 0644, this);
+
+ rebuild();
+}
+
+void
+Rock::SwapDir::parse(int anIndex, char *aPath)
+{
+ index = anIndex;
+
+ path = xstrdup(aPath);
+
+ // cache store is located at path/db
+ String fname(path);
+ fname.append("/rock");
+ filePath = xstrdup(fname.termedBuf());
+
+ parseSize();
+ parseOptions(0);
+
+ repl = createRemovalPolicy(Config.replPolicy);
+
+ validateOptions();
+}
+
+void
+Rock::SwapDir::reconfigure(int, char *)
+{
+ parseSize();
+ parseOptions(1);
+ // TODO: can we reconfigure the replacement policy (repl)?
+ validateOptions();
+}
+
+/// parse maximum db disk size
+void
+Rock::SwapDir::parseSize()
+{
+ const int64_t sizeInMB = GetInteger();
+ if (sizeInMB < 0)
+ fatal("negative Rock cache_dir size value");
+ max_size = sizeInMB << 10; // MBytes to KBytes
+}
+
+/// check the results of the configuration; only level-0 debugging works here
+void
+Rock::SwapDir::validateOptions()
+{
+ if (max_objsize <= 0)
+ fatal("Rock store requires a positive max-size");
+
+ const int64_t eLimitHi = 0xFFFFFF; // Core sfileno maximum
+ const int64_t eLimitLo = map.entryLimit(); // dynamic shrinking unsupported
+ const int64_t eWanted = (maximumSize() - HeaderSize)/max_objsize;
+ const int64_t eAllowed = min(max(eLimitLo, eWanted), eLimitHi);
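+ // e.g., a hypothetical 100 MB db with 16 KB slots:
+ // eWanted = (104857600 - 16384)/16384 = 6399 entries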
+
+ map.resize(eAllowed); // the map may decide to use an even lower limit
+
+ // Note: We could try to shrink max_size now. It is stored in KB so we
+ // may not be able to make it match the end of the last entry exactly.
+ const int64_t mapRoundWasteMx = max_objsize*sizeof(long)*8;
+ const int64_t sizeRoundWasteMx = 1024; // max_size stored in KB
+ const int64_t roundingWasteMx = max(mapRoundWasteMx, sizeRoundWasteMx);
+ const int64_t totalWaste = maximumSize() - diskOffsetLimit();
+ assert(diskOffsetLimit() <= maximumSize());
+
+ // warn if maximum db size is not reachable due to sfileno limit
+ if (map.entryLimit() == map.AbsoluteEntryLimit() &&
+ totalWaste > roundingWasteMx) {
+ debugs(47, 0, "Rock store cache_dir[" << index << "]:");
+ debugs(47, 0, "\tmaximum number of entries: " << map.entryLimit());
+ debugs(47, 0, "\tmaximum entry size: " << max_objsize << " bytes");
+ debugs(47, 0, "\tmaximum db size: " << maximumSize() << " bytes");
+ debugs(47, 0, "\tusable db size: " << diskOffsetLimit() << " bytes");
+ debugs(47, 0, "\tdisk space waste: " << totalWaste << " bytes");
+ debugs(47, 0, "WARNING: Rock store config wastes space.");
+ }
+
+ if (!repl) {
+ debugs(47,0, "ERROR: Rock cache_dir[" << index << "] " <<
+ "lacks replacement policy and will overflow.");
+ // not fatal because it can be added later
+ }
+
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+}
+
+void
+Rock::SwapDir::rebuild() {
+ ++StoreController::store_dirs_rebuilding;
+ Rebuild *r = new Rebuild(this);
+ r->start(); // will delete self when done
+}
+
+/* Add a new object to the cache with empty memory copy and pointer to disk
+ * use to rebuild store from disk. XXX: dupes UFSSwapDir::addDiskRestore */
+StoreEntry *
+Rock::SwapDir::addEntry(int fileno, const StoreEntry &from)
+{
+ /* if you call this you'd better be sure file_number is not
+ * already in use! */
+ StoreEntry *e = new StoreEntry(); // TODO: optimize by reusing "from"?
+ debugs(47, 5, HERE << e << ' ' << storeKeyText((const cache_key*)from.key)
+ << ", fileno="<< std::setfill('0') << std::hex << std::uppercase <<
+ std::setw(8) << fileno);
+ e->store_status = STORE_OK;
+ e->setMemStatus(NOT_IN_MEMORY);
+ e->swap_status = SWAPOUT_DONE;
+ e->swap_filen = fileno;
+ e->swap_dirn = index;
+ e->swap_file_sz = from.swap_file_sz;
+ e->lock_count = 0;
+ e->lastref = from.lastref;
+ e->timestamp = from.timestamp;
+ e->expires = from.expires;
+ e->lastmod = from.lastmod;
+ e->refcount = from.refcount;
+ e->flags = from.flags;
+ EBIT_SET(e->flags, ENTRY_CACHABLE);
+ EBIT_CLR(e->flags, RELEASE_REQUEST);
+ EBIT_CLR(e->flags, KEY_PRIVATE);
+ e->ping_status = PING_NONE;
+ EBIT_CLR(e->flags, ENTRY_VALIDATED);
+ map.use(e->swap_filen);
+ e->hashInsert((const cache_key*)from.key); /* do it after we clear KEY_PRIVATE */
+ trackReferences(*e);
+ return e;
+}
+
+
+int
+Rock::SwapDir::canStore(const StoreEntry &e) const
+{
+ debugs(47,8, HERE << e.swap_file_sz << " ? " << max_objsize);
+
+ if (EBIT_TEST(e.flags, ENTRY_SPECIAL))
+ return -1;
+
+ if (io->shedLoad())
+ return -1;
+
+ return io->load();
+}
+
+StoreIOState::Pointer
+Rock::SwapDir::createStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
+{
+ if (!theFile || theFile->error()) {
+ debugs(47,4, HERE << theFile);
+ return NULL;
+ }
+
+ if (full()) {
+ maintain();
+ if (full()) // maintain() above warns when it fails
+ return NULL;
+ }
+
+ IoState *sio = new IoState(this, &e, cbFile, cbIo, data);
+
+ sio->swap_dirn = index;
+ sio->swap_filen = map.useNext();
+ sio->offset_ = diskOffset(sio->swap_filen);
+ sio->entrySize = e.objectLen() + e.mem_obj->swap_hdr_sz;
+
+ debugs(47,5, HERE << "dir " << index << " created new fileno " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen << std::dec << " at " << sio->offset_ << " size: " <<
+ sio->entrySize << " (" << e.objectLen() << '+' <<
+ e.mem_obj->swap_hdr_sz << ")");
+
+ assert(sio->offset_ + sio->entrySize <= diskOffsetLimit());
+
+ sio->file(theFile);
+
+ trackReferences(e);
+ return sio;
+}
+
+int64_t
+Rock::SwapDir::diskOffset(int filen) const
+{
+ return HeaderSize + max_objsize*filen;
+}
+
+int64_t
+Rock::SwapDir::diskOffsetLimit() const
+{
+ return diskOffset(map.entryLimit());
+}
+
+StoreIOState::Pointer
+Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
+{
+ if (!theFile || theFile->error()) {
+ debugs(47,4, HERE << theFile);
+ return NULL;
+ }
+
+ IoState *sio = new IoState(this, &e, cbFile, cbIo, data);
+
+ sio->swap_dirn = index;
+ sio->swap_filen = e.swap_filen;
+ debugs(47,5, HERE << "dir " << index << " has old fileno: " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen);
+
+ assert(map.valid(sio->swap_filen));
+ sio->offset_ = diskOffset(sio->swap_filen);
+ sio->entrySize = e.swap_file_sz;
+ assert(sio->entrySize <= max_objsize);
+
+ if (!map.has(sio->swap_filen)) {
+ debugs(47,1, HERE << "bug: dir " << index << " lost fileno: " <<
+ std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
+ sio->swap_filen);
+ return NULL;
+ }
+
+ assert(sio->offset_ + sio->entrySize <= diskOffsetLimit());
+
+ sio->file(theFile);
+ return sio;
+}
+
+void
+Rock::SwapDir::ioCompletedNotification()
+{
+ if (!theFile) {
+ debugs(47, 1, HERE << filePath << ": initialization failure or " <<
+ "premature close of rock db file");
+ fatalf("Rock cache_dir failed to initialize db file: %s", filePath);
+ }
+
+ if (theFile->error()) {
+ debugs(47, 1, HERE << filePath << ": " << xstrerror());
+ fatalf("Rock cache_dir failed to open db file: %s", filePath);
+ }
+
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+
+ // TODO: lower debugging level
+ debugs(47,1, "Rock cache_dir[" << index << "] limits: " <<
+ std::setw(12) << maximumSize() << " disk bytes and " <<
+ std::setw(7) << map.entryLimit() << " entries");
+}
+
+void
+Rock::SwapDir::closeCompleted()
+{
+ theFile = NULL;
+}
+
+void
+Rock::SwapDir::readCompleted(const char *buf, int rlen, int errflag, RefCount< ::ReadRequest> r)
+{
+ ReadRequest *request = dynamic_cast<Rock::ReadRequest*>(r.getRaw());
+ assert(request);
+ IoState::Pointer sio = request->sio;
+
+ // do not increment sio->offset_: callers always supply relative offset
+
+ StoreIOState::STRCB *callback = sio->read.callback;
+ assert(callback);
+ sio->read.callback = NULL;
+ void *cbdata;
+ if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata))
+ callback(cbdata, r->buf, rlen, sio.getRaw());
+}
+
+void
+Rock::SwapDir::writeCompleted(int errflag, size_t rlen, RefCount< ::WriteRequest> r)
+{
+ Rock::WriteRequest *request = dynamic_cast<Rock::WriteRequest*>(r.getRaw());
+ assert(request);
+ assert(request->sio != NULL);
+ IoState &sio = *request->sio;
+ if (errflag != DISK_OK)
+ map.clear(sio.swap_filen); // TODO: test by forcing failure
+ // else sio.offset_ += rlen;
+
+ // TODO: always compute cur_size based on map, do not store it
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ assert(sio.offset_ <= diskOffsetLimit()); // post-factum check
+
+ sio.finishedWriting(errflag);
+}
+
+bool
+Rock::SwapDir::full() const
+{
+ return map.full();
+}
+
+void
+Rock::SwapDir::updateSize(int64_t size, int sign)
+{
+ // it is not clear what store_swap_size really is; TODO: move low-level
+ // size maintenance to individual store dir types
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ store_swap_size = cur_size;
+
+ if (sign > 0)
+ ++n_disk_objects;
+ else if (sign < 0)
+ --n_disk_objects;
+}
+
+// storeSwapOutFileClosed calls this method on DISK_NO_SPACE_LEFT,
+// but that should not happen for us
+void
+Rock::SwapDir::diskFull() {
+ debugs(20,1, "Internal ERROR: No space left error with rock cache_dir: " <<
+ filePath);
+}
+
+/// purge while full(); it should be sufficient to purge just one
+void
+Rock::SwapDir::maintain()
+{
+ debugs(47,3, HERE << "cache_dir[" << index << "] guards: " <<
+ StoreController::store_dirs_rebuilding << !repl << !full());
+
+ // XXX: UFSSwapDir::maintain says we must quit during rebuild
+ if (StoreController::store_dirs_rebuilding)
+ return;
+
+ if (!repl)
+ return; // no means (cannot find a victim)
+
+ if (!full())
+ return; // no need (to find a victim)
+
+ debugs(47,3, HERE << "cache_dir[" << index << "] state: " <<
+ map.full() << ' ' << currentSize() << " < " << diskOffsetLimit());
+
+ // Hopefully, we find a removable entry much sooner (TODO: use time?)
+ const int maxProbed = 10000;
+ RemovalPurgeWalker *walker = repl->PurgeInit(repl, maxProbed);
+
+ // It really should not take that long, but this will stop "infinite" loops
+ const int maxFreed = 1000;
+ int freed = 0;
+ // TODO: should we purge more than needed to minimize overheads?
+ for (; freed < maxFreed && full(); ++freed) {
+ if (StoreEntry *e = walker->Next(walker))
+ e->release(); // will call our unlink() method
+ else
+ break; // no more objects
+ }
+
+ debugs(47,2, HERE << "Rock cache_dir[" << index << "] freed " << freed <<
+ " scanned " << walker->scanned << '/' << walker->locked);
+
+ walker->Done(walker);
+
+ if (full()) {
+ debugs(47,0, "ERROR: Rock cache_dir[" << index << "] " <<
+ "is still full after freeing " << freed << " entries. A bug?");
+ }
+}
+
+void
+Rock::SwapDir::reference(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ if (repl->Referenced)
+ repl->Referenced(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::dereference(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ if (repl->Dereferenced)
+ repl->Dereferenced(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::unlink(int fileno)
+{
+ debugs(47,5, HERE << index << ' ' << fileno);
+ if (map.has(fileno)) {
+ map.clear(fileno);
+ cur_size = (HeaderSize + max_objsize * map.entryCount()) >> 10;
+ // XXX: update store
+ }
+}
+
+void
+Rock::SwapDir::unlink(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ ignoreReferences(e);
+ unlink(e.swap_filen);
+}
+
+void
+Rock::SwapDir::trackReferences(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ repl->Add(repl, &e, &e.repl);
+}
+
+
+void
+Rock::SwapDir::ignoreReferences(StoreEntry &e)
+{
+ debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
+ repl->Remove(repl, &e, &e.repl);
+}
+
+void
+Rock::SwapDir::statfs(StoreEntry &e) const
+{
+ storeAppendPrintf(&e, "\n");
+ storeAppendPrintf(&e, "Maximum Size: %"PRIu64" KB\n", max_size);
+ storeAppendPrintf(&e, "Current Size: %"PRIu64" KB %.2f%%\n", cur_size,
+ 100.0 * cur_size / max_size);
+
+ storeAppendPrintf(&e, "Maximum entries: %9d\n", map.entryLimit());
+ storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n",
+ map.entryCount(), (100.0 * map.entryCount() / map.entryLimit()));
+
+ storeAppendPrintf(&e, "Pending operations: %d out of %d\n",
+ store_open_disk_fd, Config.max_open_disk_fds);
+
+ storeAppendPrintf(&e, "Flags:");
+
+ if (flags.selected)
+ storeAppendPrintf(&e, " SELECTED");
+
+ if (flags.read_only)
+ storeAppendPrintf(&e, " READ-ONLY");
+
+ storeAppendPrintf(&e, "\n");
+
+}
--- /dev/null
+#ifndef SQUID_FS_ROCK_SWAP_DIR_H
+#define SQUID_FS_ROCK_SWAP_DIR_H
+
+#include "SwapDir.h"
+#include "DiskIO/IORequestor.h"
+#include "rock/RockDirMap.h"
+
+class DiskIOStrategy;
+class DiskFile;
+class ReadRequest;
+class WriteRequest;
+
+namespace Rock {
+
+class Rebuild;
+
+/// \ingroup Rock
+class SwapDir: public ::SwapDir, public IORequestor
+{
+public:
+ SwapDir();
+ virtual ~SwapDir();
+
+ /* public ::SwapDir API */
+ virtual void reconfigure(int, char *);
+ virtual StoreSearch *search(String const url, HttpRequest *);
+
+protected:
+ /* protected ::SwapDir API */
+ virtual void create();
+ virtual void init();
+ virtual int canStore(StoreEntry const &) const;
+ virtual StoreIOState::Pointer createStoreIO(StoreEntry &, StoreIOState::STFNCB *, StoreIOState::STIOCB *, void *);
+ virtual StoreIOState::Pointer openStoreIO(StoreEntry &, StoreIOState::STFNCB *, StoreIOState::STIOCB *, void *);
+ virtual void maintain();
+ virtual void updateSize(int64_t size, int sign);
+ virtual void diskFull();
+ virtual void reference(StoreEntry &e);
+ virtual void dereference(StoreEntry &e);
+ virtual void unlink(StoreEntry &e);
+ virtual void statfs(StoreEntry &e) const;
+
+ /* IORequestor API */
+ virtual void ioCompletedNotification();
+ virtual void closeCompleted();
+ virtual void readCompleted(const char *buf, int len, int errflag, RefCount< ::ReadRequest>);
+ virtual void writeCompleted(int errflag, size_t len, RefCount< ::WriteRequest>);
+
+ virtual void parse(int index, char *path);
+ void parseSize(); ///< parses anonymous cache_dir size option
+ void validateOptions(); ///< warns of configuration problems; may quit
+
+ void rebuild(); ///< starts loading and validating stored entry metadata
+ void unlink(int fileno); ///< removes entries that failed to load during rebuild
+ /// adds entries successfully loaded during rebuild
+ StoreEntry *addEntry(int fileno, const StoreEntry &from);
+
+ bool full() const; ///< no more entries can be stored without purging
+ void trackReferences(StoreEntry &e); ///< add to replacement policy scope
+ void ignoreReferences(StoreEntry &e); ///< delete from repl policy scope
+
+ // TODO: change cur_size and max_size type to stop this madness
+ int64_t currentSize() const { return static_cast<int64_t>(cur_size) << 10;}
+ int64_t maximumSize() const { return static_cast<int64_t>(max_size) << 10;}
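+ // cur_size and max_size are in KB; "<< 10" converts to the bytes callers expect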
+ int64_t diskOffset(int filen) const;
+ int64_t diskOffsetLimit() const;
+ int entryLimit() const { return map.entryLimit(); }
+
+ friend class Rebuild;
+ const char *filePath; ///< location of cache storage file inside path/
+
+private:
+ DiskIOStrategy *io;
+ RefCount<DiskFile> theFile; ///< cache storage for this cache_dir
+ DirMap map;
+
+ static const int64_t HeaderSize; ///< on-disk db header size
+};
+
+} // namespace Rock
+
+#endif /* SQUID_FS_ROCK_SWAP_DIR_H */
rebuildFromDirectory();
}
-struct InitStoreEntry : public unary_function<StoreMeta, void> {
- InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}
-
- void operator()(StoreMeta const &x) {
- switch (x.getType()) {
-
- case STORE_META_KEY:
- assert(x.length == SQUID_MD5_DIGEST_LENGTH);
- memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
- break;
-
- case STORE_META_STD:
- struct old_metahdr {
- time_t timestamp;
- time_t lastref;
- time_t expires;
- time_t lastmod;
- size_t swap_file_sz;
- u_short refcount;
- u_short flags;
- } *tmp;
- tmp = (struct old_metahdr *)x.value;
- assert(x.length == STORE_HDR_METASIZE_OLD);
- what->timestamp = tmp->timestamp;
- what->lastref = tmp->lastref;
- what->expires = tmp->expires;
- what->lastmod = tmp->lastmod;
- what->swap_file_sz = tmp->swap_file_sz;
- what->refcount = tmp->refcount;
- what->flags = tmp->flags;
- break;
-
- case STORE_META_STD_LFS:
- assert(x.length == STORE_HDR_METASIZE);
- memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
- break;
-
- default:
- break;
- }
- }
-
- StoreEntry *what;
- cache_key *index;
-};
-
void
RebuildState::rebuildFromDirectory()
{
- LOCAL_ARRAY(char, hdr_buf, SM_PAGE_SIZE);
currentEntry(NULL);
cache_key key[SQUID_MD5_DIGEST_LENGTH];
struct stat sb;
- int swap_hdr_len;
int fd = -1;
- StoreMeta *tlv_list;
assert(this != NULL);
debugs(47, 3, "commonUfsDirRebuildFromDirectory: DIR #" << sd->index);
continue;
}
- if ((++counts.scancount & 0xFFFF) == 0)
- debugs(47, 3, " " << sd->path << " " << std::setw(7) << counts.scancount << " files opened so far.");
- debugs(47, 9, "file_in: fd=" << fd << " "<< std::setfill('0') << std::hex << std::uppercase << std::setw(8) << filn);
-
-
- statCounter.syscalls.disk.reads++;
-
- int len;
-
- if ((len = FD_READ_METHOD(fd, hdr_buf, SM_PAGE_SIZE)) < 0) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: read(FD " << fd << "): " << xstrerror());
- file_close(fd);
- store_open_disk_fd--;
- fd = -1;
- continue;
- }
+ StoreEntry tmpe;
+ const bool loaded = storeRebuildLoadEntry(fd, tmpe, key, counts,
+ (uint64_t)sb.st_size);
file_close(fd);
store_open_disk_fd--;
fd = -1;
- swap_hdr_len = 0;
-
- StoreMetaUnpacker aBuilder(hdr_buf, len, &swap_hdr_len);
-
- if (!aBuilder.isBufferSane()) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: Swap data buffer length is not sane.");
- /* XXX shouldn't this be a call to commonUfsUnlink ? */
- sd->unlinkFile ( filn);
- continue;
- }
-
- tlv_list = aBuilder.createStoreMeta ();
- if (tlv_list == NULL) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: failed to get meta data");
- /* XXX shouldn't this be a call to commonUfsUnlink ? */
- sd->unlinkFile (filn);
+ if (!loaded) {
+ // XXX: shouldn't this be a call to commonUfsUnlink?
+ sd->unlinkFile(filn); // should we unlink in all failure cases?
continue;
}
- debugs(47, 3, "commonUfsDirRebuildFromDirectory: successful swap meta unpacking");
- memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);
-
- StoreEntry tmpe;
- InitStoreEntry visitor(&tmpe, key);
- for_each(*tlv_list, visitor);
- storeSwapTLVFree(tlv_list);
- tlv_list = NULL;
-
- if (storeKeyNull(key)) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: NULL key");
- sd->unlinkFile(filn);
+ if (!storeRebuildKeepEntry(tmpe, key, counts))
continue;
- }
-
- tmpe.key = key;
- /* check sizes */
-
- if (tmpe.swap_file_sz == 0) {
- tmpe.swap_file_sz = (uint64_t) sb.st_size;
- } else if (tmpe.swap_file_sz == (uint64_t)(sb.st_size - swap_hdr_len)) {
- tmpe.swap_file_sz = (uint64_t) sb.st_size;
- } else if (tmpe.swap_file_sz != (uint64_t)sb.st_size) {
- debugs(47, 1, "commonUfsDirRebuildFromDirectory: SIZE MISMATCH " <<
- tmpe.swap_file_sz << "!=" <<
- sb.st_size);
-
- sd->unlinkFile(filn);
- continue;
- }
-
- if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
- sd->unlinkFile(filn);
- counts.badflags++;
- continue;
- }
-
- /* this needs to become
- * 1) unpack url
- * 2) make synthetic request with headers ?? or otherwise search
- * for a matching object in the store
- * TODO FIXME change to new async api
- * TODO FIXME I think there is a race condition here with the
- * async api :
- * store A reads in object foo, searchs for it, and finds nothing.
- * store B reads in object foo, searchs for it, finds nothing.
- * store A gets called back with nothing, so registers the object
- * store B gets called back with nothing, so registers the object,
- * which will conflict when the in core index gets around to scanning
- * store B.
- *
- * this suggests that rather than searching for duplicates, the
- * index rebuild should just assume its the most recent accurate
- * store entry and whoever indexes the stores handles duplicates.
- */
- e = Store::Root().get(key);
-
- if (e && e->lastref >= tmpe.lastref) {
- /* key already exists, current entry is newer */
- /* keep old, ignore new */
- counts.dupcount++;
- continue;
- } else if (NULL != e) {
- /* URL already exists, this swapfile not being used */
- /* junk old, load new */
- e->release(); /* release old entry */
- counts.dupcount++;
- }
counts.objcount++;
// tmpe.dump(5);
SQUIDCEXTERN void storeRebuildComplete(struct _store_rebuild_data *);
SQUIDCEXTERN void storeRebuildProgress(int sd_index, int total, int sofar);
+/// tries to load and validate entry metadata; fills e and key on success
+SQUIDCEXTERN bool storeRebuildLoadEntry(int fd, StoreEntry &e, cache_key *key, struct _store_rebuild_data &counts, uint64_t expectedSize);
+/// checks whether the loaded entry should be kept; updates counters
+SQUIDCEXTERN bool storeRebuildKeepEntry(const StoreEntry &e, const cache_key *key, struct _store_rebuild_data &counts);
+
/*
* store_swapin.c
*/
{
nodes.destroy(SplayNode<mem_node *>::DefaultFree);
inmem_hi = 0;
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
}
bool
memcpy(aNode->nodeBuffer.data + aNode->nodeBuffer.length, source, copyLen);
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
if (inmem_hi <= location)
inmem_hi = location + copyLen;
/* Adjust the ptr and len according to what was deposited in the page */
aNode->nodeBuffer.length += copyLen;
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
+ debugs(19, 9, HERE << this << " hi: " << endOffset());
return copyLen;
}
void
mem_hdr::internalAppend(const char *data, int len)
{
- debugs(19, 6, "memInternalAppend: len " << len);
+ debugs(19, 6, "memInternalAppend: " << this << " len " << len);
while (len > 0) {
makeAppendSpace();
{
assert(target.range().end > target.range().start);
- debugs(19, 6, "memCopy: " << target.range());
+ debugs(19, 6, "memCopy: " << this << " " << target.range());
/* we shouldn't ever ask for absent offsets */
mem_hdr::write (StoreIOBuffer const &writeBuffer)
{
PROF_start(mem_hdr_write);
- debugs(19, 6, "mem_hdr::write: " << writeBuffer.range() << " object end " << endOffset());
+ debugs(19, 6, "mem_hdr::write: " << this << " " << writeBuffer.range() << " object end " << endOffset());
if (unionNotEmpty(writeBuffer)) {
debugs(19,0,"mem_hdr::write: writeBuffer: " << writeBuffer.range());
}
mem_hdr::mem_hdr() : inmem_hi(0)
-{}
+{
+ debugs(19, 9, HERE << this << " hi: " << inmem_hi);
+}
mem_hdr::~mem_hdr()
{
if (mem_obj->inmem_lo == 0 && !isEmpty()) {
if (swap_status == SWAPOUT_DONE) {
+ debugs(20,7, HERE << mem_obj << " lo: " << mem_obj->inmem_lo << " hi: " << mem_obj->endOffset() << " size: " << mem_obj->object_sz);
if (mem_obj->endOffset() == mem_obj->object_sz) {
/* hot object fully swapped in */
return STORE_MEM_CLIENT;
return STORE_DISK_CLIENT;
}
-StoreEntry::StoreEntry()
+StoreEntry::StoreEntry():
+ swap_file_sz(0)
{
debugs(20, 3, HERE << "new StoreEntry " << this);
mem_obj = NULL;
swap_dirn = -1;
}
-StoreEntry::StoreEntry(const char *aUrl, const char *aLogUrl)
+StoreEntry::StoreEntry(const char *aUrl, const char *aLogUrl):
+ swap_file_sz(0)
{
debugs(20, 3, HERE << "new StoreEntry " << this);
mem_obj = new MemObject(aUrl, aLogUrl);
debugs(20, 4, "StoreEntry::setMemStatus: not inserting special " << mem_obj->url << " into policy");
} else {
mem_policy->Add(mem_policy, this, &mem_obj->repl);
- debugs(20, 4, "StoreEntry::setMemStatus: inserted mem node " << mem_obj->url);
+ debugs(20, 4, "StoreEntry::setMemStatus: inserted mem node " << mem_obj->url << " key: " << getMD5Text());
}
hot_obj_count++;
char const *
StoreEntry::getSerialisedMetaData()
{
+ const size_t swap_hdr_sz0 = storeSwapMetaSize(this);
+ assert (swap_hdr_sz0 > 0); // size_t cannot be negative; a valid header is never empty
+ mem_obj->swap_hdr_sz = swap_hdr_sz0;
+ // now we can use swap_hdr_sz to calculate swap_file_sz
+ // so that storeSwapMetaBuild/Pack can pack the correct swap_file_sz
+ swap_file_sz = objectLen() + mem_obj->swap_hdr_sz;
+
StoreMeta *tlv_list = storeSwapMetaBuild(this);
int swap_hdr_sz;
char *result = storeSwapMetaPack(tlv_list, &swap_hdr_sz);
+ assert(static_cast<int>(swap_hdr_sz0) == swap_hdr_sz);
storeSwapTLVFree(tlv_list);
- assert (swap_hdr_sz >= 0);
- mem_obj->swap_hdr_sz = (size_t) swap_hdr_sz;
return result;
}
+/*
+ * Calculate TLV list size for a StoreEntry
+ * XXX: Must match the actual storeSwapMetaBuild result size
+ */
+size_t
+storeSwapMetaSize(const StoreEntry * e)
+{
+ size_t size = 0;
+ ++size; // STORE_META_OK
+ size += sizeof(int); // size of header to follow
+
+ const size_t pfx = sizeof(char) + sizeof(int); // type byte and length int at the start of each list entry
+
+ size += pfx + SQUID_MD5_DIGEST_LENGTH;
+ size += pfx + STORE_HDR_METASIZE;
+ size += pfx + strlen(e->url()) + 1;
+
+ if (const char *vary = e->mem_obj->vary_headers)
+ size += pfx + strlen(vary) + 1;
+
+ debugs(20, 3, "storeSwapMetaSize(" << e->url() << "): " << size);
+ return size;
+}
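+
+// Worked example (a sketch, assuming a 4-byte int and a 1-byte type tag):
+// a 20-byte URL with no Vary headers packs as
+//   1 (STORE_META_OK) + 4 (length of the list that follows)
+//   + 5 + SQUID_MD5_DIGEST_LENGTH (key)
+//   + 5 + STORE_HDR_METASIZE (standard header)
+//   + 5 + 21 (URL plus terminating NUL)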
+
bool
StoreEntry::swapoutPossible()
{
size_t statCount = 500;
while (statCount-- && !currentSearch->isDone() && currentSearch->next()) {
- ++validated;
StoreEntry *e;
e = currentSearch->currentItem();
* Only set the file bit if we know its a valid entry
* otherwise, set it in the validation procedure
*/
- e->store()->updateSize(e->swap_file_sz, 1);
+
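+ // count only entries that finished swapping out; others have no final on-disk size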
+ if (e->swap_status == SWAPOUT_DONE)
+ e->store()->updateSize(e->swap_file_sz, 1);
if ((++validated & 0x3FFFF) == 0)
/* TODO format the int with with a stream operator */
debugs(20, 1, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete");
last_report = squid_curtime;
}
+
+#include "fde.h"
+#include "StoreMetaUnpacker.h"
+#include "StoreMeta.h"
+#include "Generic.h"
+
+struct InitStoreEntry : public unary_function<StoreMeta, void> {
+ InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}
+
+ void operator()(StoreMeta const &x) {
+ switch (x.getType()) {
+
+ case STORE_META_KEY:
+ assert(x.length == SQUID_MD5_DIGEST_LENGTH);
+ memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
+ break;
+
+ case STORE_META_STD:
+ struct old_metahdr {
+ time_t timestamp;
+ time_t lastref;
+ time_t expires;
+ time_t lastmod;
+ size_t swap_file_sz;
+ u_short refcount;
+ u_short flags;
+ } *tmp;
+ tmp = (struct old_metahdr *)x.value;
+ assert(x.length == STORE_HDR_METASIZE_OLD);
+ what->timestamp = tmp->timestamp;
+ what->lastref = tmp->lastref;
+ what->expires = tmp->expires;
+ what->lastmod = tmp->lastmod;
+ what->swap_file_sz = tmp->swap_file_sz;
+ what->refcount = tmp->refcount;
+ what->flags = tmp->flags;
+ break;
+
+ case STORE_META_STD_LFS:
+ assert(x.length == STORE_HDR_METASIZE);
+ memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ StoreEntry *what;
+ cache_key *index;
+};
+
+bool
+storeRebuildLoadEntry(int fd, StoreEntry &tmpe, cache_key *key,
+ struct _store_rebuild_data &counts, uint64_t expectedSize)
+{
+ if (fd < 0)
+ return false;
+
+ char hdr_buf[SM_PAGE_SIZE];
+
+ ++counts.scancount;
+ statCounter.syscalls.disk.reads++;
+ int len;
+ if ((len = FD_READ_METHOD(fd, hdr_buf, SM_PAGE_SIZE)) < 0) {
+ debugs(47, 1, HERE << "failed to read swap entry meta data: " << xstrerror());
+ return false;
+ }
+
+ int swap_hdr_len = 0;
+ StoreMetaUnpacker aBuilder(hdr_buf, len, &swap_hdr_len);
+ if (aBuilder.isBufferZero()) {
+ debugs(47,5, HERE << "skipping empty record.");
+ return false;
+ }
+
+ if (!aBuilder.isBufferSane()) {
+ debugs(47,1, "Warning: Ignoring malformed cache entry.");
+ return false;
+ }
+
+ StoreMeta *tlv_list = aBuilder.createStoreMeta();
+ if (!tlv_list) {
+ debugs(47, 1, HERE << "failed to get swap entry meta data list");
+ return false;
+ }
+
+ debugs(47,7, HERE << "successful swap meta unpacking");
+ memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);
+
+ InitStoreEntry visitor(&tmpe, key);
+ for_each(*tlv_list, visitor);
+ storeSwapTLVFree(tlv_list);
+ tlv_list = NULL;
+
+ if (storeKeyNull(key)) {
+ debugs(47,1, HERE << "NULL swap entry key");
+ return false;
+ }
+
+ tmpe.key = key;
+ /* check sizes */
+
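+ // swap_file_sz on disk may be zero (never set) or may exclude the swap
+ // header; both conventions are accepted and normalized to the actual file
+ // size here. Any other mismatch with the file size marks the entry as corrupt.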
+ if (expectedSize > 0) {
+ if (tmpe.swap_file_sz == 0) {
+ tmpe.swap_file_sz = expectedSize;
+ } else if (tmpe.swap_file_sz == (uint64_t)(expectedSize - swap_hdr_len)) {
+ tmpe.swap_file_sz = expectedSize;
+ } else if (tmpe.swap_file_sz != expectedSize) {
+ debugs(47, 1, HERE << "swap entry SIZE MISMATCH " <<
+ tmpe.swap_file_sz << "!=" << expectedSize);
+ return false;
+ }
+ }
+
+ if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
+ counts.badflags++;
+ return false;
+ }
+
+ return true;
+}
+
+bool
+storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key,
+ struct _store_rebuild_data &counts)
+{
+ /* this needs to become
+ * 1) unpack url
+ * 2) make synthetic request with headers ?? or otherwise search
+ * for a matching object in the store
+ * TODO FIXME change to new async api
+ * TODO FIXME I think there is a race condition here with the
+ * async api :
+ * store A reads in object foo, searchs for it, and finds nothing.
+ * store B reads in object foo, searchs for it, finds nothing.
+ * store A gets called back with nothing, so registers the object
+ * store B gets called back with nothing, so registers the object,
+ * which will conflict when the in core index gets around to scanning
+ * store B.
+ *
+ * this suggests that rather than searching for duplicates, the
+ * index rebuild should just assume its the most recent accurate
+ * store entry and whoever indexes the stores handles duplicates.
+ */
+ if (StoreEntry *e = Store::Root().get(key)) {
+
+ if (e->lastref >= tmpe.lastref) {
+ /* key already exists, existing entry is at least as fresh */
+ /* keep old, ignore new */
+ counts.dupcount++;
+ return false;
+ } else {
+ /* URL already exists, this swapfile not being used */
+ /* junk old, load new */
+ e->release(); /* release old entry */
+ counts.dupcount++;
+ }
+ }
+
+ return true;
+}
return;
}
+ if (e->mem_status != NOT_IN_MEMORY)
+ debugs(20, 3, HERE << "already IN_MEMORY");
+
debugs(20, 3, "storeSwapInStart: called for : " << e->swap_dirn << " " <<
std::hex << std::setw(8) << std::setfill('0') << std::uppercase <<
e->swap_filen << " " << e->getMD5Text());
debugs(20, 3, "storeSwapOutFileClosed: SwapOut complete: '" << e->url() << "' to " <<
e->swap_dirn << ", " << std::hex << std::setw(8) << std::setfill('0') <<
std::uppercase << e->swap_filen);
+ debugs(20, 3, "storeSwapOutFileClosed: Should be:" <<
+ e->swap_file_sz << " = " << e->objectLen() << " + " <<
+ mem->swap_hdr_sz);
e->swap_file_sz = e->objectLen() + mem->swap_hdr_sz;
e->swap_status = SWAPOUT_DONE;
e->store()->updateSize(e->swap_file_sz, 1);
storeRebuildComplete(struct _store_rebuild_data *dc)
{}
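+/* stubs for linking tests: report that no entry could be loaded or kept */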
+bool
+storeRebuildLoadEntry(int fd, StoreEntry &tmpe, cache_key *key,
+ struct _store_rebuild_data &counts, uint64_t expectedSize)
+{
+ return false;
+}
+
+bool
+storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key,
+ struct _store_rebuild_data &counts)
+{
+ return false;
+}