Merged from trunk (r12732, v3.3.3+).

author Alex Rousskov <rousskov@measurement-factory.com>

Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)

committer Alex Rousskov <rousskov@measurement-factory.com>

Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)
author Alex Rousskov <rousskov@measurement-factory.com>
Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)
committer Alex Rousskov <rousskov@measurement-factory.com>
Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)
diff --cc include/snmp_impl.h

index 6a6db9fce2d2e58804ce123e027f3e37554dacb2,6a6db9fce2d2e58804ce123e027f3e37554dacb2..124e93e3dfc09e115ff76b5a1c278bd84dec1d69
--- 1/include/snmp_impl.h
--- 2/include/snmp_impl.h
+++ b/include/snmp_impl.h
@@@ -65,7 -65,7 +65,4 @@@ struct trapVar 
       struct trapVar *next;
   };
   
--/* from snmp.c */
--extern u_char sid[];          /* size SID_MAX_LEN */
--
   #endif /* SQUID_SNMP_IMPL_H */
diff --cc scripts/find-alive.pl
Simple merge
diff --cc src/DiskIO/IpcIo/IpcIoFile.cc
Simple merge
diff --cc src/Makefile.am
Simple merge
diff --cc src/StoreIOState.h
Simple merge
diff --cc src/cf.data.pre

index e4b28735b516e39b979ec1ea19e44b3a9c8026bf,e38603b1b52ff5124d474471261db876ff2160f8..a4c91786f1a3b52f6b95396f865a9d10b79bc310
--- 1/src/cf.data.pre
--- 2/src/cf.data.pre
+++ b/src/cf.data.pre
@@@ -3202,12 -3299,20 +3299,18 @@@ DOC_STAR
   
         The rock store type:
   
- -          cache_dir rock Directory-Name Mbytes <max-size=bytes> [options]
+ +          cache_dir rock Directory-Name Mbytes [options]
   
         The Rock Store type is a database-style storage. All cached
- -      entries are stored in a "database" file, using fixed-size slots,
- -      one entry per slot. The database size is specified in MB. The
- -      slot size is specified in bytes using the max-size option. See
- -      below for more info on the max-size option.
+ +      entries are stored in a "database" file, using fixed-size slots.
+ +      A single entry occupies one or more slots.
   
+       If possible, Squid using Rock Store creates a dedicated kid
+       process called "disker" to avoid blocking Squid worker(s) on disk
+       I/O. One disker kid is created for each rock cache_dir.  Diskers
+       are created only when Squid, running in daemon mode, has support
+       for the IpcIo disk I/O module.
+ 
         swap-timeout=msec: Squid will not start writing a miss to or
         reading a hit from disk if it estimates that the swap operation
         will take more than the specified number of milliseconds. By
diff --cc src/client_side_reply.cc
Simple merge
diff --cc src/client_side_reply.h
Simple merge
diff --cc src/fs/coss/store_coss.h
Simple merge
diff --cc src/fs/coss/store_io_coss.cc
Simple merge
diff --cc src/fs/rock/RockIoState.cc

index 9fe072694eb9237751c526728dfca1128ec78ab9,a682fb5128573aa6a5258cfb5c97490edd41cb1d..4eab56931c429346f773681b3b9fa2161f952262
--- 1/src/fs/rock/RockIoState.cc
--- 2/src/fs/rock/RockIoState.cc
+++ b/src/fs/rock/RockIoState.cc
@@@ -113,104 -69,45 +113,104 @@@ Rock::IoState::read_(char *buf, size_t 
       read.callback = cb;
       read.callback_data = cbdataReference(data);
   
- -    theFile->read(new ReadRequest(
- -                      ::ReadRequest(buf, diskOffset + cellOffset, len), this));
+ +    const uint64_t diskOffset = dir->diskOffset(sidCurrent);
+ +    theFile->read(new ReadRequest(::ReadRequest(buf,
+ +        diskOffset + sizeof(DbCellHeader) + coreOff - objOffset, len), this));
   }
   
- -// We only buffer data here; we actually write when close() is called.
- -// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions
- -// of the slot when the write does not end at the page or sector boundary.
- -void
+ +/// wraps tryWrite() to handle deep write failures centrally and safely
+ +bool
   Rock::IoState::write(char const *buf, size_t size, off_t coreOff, FREE *dtor)
   {
- -    // TODO: move to create?
- -    if (!coreOff) {
- -        assert(theBuf.isNull());
- -        assert(payloadEnd <= slotSize);
- -        theBuf.init(min(payloadEnd, slotSize), slotSize);
- -        // start with our header; TODO: consider making it a trailer
- -        DbCellHeader header;
- -        assert(static_cast<int64_t>(sizeof(header)) <= payloadEnd);
- -        header.payloadSize = payloadEnd - sizeof(header);
- -        theBuf.append(reinterpret_cast<const char*>(&header), sizeof(header));
- -    } else {
- -        // Core uses -1 offset as "append". Sigh.
- -        assert(coreOff == -1);
- -        assert(!theBuf.isNull());
+ +    bool success = false;
+ +    try {
+ +        tryWrite(buf, size, coreOff);
+ +        success = true;
-     } catch (const std::exception &e) { // TODO: should we catch ... as well?
-         debugs(79, 2, "db write error: " << e.what());
++    } catch (const std::exception &ex) { // TODO: should we catch ... as well?
++        debugs(79, 2, "db write error: " << ex.what());
+ +        dir->writeError(swap_filen);
+ +        finishedWriting(DISK_ERROR);
+ +        // 'this' might be gone beyond this point; fall through to free buf
       }
   
- -    theBuf.append(buf, size);
- -    offset_ += size; // so that Core thinks we wrote it
- -
+ +    // careful: 'this' might be gone here
+ + 
       if (dtor)
           (dtor)(const_cast<char*>(buf)); // cast due to a broken API?
+ +
+ +    return success;
   }
   
- -// write what was buffered during write() calls
+ +/** We only write data when full slot is accumulated or when close() is called.
+ + We buffer, in part, to avoid forcing OS to _read_ old unwritten portions of
+ + the slot when the write does not end at the page or sector boundary. */
   void
- -Rock::IoState::startWriting()
+ +Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
+ +{
+ +    debugs(79, 7, swap_filen << " writes " << size << " more");
+ +
+ +    // either this is the first write or append; we do not support write gaps
+ +    assert(!coreOff || coreOff == -1);
+ +
+ +    // allocate the first slice during the first write
+ +    if (!coreOff) {
+ +        assert(sidCurrent < 0);
+ +        sidCurrent = reserveSlotForWriting(); // throws on failures
+ +        assert(sidCurrent >= 0);
+ +        writeAnchor().start = sidCurrent;
+ +    }
+ +
+ +    // buffer incoming data in slot buffer and write overflowing or final slots
+ +    // quit when no data left or we stopped writing on reentrant error
+ +    while (size > 0 && theFile != NULL) {
+ +        assert(sidCurrent >= 0);
+ +        const size_t processed = writeToBuffer(buf, size);
+ +        buf += processed;
+ +        size -= processed;
+ +        const bool overflow = size > 0;
+ +
+ +        // We do not write a full buffer without overflow because
+ +        // we would not yet know what to set the nextSlot to.
+ +        if (overflow) {
+ +            const SlotId sidNext = reserveSlotForWriting(); // throws
+ +            assert(sidNext >= 0);
+ +            writeToDisk(sidNext);
+ +        }
+ +    }
+ +}
+ +
+ +/// Buffers incoming data for the current slot.
+ +/// Returns the number of bytes buffered.
+ +size_t
+ +Rock::IoState::writeToBuffer(char const *buf, size_t size)
+ +{
+ +    // do not buffer a cell header for nothing
+ +    if (!size)
+ +        return 0;
+ +
+ +    if (!theBuf.size) {
+ +        // will fill the header in writeToDisk when the next slot is known
+ +        theBuf.appended(sizeof(DbCellHeader));
+ +    }
+ +
+ +    size_t forCurrentSlot = min(size, static_cast<size_t>(theBuf.spaceSize()));
+ +    theBuf.append(buf, forCurrentSlot);
+ +    offset_ += forCurrentSlot; // so that Core thinks we wrote it
+ +    return forCurrentSlot;
+ +}
+ +
+ +/// write what was buffered during write() calls
+ +/// negative sidNext means this is the last write request for this entry
+ +void
+ +Rock::IoState::writeToDisk(const SlotId sidNext)
   {
       assert(theFile != NULL);
- -    assert(!theBuf.isNull());
+ +    assert(theBuf.size >= sizeof(DbCellHeader));
+ +
+ +    if (sidNext < 0) { // we are writing the last slot
+ +        e->swap_file_sz = offset_;
+ +        writeAnchor().basics.swap_file_sz = offset_; // would not hurt, right?
+ +    }
   
       // TODO: if DiskIO module is mmap-based, we should be writing whole pages
       // to avoid triggering read-page;new_head+old_tail;write-page overheads
diff --cc src/fs/rock/RockSwapDir.cc

index 3ede34bc87415db138fc8fc3540e678dfe638133,bb29426a0caad5cd43b9279fb3ee4a726d9e05f1..a0c31516689164a847a6104462fd79b1a5c08f58
--- 1/src/fs/rock/RockSwapDir.cc
--- 2/src/fs/rock/RockSwapDir.cc
+++ b/src/fs/rock/RockSwapDir.cc
@@@ -165,18 -160,29 +165,30 @@@ Rock::SwapDir::create(
   
       debugs (47,3, HERE << "creating in " << path);
   
-     struct stat swap_sb;
-     if (::stat(path, &swap_sb) < 0) {
+     struct stat dir_sb;
+     if (::stat(path, &dir_sb) == 0) {
+         struct stat file_sb;
+         if (::stat(filePath, &file_sb) == 0) {
+             debugs (47, DBG_IMPORTANT, "Skipping existing Rock db: " << filePath);
+             return;
+         }
+         // else the db file is not there or is not accessible, and we will try
+         // to create it later below, generating a detailed error on failures.
+     } else { // path does not exist or is inaccessible
+         // If path exists but is not accessible, mkdir() below will fail, and
+         // the admin should see the error and act accordingly, so there is
+         // no need to distinguish ENOENT from other possible stat() errors.
           debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path);
           const int res = mkdir(path, 0700);
- -        if (res != 0) {
- -            debugs(47, DBG_CRITICAL, "Failed to create Rock db dir " << path <<
- -                   ": " << xstrerror());
- -            fatal("Rock Store db creation error");
- -        }
+ +        if (res != 0)
+ +            createError("mkdir");
       }
   
+     debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath);
+ +    const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ +    if (swap < 0)
+ +        createError("create");
+ +
   #if SLOWLY_FILL_WITH_ZEROS
       char block[1024];
       Must(maxSize() % sizeof(block) == 0);
@@@ -321,10 -332,10 +333,10 @@@ Rock::SwapDir::allowOptionReconfigure(c
   
   /// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse()
   bool
- Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfiguring)
+ Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig)
   {
       // TODO: ::SwapDir or, better, Config should provide time-parsing routines,
- -    // including time unit handling. Same for size.
+ +    // including time unit handling. Same for size and rate.
   
       time_msec_t *storedTime;
       if (strcmp(option, "swap-timeout") == 0)
@@@ -410,49 -421,6 +422,49 @@@ Rock::SwapDir::dumpRateOption(StoreEntr
           storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate);
   }
   
- Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfiguring)
+ +/// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse()
+ +bool
-     if (!reconfiguring)
++Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig)
+ +{
+ +    uint64_t *storedSize;
+ +    if (strcmp(option, "slot-size") == 0)
+ +        storedSize = &slotSize;
+ +    else
+ +        return false;
+ +
+ +    if (!value)
+ +        self_destruct();
+ +
+ +    // TODO: handle size units and detect parsing errors better
+ +    const uint64_t newSize = strtoll(value, NULL, 10);
+ +    if (newSize <= 0) {
+ +        debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << newSize);
+ +        self_destruct();
+ +    }
+ +
+ +    if (newSize <= sizeof(DbCellHeader)) {
+ +        debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize);
+ +        self_destruct();
+ +    }
+ +
++    if (!reconfig)
+ +        *storedSize = newSize;
+ +    else if (*storedSize != newSize) {
+ +        debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
+ +               << " cannot be changed dynamically, value left unchanged: " <<
+ +               *storedSize);
+ +    }
+ +
+ +    return true;
+ +}
+ +
+ +/// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump()
+ +void
+ +Rock::SwapDir::dumpSizeOption(StoreEntry * e) const
+ +{
+ +    storeAppendPrintf(e, " slot-size=%" PRId64, slotSize);
+ +}
+ +
   /// check the results of the configuration; only level-0 debugging works here
   void
   Rock::SwapDir::validateOptions()
@@@ -712,9 -652,10 +724,9 @@@ Rock::SwapDir::readCompleted(const cha
   
       if (errflag == DISK_OK && rlen > 0)
           sio->offset_ += rlen;
- -    assert(sio->diskOffset + sio->offset_ <= diskOffsetLimit()); // post-factum
   
-     StoreIOState::STRCB *callback = sio->read.callback;
-     assert(callback);
+     StoreIOState::STRCB *callb = sio->read.callback;
+     assert(callb);
       sio->read.callback = NULL;
       void *cbdata;
       if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata))
diff --cc src/fs/ufs/RebuildState.cc
Simple merge
diff --cc src/ipc/StoreMap.h
Simple merge
diff --cc src/store.cc
Simple merge
diff --cc src/store_rebuild.cc
Simple merge
diff --cc src/store_swapout.cc
Simple merge
diff --cc src/tests/stub_store.cc
Simple merge
author	Alex Rousskov <rousskov@measurement-factory.com>
	Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)
committer	Alex Rousskov <rousskov@measurement-factory.com>
	Thu, 21 Mar 2013 21:06:48 +0000 (15:06 -0600)
		1	2
include/snmp_impl.h	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/find-alive.pl	patch \|	diff1 \|	diff2 \|	blob \| history
src/DiskIO/IpcIo/IpcIoFile.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/Makefile.am	patch \|	diff1 \|	diff2 \|	blob \| history
src/StoreIOState.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/cf.data.pre	patch \|	diff1 \|	diff2 \|	blob \| history
src/client_side_reply.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/client_side_reply.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/fs/coss/store_coss.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/fs/coss/store_io_coss.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/fs/rock/RockIoState.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/fs/rock/RockSwapDir.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/fs/ufs/RebuildState.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/ipc/StoreMap.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/store.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/store_rebuild.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/store_swapout.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/tests/stub_store.cc	patch \|	diff1 \|	diff2 \|	blob \| history