struct trapVar *next;
};
--/* from snmp.c */
--extern u_char sid[]; /* size SID_MAX_LEN */
--
#endif /* SQUID_SNMP_IMPL_H */
The rock store type:
- cache_dir rock Directory-Name Mbytes <max-size=bytes> [options]
+ cache_dir rock Directory-Name Mbytes [options]
The Rock Store type is a database-style storage. All cached
- entries are stored in a "database" file, using fixed-size slots,
- one entry per slot. The database size is specified in MB. The
- slot size is specified in bytes using the max-size option. See
- below for more info on the max-size option.
+ entries are stored in a "database" file, using fixed-size slots.
+ A single entry occupies one or more slots.
+ If possible, Squid using Rock Store creates a dedicated kid
+ process called "disker" to avoid blocking Squid worker(s) on disk
+ I/O. One disker kid is created for each rock cache_dir. Diskers
+ are created only when Squid, running in daemon mode, has support
+ for the IpcIo disk I/O module.
+
swap-timeout=msec: Squid will not start writing a miss to or
reading a hit from disk if it estimates that the swap operation
will take more than the specified number of milliseconds. By
read.callback = cb;
read.callback_data = cbdataReference(data);
- theFile->read(new ReadRequest(
- ::ReadRequest(buf, diskOffset + cellOffset, len), this));
+ const uint64_t diskOffset = dir->diskOffset(sidCurrent);
+ theFile->read(new ReadRequest(::ReadRequest(buf,
+ diskOffset + sizeof(DbCellHeader) + coreOff - objOffset, len), this));
}
-// We only buffer data here; we actually write when close() is called.
-// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions
-// of the slot when the write does not end at the page or sector boundary.
-void
+/// wraps tryWrite() to handle deep write failures centrally and safely
+bool
Rock::IoState::write(char const *buf, size_t size, off_t coreOff, FREE *dtor)
{
- // TODO: move to create?
- if (!coreOff) {
- assert(theBuf.isNull());
- assert(payloadEnd <= slotSize);
- theBuf.init(min(payloadEnd, slotSize), slotSize);
- // start with our header; TODO: consider making it a trailer
- DbCellHeader header;
- assert(static_cast<int64_t>(sizeof(header)) <= payloadEnd);
- header.payloadSize = payloadEnd - sizeof(header);
- theBuf.append(reinterpret_cast<const char*>(&header), sizeof(header));
- } else {
- // Core uses -1 offset as "append". Sigh.
- assert(coreOff == -1);
- assert(!theBuf.isNull());
+ bool success = false;
+ try {
+ tryWrite(buf, size, coreOff);
+ success = true;
- } catch (const std::exception &e) { // TODO: should we catch ... as well?
- debugs(79, 2, "db write error: " << e.what());
++ } catch (const std::exception &ex) { // TODO: should we catch ... as well?
++ debugs(79, 2, "db write error: " << ex.what());
+ dir->writeError(swap_filen);
+ finishedWriting(DISK_ERROR);
+ // 'this' might be gone beyond this point; fall through to free buf
}
- theBuf.append(buf, size);
- offset_ += size; // so that Core thinks we wrote it
-
+ // careful: 'this' might be gone here
+
if (dtor)
(dtor)(const_cast<char*>(buf)); // cast due to a broken API?
+
+ return success;
}
-// write what was buffered during write() calls
+/** We only write data when a full slot is accumulated or when close() is called.
+ We buffer, in part, to avoid forcing OS to _read_ old unwritten portions of
+ the slot when the write does not end at the page or sector boundary. */
void
-Rock::IoState::startWriting()
+Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
+{
+ debugs(79, 7, swap_filen << " writes " << size << " more");
+
+ // either this is the first write or append; we do not support write gaps
+ assert(!coreOff || coreOff == -1);
+
+ // allocate the first slice during the first write
+ if (!coreOff) {
+ assert(sidCurrent < 0);
+ sidCurrent = reserveSlotForWriting(); // throws on failures
+ assert(sidCurrent >= 0);
+ writeAnchor().start = sidCurrent;
+ }
+
+ // buffer incoming data in slot buffer and write overflowing or final slots
+ // quit when no data left or we stopped writing on reentrant error
+ while (size > 0 && theFile != NULL) {
+ assert(sidCurrent >= 0);
+ const size_t processed = writeToBuffer(buf, size);
+ // skip the bytes that were just buffered
+ buf += processed;
+ size -= processed;
+ // leftover input means the current slot buffer could not take it all
+ const bool overflow = size > 0;
+
+ // We do not write a full buffer without overflow because
+ // we would not yet know what to set the nextSlot to.
+ if (overflow) {
+ const SlotId sidNext = reserveSlotForWriting(); // throws
+ assert(sidNext >= 0);
+ writeToDisk(sidNext);
+ }
+ }
+}
+
+/// Buffers incoming data for the current slot.
+/// Appends at most theBuf.spaceSize() bytes of buf to theBuf and advances
+/// offset_ by the same amount.
+/// Returns the number of bytes buffered.
+size_t
+Rock::IoState::writeToBuffer(char const *buf, size_t size)
+{
+ // do not buffer a cell header for nothing
+ if (!size)
+ return 0;
+
+ if (!theBuf.size) {
+ // theBuf is empty: account for the DbCellHeader bytes before any payload
+ // (presumably appended() only advances the size without copying -- TODO confirm);
+ // will fill the header in writeToDisk when the next slot is known
+ theBuf.appended(sizeof(DbCellHeader));
+ }
+
+ // take only what fits into the remaining buffer space
+ size_t forCurrentSlot = min(size, static_cast<size_t>(theBuf.spaceSize()));
+ theBuf.append(buf, forCurrentSlot);
+ offset_ += forCurrentSlot; // so that Core thinks we wrote it
+ return forCurrentSlot;
+}
+
+/// write what was buffered during write() calls
+/// negative sidNext means this is the last write request for this entry
+void
+Rock::IoState::writeToDisk(const SlotId sidNext)
{
assert(theFile != NULL);
- assert(!theBuf.isNull());
+ assert(theBuf.size >= sizeof(DbCellHeader));
+
+ if (sidNext < 0) { // we are writing the last slot
+ e->swap_file_sz = offset_;
+ writeAnchor().basics.swap_file_sz = offset_; // would not hurt, right?
+ }
// TODO: if DiskIO module is mmap-based, we should be writing whole pages
// to avoid triggering read-page;new_head+old_tail;write-page overheads
debugs (47,3, HERE << "creating in " << path);
- struct stat swap_sb;
- if (::stat(path, &swap_sb) < 0) {
+ struct stat dir_sb;
+ if (::stat(path, &dir_sb) == 0) {
+ struct stat file_sb;
+ if (::stat(filePath, &file_sb) == 0) {
+ debugs (47, DBG_IMPORTANT, "Skipping existing Rock db: " << filePath);
+ return;
+ }
+ // else the db file is not there or is not accessible, and we will try
+ // to create it later below, generating a detailed error on failures.
+ } else { // path does not exist or is inaccessible
+ // If path exists but is not accessible, mkdir() below will fail, and
+ // the admin should see the error and act accordingly, so there is
+ // no need to distinguish ENOENT from other possible stat() errors.
debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path);
const int res = mkdir(path, 0700);
- if (res != 0) {
- debugs(47, DBG_CRITICAL, "Failed to create Rock db dir " << path <<
- ": " << xstrerror());
- fatal("Rock Store db creation error");
- }
+ if (res != 0)
+ createError("mkdir");
}
+ debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath);
+ const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+ if (swap < 0)
+ createError("create");
+
#if SLOWLY_FILL_WITH_ZEROS
char block[1024];
Must(maxSize() % sizeof(block) == 0);
/// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse()
bool
- Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfiguring)
+ Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig)
{
// TODO: ::SwapDir or, better, Config should provide time-parsing routines,
- // including time unit handling. Same for size.
+ // including time unit handling. Same for size and rate.
time_msec_t *storedTime;
if (strcmp(option, "swap-timeout") == 0)
storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate);
}
- Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfiguring)
+/// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse()
+/// Returns false if the option is not "slot-size"; returns true otherwise.
+/// When not reconfiguring, stores the parsed value in slotSize; when
+/// reconfiguring, only warns if the value differs from the stored one.
+bool
- if (!reconfiguring)
++Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig)
+{
+ uint64_t *storedSize;
+ if (strcmp(option, "slot-size") == 0)
+ storedSize = &slotSize;
+ else
+ return false;
+
+ if (!value)
+ self_destruct();
+
+ // TODO: handle size units and detect parsing errors better
+ // Parse into a signed type first: storing strtoll() result directly in an
+ // unsigned variable turns negative input into a huge positive value that
+ // slips past the "must be positive" check below.
+ const int64_t parsedSize = strtoll(value, NULL, 10);
+ if (parsedSize <= 0) {
+ debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << parsedSize);
+ self_destruct();
+ }
+
+ // known to be positive here, so the conversion to unsigned is lossless
+ const uint64_t newSize = static_cast<uint64_t>(parsedSize);
+ if (newSize <= sizeof(DbCellHeader)) {
+ debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize);
+ self_destruct();
+ }
+
++ if (!reconfig)
+ *storedSize = newSize;
+ else if (*storedSize != newSize) {
+ debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
+ << " cannot be changed dynamically, value left unchanged: " <<
+ *storedSize);
+ }
+
+ return true;
+}
+
+/// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump()
+/// Appends " slot-size=<value>" for the current slotSize to entry e.
+void
+Rock::SwapDir::dumpSizeOption(StoreEntry * e) const
+{
+ // slotSize is a uint64_t, so print it with the unsigned format macro
+ storeAppendPrintf(e, " slot-size=%" PRIu64, slotSize);
+}
+
/// check the results of the configuration; only level-0 debugging works here
void
Rock::SwapDir::validateOptions()
if (errflag == DISK_OK && rlen > 0)
sio->offset_ += rlen;
- assert(sio->diskOffset + sio->offset_ <= diskOffsetLimit()); // post-factum
- StoreIOState::STRCB *callback = sio->read.callback;
- assert(callback);
+ StoreIOState::STRCB *callb = sio->read.callback;
+ assert(callb);
sio->read.callback = NULL;
void *cbdata;
if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata))