From: Alex Rousskov
Date: Thu, 21 Mar 2013 21:06:48 +0000 (-0600)
Subject: Merged from trunk (r12732, v3.3.3+).
X-Git-Tag: SQUID_3_5_0_1~444^2~63^2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a57a662c34744c827637a5cf8ddce25aa6048c9a;p=thirdparty%2Fsquid.git

Merged from trunk (r12732, v3.3.3+).
---

a57a662c34744c827637a5cf8ddce25aa6048c9a
diff --cc include/snmp_impl.h
index 6a6db9fce2,6a6db9fce2..124e93e3df
--- a/include/snmp_impl.h
+++ b/include/snmp_impl.h
@@@ -65,7 -65,7 +65,4 @@@ struct trapVar
     struct trapVar *next;
 };

--/* from snmp.c */
--extern u_char sid[]; /* size SID_MAX_LEN */
--
 #endif /* SQUID_SNMP_IMPL_H */
diff --cc src/cf.data.pre
index e4b28735b5,e38603b1b5..a4c91786f1
--- a/src/cf.data.pre
+++ b/src/cf.data.pre
@@@ -3202,12 -3299,20 +3299,18 @@@ DOC_STAR
 	The rock store type:
- 		cache_dir rock Directory-Name Mbytes [options]
+ 		cache_dir rock Directory-Name Mbytes [options]
 	The Rock Store type is a database-style storage. All cached
- 	entries are stored in a "database" file, using fixed-size slots,
- 	one entry per slot. The database size is specified in MB. The
- 	slot size is specified in bytes using the max-size option. See
- 	below for more info on the max-size option.
+ 	entries are stored in a "database" file, using fixed-size slots.
+ 	A single entry occupies one or more slots.
+ 	If possible, Squid using Rock Store creates a dedicated kid
+ 	process called "disker" to avoid blocking Squid worker(s) on disk
+ 	I/O. One disker kid is created for each rock cache_dir. Diskers
+ 	are created only when Squid, running in daemon mode, has support
+ 	for the IpcIo disk I/O module.
+
+ 	swap-timeout=msec: Squid will not start writing a miss to or
+ 	reading a hit from disk if it estimates that the swap operation
+ 	will take more than the specified number of milliseconds. By
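
The cf.data.pre text above documents the rock cache_dir options that this merge touches: swap-timeout and the new slot-size are handled by Rock::SwapDir::parseTimeOption() and parseSizeOption() further down, and max-swap-rate is reported by dumpRateOption(). A minimal squid.conf line combining them, in the style of the cf.data.pre examples; the directory and the numeric values are illustrative assumptions, not recommended settings:

    # 4096 MB rock database with 16 KB slots (larger entries span several
    # slots); skip swap I/O expected to take longer than 300 milliseconds
    cache_dir rock /var/cache/squid/rock 4096 slot-size=16384 swap-timeout=300 max-swap-rate=200
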
diff --cc src/fs/rock/RockIoState.cc
index 9fe072694e,a682fb5128..4eab56931c
--- a/src/fs/rock/RockIoState.cc
+++ b/src/fs/rock/RockIoState.cc
@@@ -113,104 -69,45 +113,104 @@@ Rock::IoState::read_(char *buf, size_t
     read.callback = cb;
     read.callback_data = cbdataReference(data);
-     theFile->read(new ReadRequest(
-         ::ReadRequest(buf, diskOffset + cellOffset, len), this));
+     const uint64_t diskOffset = dir->diskOffset(sidCurrent);
+     theFile->read(new ReadRequest(::ReadRequest(buf,
+         diskOffset + sizeof(DbCellHeader) + coreOff - objOffset, len), this));
 }
-// We only buffer data here; we actually write when close() is called.
-// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions
-// of the slot when the write does not end at the page or sector boundary.
-void
+/// wraps tryWrite() to handle deep write failures centrally and safely
+bool
 Rock::IoState::write(char const *buf, size_t size, off_t coreOff, FREE *dtor)
 {
-     // TODO: move to create?
-     if (!coreOff) {
-         assert(theBuf.isNull());
-         assert(payloadEnd <= slotSize);
-         theBuf.init(min(payloadEnd, slotSize), slotSize);
-         // start with our header; TODO: consider making it a trailer
-         DbCellHeader header;
-         assert(static_cast<int64_t>(sizeof(header)) <= payloadEnd);
-         header.payloadSize = payloadEnd - sizeof(header);
-         theBuf.append(reinterpret_cast<const char *>(&header), sizeof(header));
-     } else {
-         // Core uses -1 offset as "append". Sigh.
-         assert(coreOff == -1);
-         assert(!theBuf.isNull());
+     bool success = false;
+     try {
+         tryWrite(buf, size, coreOff);
+         success = true;
-     } catch (const std::exception &e) { // TODO: should we catch ... as well?
-         debugs(79, 2, "db write error: " << e.what());
++    } catch (const std::exception &ex) { // TODO: should we catch ... as well?
++        debugs(79, 2, "db write error: " << ex.what());
+         dir->writeError(swap_filen);
+         finishedWriting(DISK_ERROR);
+         // 'this' might be gone beyond this point; fall through to free buf
     }
-     theBuf.append(buf, size);
-     offset_ += size; // so that Core thinks we wrote it
-
+     // careful: 'this' might be gone here
+     if (dtor) (dtor)(const_cast<char *>(buf)); // cast due to a broken API?
+
+     return success;
 }
-// write what was buffered during write() calls
+/** We only write data when full slot is accumulated or when close() is called.
+    We buffer, in part, to avoid forcing OS to _read_ old unwritten portions of
+    the slot when the write does not end at the page or sector boundary. */
 void
-Rock::IoState::startWriting()
+Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
+{
+     debugs(79, 7, swap_filen << " writes " << size << " more");
+
+     // either this is the first write or append; we do not support write gaps
+     assert(!coreOff || coreOff == -1);
+
+     // allocate the first slice during the first write
+     if (!coreOff) {
+         assert(sidCurrent < 0);
+         sidCurrent = reserveSlotForWriting(); // throws on failures
+         assert(sidCurrent >= 0);
+         writeAnchor().start = sidCurrent;
+     }
+
+     // buffer incoming data in slot buffer and write overflowing or final slots
+     // quit when no data left or we stopped writing on reentrant error
+     while (size > 0 && theFile != NULL) {
+         assert(sidCurrent >= 0);
+         const size_t processed = writeToBuffer(buf, size);
+         buf += processed;
+         size -= processed;
+         const bool overflow = size > 0;
+
+         // We do not write a full buffer without overflow because
+         // we would not yet know what to set the nextSlot to.
+         if (overflow) {
+             const SlotId sidNext = reserveSlotForWriting(); // throws
+             assert(sidNext >= 0);
+             writeToDisk(sidNext);
+         }
+     }
+}
+
+/// Buffers incoming data for the current slot.
+/// Returns the number of bytes buffered.
+size_t
+Rock::IoState::writeToBuffer(char const *buf, size_t size)
+{
+     // do not buffer a cell header for nothing
+     if (!size)
+         return 0;
+
+     if (!theBuf.size) {
+         // will fill the header in writeToDisk when the next slot is known
+         theBuf.appended(sizeof(DbCellHeader));
+     }
+
+     size_t forCurrentSlot = min(size, static_cast<size_t>(theBuf.spaceSize()));
+     theBuf.append(buf, forCurrentSlot);
+     offset_ += forCurrentSlot; // so that Core thinks we wrote it
+     return forCurrentSlot;
+}
+
+/// write what was buffered during write() calls
+/// negative sidNext means this is the last write request for this entry
+void
+Rock::IoState::writeToDisk(const SlotId sidNext)
 {
     assert(theFile != NULL);
-     assert(!theBuf.isNull());
+     assert(theBuf.size >= sizeof(DbCellHeader));
+
+     if (sidNext < 0) { // we are writing the last slot
+         e->swap_file_sz = offset_;
+         writeAnchor().basics.swap_file_sz = offset_; // would not hurt, right?
+     }
     // TODO: if DiskIO module is mmap-based, we should be writing whole pages
     // to avoid triggering read-page;new_head+old_tail;write-page overheads
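
The doc comments on tryWrite(), writeToBuffer() and writeToDisk() above describe the new buffering scheme: each slot buffer reserves room for a DbCellHeader, payload accumulates until the slot overflows, an overflowing slot is written only once the next slot has been reserved (so its link is known), and the final, possibly partial, slot is written when the entry is closed. Below is a standalone sketch of that scheme, not Squid code: the class, the header fields, and the tiny sizes are illustrative assumptions.

// slot_writer_sketch.cc: standalone illustration of the slot-buffering idea
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

namespace {

const size_t SlotSize = 32; // deliberately tiny so a small entry spans slots

struct CellHeader {        // stands in for DbCellHeader
    uint64_t payloadSize = 0;
    int32_t nextSlot = -1; // negative means "last slot of this entry"
};

struct Slot {
    CellHeader header;
    std::vector<char> payload;
};

class SlotWriter
{
public:
    // mimics the tryWrite() loop: flush only overflowing slots here
    void write(const char *buf, size_t size) {
        while (size > 0) {
            const size_t processed = writeToBuffer(buf, size);
            buf += processed;
            size -= processed;
            if (size > 0) // overflow: the id of the next slot is now known
                writeToDisk(static_cast<int32_t>(written.size()) + 1);
        }
    }

    // mimics close(): the final, possibly partial, slot is written last
    void close() { writeToDisk(-1); }

    std::vector<Slot> written; // stands in for the on-disk db slots

private:
    // buffer as much of [buf, buf+size) as fits into the current slot,
    // always leaving room for the per-slot header
    size_t writeToBuffer(const char *buf, size_t size) {
        const size_t space = SlotSize - sizeof(CellHeader) - buffered.size();
        const size_t forCurrentSlot = std::min(size, space);
        buffered.insert(buffered.end(), buf, buf + forCurrentSlot);
        return forCurrentSlot;
    }

    // "flush" the current slot; nextSlot < 0 marks the last slot of the entry
    void writeToDisk(const int32_t nextSlot) {
        Slot slot;
        slot.header.payloadSize = buffered.size();
        slot.header.nextSlot = nextSlot;
        slot.payload.swap(buffered); // also empties the buffer
        written.push_back(slot);
    }

    std::vector<char> buffered;
};

} // namespace

int main()
{
    SlotWriter writer;
    const std::string entry(70, 'x'); // spans several 32-byte slots
    writer.write(entry.data(), entry.size());
    writer.close();
    for (const Slot &slot : writer.written)
        std::cout << "slot: payload=" << slot.header.payloadSize
                  << " next=" << slot.header.nextSlot << "\n";
    return 0;
}

Deferring each flush until the slot overflows (or until close) is what lets the header record a valid next-slot link without having to rewrite the slot later.
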
diff --cc src/fs/rock/RockSwapDir.cc
index 3ede34bc87,bb29426a0c..a0c3151668
--- a/src/fs/rock/RockSwapDir.cc
+++ b/src/fs/rock/RockSwapDir.cc
@@@ -165,18 -160,29 +165,30 @@@ Rock::SwapDir::create(
     debugs (47,3, HERE << "creating in " << path);
-     struct stat swap_sb;
-     if (::stat(path, &swap_sb) < 0) {
+     struct stat dir_sb;
+     if (::stat(path, &dir_sb) == 0) {
+         struct stat file_sb;
+         if (::stat(filePath, &file_sb) == 0) {
+             debugs (47, DBG_IMPORTANT, "Skipping existing Rock db: " << filePath);
+             return;
+         }
+         // else the db file is not there or is not accessible, and we will try
+         // to create it later below, generating a detailed error on failures.
+     } else { // path does not exist or is inaccessible
+         // If path exists but is not accessible, mkdir() below will fail, and
+         // the admin should see the error and act accordingly, so there is
+         // no need to distinguish ENOENT from other possible stat() errors.
         debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path);
         const int res = mkdir(path, 0700);
-         if (res != 0) {
-             debugs(47, DBG_CRITICAL, "Failed to create Rock db dir " << path <<
-                    ": " << xstrerror());
-             fatal("Rock Store db creation error");
-         }
+         if (res != 0)
+             createError("mkdir");
     }
+     debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath);
+     const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
+     if (swap < 0)
+         createError("create");
+
 #if SLOWLY_FILL_WITH_ZEROS
     char block[1024];
     Must(maxSize() % sizeof(block) == 0);
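
Both failure paths in the new create() call a createError() helper whose definition is outside this hunk. Judging from the inline handling it replaces (the removed debugs()/xstrerror()/fatal() lines above), it centralizes the fatal report; the sketch below is a guess at its shape, not code copied from Squid, and the message wording and debug section are assumptions.

/// hypothetical reconstruction: report the failed system call and give up,
/// the way the removed inline error handling above did
void
Rock::SwapDir::createError(const char *const call)
{
    debugs(47, DBG_CRITICAL, "ERROR: cannot create Rock db " << filePath <<
           "; " << call << " failure: " << xstrerror());
    fatal("Rock Store db creation error");
}
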
@@@ -321,10 -332,10 +333,10 @@@ Rock::SwapDir::allowOptionReconfigure(c
 /// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse()
 bool
- Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfiguring)
+ Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig)
 {
     // TODO: ::SwapDir or, better, Config should provide time-parsing routines,
-     // including time unit handling. Same for size.
+     // including time unit handling. Same for size and rate.

     time_msec_t *storedTime;
     if (strcmp(option, "swap-timeout") == 0)
@@@ -410,49 -421,6 +422,49 @@@ Rock::SwapDir::dumpRateOption(StoreEntr
     storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate);
 }
+/// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse()
+bool
- Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfiguring)
++Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig)
+{
+     uint64_t *storedSize;
+     if (strcmp(option, "slot-size") == 0)
+         storedSize = &slotSize;
+     else
+         return false;
+
+     if (!value)
+         self_destruct();
+
+     // TODO: handle size units and detect parsing errors better
+     const uint64_t newSize = strtoll(value, NULL, 10);
+     if (newSize <= 0) {
+         debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << newSize);
+         self_destruct();
+     }
+
+     if (newSize <= sizeof(DbCellHeader)) {
+         debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize);
+         self_destruct();
+     }
+
-     if (!reconfiguring)
++    if (!reconfig)
+         *storedSize = newSize;
+     else if (*storedSize != newSize) {
+         debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
+                << " cannot be changed dynamically, value left unchanged: " <<
+                *storedSize);
+     }
+
+     return true;
+}
+
+/// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump()
+void
+Rock::SwapDir::dumpSizeOption(StoreEntry * e) const
+{
+     storeAppendPrintf(e, " slot-size=%" PRId64, slotSize);
+}
+
 /// check the results of the configuration; only level-0 debugging works here
 void
 Rock::SwapDir::validateOptions()
@@@ -712,9 -652,10 +724,9 @@@ Rock::SwapDir::readCompleted(const cha
     if (errflag == DISK_OK && rlen > 0)
         sio->offset_ += rlen;
-     assert(sio->diskOffset + sio->offset_ <= diskOffsetLimit()); // post-factum
-
-     StoreIOState::STRCB *callback = sio->read.callback;
-     assert(callback);
+     StoreIOState::STRCB *callb = sio->read.callback;
+     assert(callb);
     sio->read.callback = NULL;
     void *cbdata;
     if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata))
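
parseSizeOption() above insists that slot-size exceed sizeof(DbCellHeader) because, per the RockIoState.cc buffering code earlier in this diff, every slot begins with a DbCellHeader and only the remainder carries entry payload, so an entry larger than that remainder spans multiple slots. A small self-contained illustration of the resulting arithmetic; the header and slot sizes are made-up assumptions, not Squid values.

#include <cstdint>
#include <iostream>

// how many db slots does one cached entry occupy if each slot stores a
// fixed-size cell header followed by payload? (sizes below are assumptions)
static uint64_t
slotsNeeded(const uint64_t entrySize, const uint64_t slotSize, const uint64_t headerSize)
{
    const uint64_t payloadPerSlot = slotSize - headerSize; // positive, per the check above
    return (entrySize + payloadPerSlot - 1) / payloadPerSlot; // round up
}

int main()
{
    const uint64_t headerSize = 64;      // assumed DbCellHeader footprint
    const uint64_t slotSize = 16 * 1024; // assumed slot-size=16384
    const uint64_t entrySizes[] = {1024, 16 * 1024, 1024 * 1024};
    for (const uint64_t entrySize : entrySizes)
        std::cout << entrySize << "-byte entry -> "
                  << slotsNeeded(entrySize, slotSize, headerSize) << " slot(s)\n";
    return 0;
}

With these example numbers a 16384-byte entry already needs two slots, because the header consumes 64 bytes of each slot's capacity.
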