]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/rock/RockSwapDir.cc
Collapse internal revalidation requests (SMP-unaware caches), again.
[thirdparty/squid.git] / src / fs / rock / RockSwapDir.cc
1 /*
2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 47 Store Directory Routines */
10
11 #include "squid.h"
12 #include "cache_cf.h"
13 #include "CollapsedForwarding.h"
14 #include "ConfigOption.h"
15 #include "DiskIO/DiskIOModule.h"
16 #include "DiskIO/DiskIOStrategy.h"
17 #include "DiskIO/ReadRequest.h"
18 #include "DiskIO/WriteRequest.h"
19 #include "fs/rock/RockHeaderUpdater.h"
20 #include "fs/rock/RockIoRequests.h"
21 #include "fs/rock/RockIoState.h"
22 #include "fs/rock/RockRebuild.h"
23 #include "fs/rock/RockSwapDir.h"
24 #include "globals.h"
25 #include "ipc/mem/Pages.h"
26 #include "MemObject.h"
27 #include "Parsing.h"
28 #include "SquidConfig.h"
29 #include "SquidMath.h"
30 #include "tools.h"
31
32 #include <cstdlib>
33 #include <iomanip>
34 #include <limits>
35
36 #if HAVE_SYS_STAT_H
37 #include <sys/stat.h>
38 #endif
39
40 const int64_t Rock::SwapDir::HeaderSize = 16*1024;
41
42 Rock::SwapDir::SwapDir(): ::SwapDir("rock"),
43 slotSize(HeaderSize), filePath(NULL), map(NULL), io(NULL),
44 waitingForPage(NULL)
45 {
46 }
47
48 Rock::SwapDir::~SwapDir()
49 {
50 delete io;
51 delete map;
52 safe_free(filePath);
53 }
54
55 // called when Squid core needs a StoreEntry with a given key
56 StoreEntry *
57 Rock::SwapDir::get(const cache_key *key)
58 {
59 if (!map || !theFile || !theFile->canRead())
60 return NULL;
61
62 sfileno filen;
63 const Ipc::StoreMapAnchor *const slot = map->openForReading(key, filen);
64 if (!slot)
65 return NULL;
66
67 // create a brand new store entry and initialize it with stored basics
68 StoreEntry *e = new StoreEntry();
69 anchorEntry(*e, filen, *slot);
70
71 e->hashInsert(key);
72 trackReferences(*e);
73
74 return e;
75 // the disk entry remains open for reading, protected from modifications
76 }
77
78 bool
79 Rock::SwapDir::anchorCollapsed(StoreEntry &collapsed, bool &inSync)
80 {
81 if (!map || !theFile || !theFile->canRead())
82 return false;
83
84 sfileno filen;
85 const Ipc::StoreMapAnchor *const slot = map->openForReading(
86 reinterpret_cast<cache_key*>(collapsed.key), filen);
87 if (!slot)
88 return false;
89
90 anchorEntry(collapsed, filen, *slot);
91 inSync = updateCollapsedWith(collapsed, *slot);
92 return true; // even if inSync is false
93 }
94
95 bool
96 Rock::SwapDir::updateCollapsed(StoreEntry &collapsed)
97 {
98 if (!map || !theFile || !theFile->canRead())
99 return false;
100
101 if (collapsed.swap_filen < 0) // no longer using a disk cache
102 return true;
103 assert(collapsed.swap_dirn == index);
104
105 const Ipc::StoreMapAnchor &s = map->readableEntry(collapsed.swap_filen);
106 return updateCollapsedWith(collapsed, s);
107 }
108
109 bool
110 Rock::SwapDir::updateCollapsedWith(StoreEntry &collapsed, const Ipc::StoreMapAnchor &anchor)
111 {
112 collapsed.swap_file_sz = anchor.basics.swap_file_sz;
113 return true;
114 }
115
116 void
117 Rock::SwapDir::anchorEntry(StoreEntry &e, const sfileno filen, const Ipc::StoreMapAnchor &anchor)
118 {
119 const Ipc::StoreMapAnchor::Basics &basics = anchor.basics;
120
121 e.swap_file_sz = basics.swap_file_sz;
122 e.lastref = basics.lastref;
123 e.timestamp = basics.timestamp;
124 e.expires = basics.expires;
125 e.lastModified(basics.lastmod);
126 e.refcount = basics.refcount;
127 e.flags = basics.flags;
128
129 if (anchor.complete()) {
130 e.store_status = STORE_OK;
131 e.swap_status = SWAPOUT_DONE;
132 } else {
133 e.store_status = STORE_PENDING;
134 e.swap_status = SWAPOUT_WRITING; // even though another worker writes?
135 }
136
137 e.ping_status = PING_NONE;
138
139 EBIT_CLR(e.flags, RELEASE_REQUEST);
140 e.clearPrivate();
141 EBIT_SET(e.flags, ENTRY_VALIDATED);
142
143 e.swap_dirn = index;
144 e.swap_filen = filen;
145 }
146
147 void Rock::SwapDir::disconnect(StoreEntry &e)
148 {
149 assert(e.swap_dirn == index);
150 assert(e.swap_filen >= 0);
151 // cannot have SWAPOUT_NONE entry with swap_filen >= 0
152 assert(e.swap_status != SWAPOUT_NONE);
153
154 // do not rely on e.swap_status here because there is an async delay
155 // before it switches from SWAPOUT_WRITING to SWAPOUT_DONE.
156
157 // since e has swap_filen, its slot is locked for reading and/or writing
158 // but it is difficult to know whether THIS worker is reading or writing e,
159 // especially since we may switch from writing to reading. This code relies
160 // on Rock::IoState::writeableAnchor_ being set when we locked for writing.
161 if (e.mem_obj && e.mem_obj->swapout.sio != NULL &&
162 dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_) {
163 map->abortWriting(e.swap_filen);
164 e.swap_dirn = -1;
165 e.swap_filen = -1;
166 e.swap_status = SWAPOUT_NONE;
167 dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_ = NULL;
168 Store::Root().transientsAbandon(e); // broadcasts after the change
169 } else {
170 map->closeForReading(e.swap_filen);
171 e.swap_dirn = -1;
172 e.swap_filen = -1;
173 e.swap_status = SWAPOUT_NONE;
174 }
175 }
176
177 uint64_t
178 Rock::SwapDir::currentSize() const
179 {
180 const uint64_t spaceSize = !freeSlots ?
181 maxSize() : (slotSize * freeSlots->size());
182 // everything that is not free is in use
183 return maxSize() - spaceSize;
184 }
185
186 uint64_t
187 Rock::SwapDir::currentCount() const
188 {
189 return map ? map->entryCount() : 0;
190 }
191
192 /// In SMP mode only the disker process reports stats to avoid
193 /// counting the same stats by multiple processes.
194 bool
195 Rock::SwapDir::doReportStat() const
196 {
197 return ::SwapDir::doReportStat() && (!UsingSmp() || IamDiskProcess());
198 }
199
200 void
201 Rock::SwapDir::swappedOut(const StoreEntry &)
202 {
203 // stats are not stored but computed when needed
204 }
205
206 int64_t
207 Rock::SwapDir::slotLimitAbsolute() const
208 {
209 // the max value is an invalid one; all values must be below the limit
210 assert(std::numeric_limits<Ipc::StoreMapSliceId>::max() ==
211 std::numeric_limits<SlotId>::max());
212 return std::numeric_limits<SlotId>::max();
213 }
214
215 int64_t
216 Rock::SwapDir::slotLimitActual() const
217 {
218 const int64_t sWanted = (maxSize() - HeaderSize)/slotSize;
219 const int64_t sLimitLo = map ? map->sliceLimit() : 0; // dynamic shrinking unsupported
220 const int64_t sLimitHi = slotLimitAbsolute();
221 return min(max(sLimitLo, sWanted), sLimitHi);
222 }
223
224 int64_t
225 Rock::SwapDir::entryLimitActual() const
226 {
227 return min(slotLimitActual(), entryLimitAbsolute());
228 }
229
230 // TODO: encapsulate as a tool
231 void
232 Rock::SwapDir::create()
233 {
234 assert(path);
235 assert(filePath);
236
237 if (UsingSmp() && !IamDiskProcess()) {
238 debugs (47,3, HERE << "disker will create in " << path);
239 return;
240 }
241
242 debugs (47,3, HERE << "creating in " << path);
243
244 struct stat dir_sb;
245 if (::stat(path, &dir_sb) == 0) {
246 struct stat file_sb;
247 if (::stat(filePath, &file_sb) == 0) {
248 debugs (47, DBG_IMPORTANT, "Skipping existing Rock db: " << filePath);
249 return;
250 }
251 // else the db file is not there or is not accessible, and we will try
252 // to create it later below, generating a detailed error on failures.
253 } else { // path does not exist or is inaccessible
254 // If path exists but is not accessible, mkdir() below will fail, and
255 // the admin should see the error and act accordingly, so there is
256 // no need to distinguish ENOENT from other possible stat() errors.
257 debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path);
258 const int res = mkdir(path, 0700);
259 if (res != 0)
260 createError("mkdir");
261 }
262
263 debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath);
264 const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
265 if (swap < 0)
266 createError("create");
267
268 #if SLOWLY_FILL_WITH_ZEROS
269 char block[1024];
270 Must(maxSize() % sizeof(block) == 0);
271 memset(block, '\0', sizeof(block));
272
273 for (off_t offset = 0; offset < maxSize(); offset += sizeof(block)) {
274 if (write(swap, block, sizeof(block)) != sizeof(block))
275 createError("write");
276 }
277 #else
278 if (ftruncate(swap, maxSize()) != 0)
279 createError("truncate");
280
281 char header[HeaderSize];
282 memset(header, '\0', sizeof(header));
283 if (write(swap, header, sizeof(header)) != sizeof(header))
284 createError("write");
285 #endif
286
287 close(swap);
288 }
289
290 // report Rock DB creation error and exit
291 void
292 Rock::SwapDir::createError(const char *const msg)
293 {
294 int xerrno = errno; // XXX: where does errno come from?
295 debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " <<
296 filePath << "; " << msg << " error: " << xstrerr(xerrno));
297 fatal("Rock Store db creation error");
298 }
299
300 void
301 Rock::SwapDir::init()
302 {
303 debugs(47,2, HERE);
304
305 // XXX: SwapDirs aren't refcounted. We make IORequestor calls, which
306 // are refcounted. We up our count once to avoid implicit delete's.
307 lock();
308
309 freeSlots = shm_old(Ipc::Mem::PageStack)(freeSlotsPath());
310
311 Must(!map);
312 map = new DirMap(inodeMapPath());
313 map->cleaner = this;
314
315 const char *ioModule = needsDiskStrand() ? "IpcIo" : "Blocking";
316 if (DiskIOModule *m = DiskIOModule::Find(ioModule)) {
317 debugs(47,2, HERE << "Using DiskIO module: " << ioModule);
318 io = m->createStrategy();
319 io->init();
320 } else {
321 debugs(47, DBG_CRITICAL, "FATAL: Rock store is missing DiskIO module: " <<
322 ioModule);
323 fatal("Rock Store missing a required DiskIO module");
324 }
325
326 theFile = io->newFile(filePath);
327 theFile->configure(fileConfig);
328 theFile->open(O_RDWR, 0644, this);
329
330 // Increment early. Otherwise, if one SwapDir finishes rebuild before
331 // others start, storeRebuildComplete() will think the rebuild is over!
332 // TODO: move store_dirs_rebuilding hack to store modules that need it.
333 ++StoreController::store_dirs_rebuilding;
334 }
335
336 bool
337 Rock::SwapDir::needsDiskStrand() const
338 {
339 const bool wontEvenWorkWithoutDisker = Config.workers > 1;
340 const bool wouldWorkBetterWithDisker = DiskIOModule::Find("IpcIo");
341 return InDaemonMode() && (wontEvenWorkWithoutDisker ||
342 wouldWorkBetterWithDisker);
343 }
344
345 void
346 Rock::SwapDir::parse(int anIndex, char *aPath)
347 {
348 index = anIndex;
349
350 path = xstrdup(aPath);
351
352 // cache store is located at path/db
353 String fname(path);
354 fname.append("/rock");
355 filePath = xstrdup(fname.termedBuf());
356
357 parseSize(false);
358 parseOptions(0);
359
360 // Current openForWriting() code overwrites the old slot if needed
361 // and possible, so proactively removing old slots is probably useless.
362 assert(!repl); // repl = createRemovalPolicy(Config.replPolicy);
363
364 validateOptions();
365 }
366
367 void
368 Rock::SwapDir::reconfigure()
369 {
370 parseSize(true);
371 parseOptions(1);
372 // TODO: can we reconfigure the replacement policy (repl)?
373 validateOptions();
374 }
375
376 /// parse maximum db disk size
377 void
378 Rock::SwapDir::parseSize(const bool reconfig)
379 {
380 const int i = GetInteger();
381 if (i < 0)
382 fatal("negative Rock cache_dir size value");
383 const uint64_t new_max_size =
384 static_cast<uint64_t>(i) << 20; // MBytes to Bytes
385 if (!reconfig)
386 max_size = new_max_size;
387 else if (new_max_size != max_size) {
388 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir '" << path << "' size "
389 "cannot be changed dynamically, value left unchanged (" <<
390 (max_size >> 20) << " MB)");
391 }
392 }
393
394 ConfigOption *
395 Rock::SwapDir::getOptionTree() const
396 {
397 ConfigOption *copt = ::SwapDir::getOptionTree();
398 ConfigOptionVector *vector = dynamic_cast<ConfigOptionVector*>(copt);
399 if (vector) {
400 // if copt is actually a ConfigOptionVector
401 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseSizeOption, &SwapDir::dumpSizeOption));
402 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseTimeOption, &SwapDir::dumpTimeOption));
403 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseRateOption, &SwapDir::dumpRateOption));
404 } else {
405 // we don't know how to handle copt, as it's not a ConfigOptionVector.
406 // free it (and return nullptr)
407 delete copt;
408 copt = nullptr;
409 }
410 return copt;
411 }
412
413 bool
414 Rock::SwapDir::allowOptionReconfigure(const char *const option) const
415 {
416 return strcmp(option, "slot-size") != 0 &&
417 ::SwapDir::allowOptionReconfigure(option);
418 }
419
420 /// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse()
421 bool
422 Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig)
423 {
424 // TODO: ::SwapDir or, better, Config should provide time-parsing routines,
425 // including time unit handling. Same for size and rate.
426
427 time_msec_t *storedTime;
428 if (strcmp(option, "swap-timeout") == 0)
429 storedTime = &fileConfig.ioTimeout;
430 else
431 return false;
432
433 if (!value) {
434 self_destruct();
435 return false;
436 }
437
438 // TODO: handle time units and detect parsing errors better
439 const int64_t parsedValue = strtoll(value, NULL, 10);
440 if (parsedValue < 0) {
441 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue);
442 self_destruct();
443 return false;
444 }
445
446 const time_msec_t newTime = static_cast<time_msec_t>(parsedValue);
447
448 if (!reconfig)
449 *storedTime = newTime;
450 else if (*storedTime != newTime) {
451 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
452 << " cannot be changed dynamically, value left unchanged: " <<
453 *storedTime);
454 }
455
456 return true;
457 }
458
459 /// reports time-specific options; mimics ::SwapDir::optionObjectSizeDump()
460 void
461 Rock::SwapDir::dumpTimeOption(StoreEntry * e) const
462 {
463 if (fileConfig.ioTimeout)
464 storeAppendPrintf(e, " swap-timeout=%" PRId64,
465 static_cast<int64_t>(fileConfig.ioTimeout));
466 }
467
468 /// parses rate-specific options; mimics ::SwapDir::optionObjectSizeParse()
469 bool
470 Rock::SwapDir::parseRateOption(char const *option, const char *value, int isaReconfig)
471 {
472 int *storedRate;
473 if (strcmp(option, "max-swap-rate") == 0)
474 storedRate = &fileConfig.ioRate;
475 else
476 return false;
477
478 if (!value) {
479 self_destruct();
480 return false;
481 }
482
483 // TODO: handle time units and detect parsing errors better
484 const int64_t parsedValue = strtoll(value, NULL, 10);
485 if (parsedValue < 0) {
486 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue);
487 self_destruct();
488 return false;
489 }
490
491 const int newRate = static_cast<int>(parsedValue);
492
493 if (newRate < 0) {
494 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << newRate);
495 self_destruct();
496 return false;
497 }
498
499 if (!isaReconfig)
500 *storedRate = newRate;
501 else if (*storedRate != newRate) {
502 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
503 << " cannot be changed dynamically, value left unchanged: " <<
504 *storedRate);
505 }
506
507 return true;
508 }
509
510 /// reports rate-specific options; mimics ::SwapDir::optionObjectSizeDump()
511 void
512 Rock::SwapDir::dumpRateOption(StoreEntry * e) const
513 {
514 if (fileConfig.ioRate >= 0)
515 storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate);
516 }
517
518 /// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse()
519 bool
520 Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig)
521 {
522 uint64_t *storedSize;
523 if (strcmp(option, "slot-size") == 0)
524 storedSize = &slotSize;
525 else
526 return false;
527
528 if (!value) {
529 self_destruct();
530 return false;
531 }
532
533 // TODO: handle size units and detect parsing errors better
534 const uint64_t newSize = strtoll(value, NULL, 10);
535 if (newSize <= 0) {
536 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << newSize);
537 self_destruct();
538 return false;
539 }
540
541 if (newSize <= sizeof(DbCellHeader)) {
542 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize);
543 self_destruct();
544 return false;
545 }
546
547 if (!reconfig)
548 *storedSize = newSize;
549 else if (*storedSize != newSize) {
550 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
551 << " cannot be changed dynamically, value left unchanged: " <<
552 *storedSize);
553 }
554
555 return true;
556 }
557
558 /// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump()
559 void
560 Rock::SwapDir::dumpSizeOption(StoreEntry * e) const
561 {
562 storeAppendPrintf(e, " slot-size=%" PRId64, slotSize);
563 }
564
565 /// check the results of the configuration; only level-0 debugging works here
566 void
567 Rock::SwapDir::validateOptions()
568 {
569 if (slotSize <= 0)
570 fatal("Rock store requires a positive slot-size");
571
572 const int64_t maxSizeRoundingWaste = 1024 * 1024; // size is configured in MB
573 const int64_t slotSizeRoundingWaste = slotSize;
574 const int64_t maxRoundingWaste =
575 max(maxSizeRoundingWaste, slotSizeRoundingWaste);
576
577 // an entry consumes at least one slot; round up to reduce false warnings
578 const int64_t blockSize = static_cast<int64_t>(slotSize);
579 const int64_t maxObjSize = max(blockSize,
580 ((maxObjectSize()+blockSize-1)/blockSize)*blockSize);
581
582 // Does the "sfileno*max-size" limit match configured db capacity?
583 const double entriesMayOccupy = entryLimitAbsolute()*static_cast<double>(maxObjSize);
584 if (entriesMayOccupy + maxRoundingWaste < maxSize()) {
585 const int64_t diskWasteSize = maxSize() - static_cast<int64_t>(entriesMayOccupy);
586 debugs(47, DBG_CRITICAL, "WARNING: Rock cache_dir " << path << " wastes disk space due to entry limits:" <<
587 "\n\tconfigured db capacity: " << maxSize() << " bytes" <<
588 "\n\tconfigured db slot size: " << slotSize << " bytes" <<
589 "\n\tconfigured maximum entry size: " << maxObjectSize() << " bytes" <<
590 "\n\tmaximum number of cache_dir entries supported by Squid: " << entryLimitAbsolute() <<
591 "\n\tdisk space all entries may use: " << entriesMayOccupy << " bytes" <<
592 "\n\tdisk space wasted: " << diskWasteSize << " bytes");
593 }
594
595 // Does the "absolute slot count" limit match configured db capacity?
596 const double slotsMayOccupy = slotLimitAbsolute()*static_cast<double>(slotSize);
597 if (slotsMayOccupy + maxRoundingWaste < maxSize()) {
598 const int64_t diskWasteSize = maxSize() - static_cast<int64_t>(entriesMayOccupy);
599 debugs(47, DBG_CRITICAL, "WARNING: Rock cache_dir " << path << " wastes disk space due to slot limits:" <<
600 "\n\tconfigured db capacity: " << maxSize() << " bytes" <<
601 "\n\tconfigured db slot size: " << slotSize << " bytes" <<
602 "\n\tmaximum number of rock cache_dir slots supported by Squid: " << slotLimitAbsolute() <<
603 "\n\tdisk space all slots may use: " << slotsMayOccupy << " bytes" <<
604 "\n\tdisk space wasted: " << diskWasteSize << " bytes");
605 }
606 }
607
608 void
609 Rock::SwapDir::rebuild()
610 {
611 //++StoreController::store_dirs_rebuilding; // see Rock::SwapDir::init()
612 AsyncJob::Start(new Rebuild(this));
613 }
614
615 bool
616 Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const
617 {
618 if (diskSpaceNeeded >= 0)
619 diskSpaceNeeded += sizeof(DbCellHeader);
620 if (!::SwapDir::canStore(e, diskSpaceNeeded, load))
621 return false;
622
623 if (!theFile || !theFile->canWrite())
624 return false;
625
626 if (!map)
627 return false;
628
629 // Do not start I/O transaction if there are less than 10% free pages left.
630 // TODO: reserve page instead
631 if (needsDiskStrand() &&
632 Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) {
633 debugs(47, 5, HERE << "too few shared pages for IPC I/O left");
634 return false;
635 }
636
637 if (io->shedLoad())
638 return false;
639
640 load = io->load();
641 return true;
642 }
643
644 StoreIOState::Pointer
645 Rock::SwapDir::createStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
646 {
647 if (!theFile || theFile->error()) {
648 debugs(47,4, HERE << theFile);
649 return NULL;
650 }
651
652 sfileno filen;
653 Ipc::StoreMapAnchor *const slot =
654 map->openForWriting(reinterpret_cast<const cache_key *>(e.key), filen);
655 if (!slot) {
656 debugs(47, 5, HERE << "map->add failed");
657 return NULL;
658 }
659
660 assert(filen >= 0);
661 slot->set(e);
662
663 // XXX: We rely on our caller, storeSwapOutStart(), to set e.fileno.
664 // If that does not happen, the entry will not decrement the read level!
665
666 Rock::SwapDir::Pointer self(this);
667 IoState *sio = new IoState(self, &e, cbFile, cbIo, data);
668
669 sio->swap_dirn = index;
670 sio->swap_filen = filen;
671 sio->writeableAnchor_ = slot;
672
673 debugs(47,5, HERE << "dir " << index << " created new filen " <<
674 std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
675 sio->swap_filen << std::dec << " starting at " <<
676 diskOffset(sio->swap_filen));
677
678 sio->file(theFile);
679
680 trackReferences(e);
681 return sio;
682 }
683
684 StoreIOState::Pointer
685 Rock::SwapDir::createUpdateIO(const Ipc::StoreMapUpdate &update, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
686 {
687 if (!theFile || theFile->error()) {
688 debugs(47,4, theFile);
689 return nullptr;
690 }
691
692 Must(update.fresh);
693 Must(update.fresh.fileNo >= 0);
694
695 Rock::SwapDir::Pointer self(this);
696 IoState *sio = new IoState(self, update.entry, cbFile, cbIo, data);
697
698 sio->swap_dirn = index;
699 sio->swap_filen = update.fresh.fileNo;
700 sio->writeableAnchor_ = update.fresh.anchor;
701
702 debugs(47,5, "dir " << index << " updating filen " <<
703 std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
704 sio->swap_filen << std::dec << " starting at " <<
705 diskOffset(sio->swap_filen));
706
707 sio->file(theFile);
708 return sio;
709 }
710
711 int64_t
712 Rock::SwapDir::diskOffset(const SlotId sid) const
713 {
714 assert(sid >= 0);
715 return HeaderSize + slotSize*sid;
716 }
717
718 int64_t
719 Rock::SwapDir::diskOffset(Ipc::Mem::PageId &pageId) const
720 {
721 assert(pageId);
722 return diskOffset(pageId.number - 1);
723 }
724
725 int64_t
726 Rock::SwapDir::diskOffsetLimit() const
727 {
728 assert(map);
729 return diskOffset(map->sliceLimit());
730 }
731
732 bool
733 Rock::SwapDir::useFreeSlot(Ipc::Mem::PageId &pageId)
734 {
735 if (freeSlots->pop(pageId)) {
736 debugs(47, 5, "got a previously free slot: " << pageId);
737 return true;
738 }
739
740 // catch free slots delivered to noteFreeMapSlice()
741 assert(!waitingForPage);
742 waitingForPage = &pageId;
743 if (map->purgeOne()) {
744 assert(!waitingForPage); // noteFreeMapSlice() should have cleared it
745 assert(pageId.set());
746 debugs(47, 5, "got a previously busy slot: " << pageId);
747 return true;
748 }
749 assert(waitingForPage == &pageId);
750 waitingForPage = NULL;
751
752 debugs(47, 3, "cannot get a slot; entries: " << map->entryCount());
753 return false;
754 }
755
756 bool
757 Rock::SwapDir::validSlotId(const SlotId slotId) const
758 {
759 return 0 <= slotId && slotId < slotLimitActual();
760 }
761
762 void
763 Rock::SwapDir::noteFreeMapSlice(const Ipc::StoreMapSliceId sliceId)
764 {
765 Ipc::Mem::PageId pageId;
766 pageId.pool = index+1;
767 pageId.number = sliceId+1;
768 if (waitingForPage) {
769 *waitingForPage = pageId;
770 waitingForPage = NULL;
771 } else {
772 freeSlots->push(pageId);
773 }
774 }
775
776 // tries to open an old entry with swap_filen for reading
777 StoreIOState::Pointer
778 Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
779 {
780 if (!theFile || theFile->error()) {
781 debugs(47,4, HERE << theFile);
782 return NULL;
783 }
784
785 if (e.swap_filen < 0) {
786 debugs(47,4, HERE << e);
787 return NULL;
788 }
789
790 // Do not start I/O transaction if there are less than 10% free pages left.
791 // TODO: reserve page instead
792 if (needsDiskStrand() &&
793 Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) {
794 debugs(47, 5, HERE << "too few shared pages for IPC I/O left");
795 return NULL;
796 }
797
798 // The are two ways an entry can get swap_filen: our get() locked it for
799 // reading or our storeSwapOutStart() locked it for writing. Peeking at our
800 // locked entry is safe, but no support for reading the entry we swap out.
801 const Ipc::StoreMapAnchor *slot = map->peekAtReader(e.swap_filen);
802 if (!slot)
803 return NULL; // we were writing afterall
804
805 Rock::SwapDir::Pointer self(this);
806 IoState *sio = new IoState(self, &e, cbFile, cbIo, data);
807
808 sio->swap_dirn = index;
809 sio->swap_filen = e.swap_filen;
810 sio->readableAnchor_ = slot;
811 sio->file(theFile);
812
813 debugs(47,5, HERE << "dir " << index << " has old filen: " <<
814 std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
815 sio->swap_filen);
816
817 assert(slot->sameKey(static_cast<const cache_key*>(e.key)));
818 // For collapsed disk hits: e.swap_file_sz and slot->basics.swap_file_sz
819 // may still be zero and basics.swap_file_sz may grow.
820 assert(slot->basics.swap_file_sz >= e.swap_file_sz);
821
822 return sio;
823 }
824
825 void
826 Rock::SwapDir::ioCompletedNotification()
827 {
828 if (!theFile)
829 fatalf("Rock cache_dir failed to initialize db file: %s", filePath);
830
831 if (theFile->error()) {
832 int xerrno = errno; // XXX: where does errno come from
833 fatalf("Rock cache_dir at %s failed to open db file: %s", filePath,
834 xstrerr(xerrno));
835 }
836
837 debugs(47, 2, "Rock cache_dir[" << index << "] limits: " <<
838 std::setw(12) << maxSize() << " disk bytes, " <<
839 std::setw(7) << map->entryLimit() << " entries, and " <<
840 std::setw(7) << map->sliceLimit() << " slots");
841
842 rebuild();
843 }
844
845 void
846 Rock::SwapDir::closeCompleted()
847 {
848 theFile = NULL;
849 }
850
851 void
852 Rock::SwapDir::readCompleted(const char *, int rlen, int errflag, RefCount< ::ReadRequest> r)
853 {
854 ReadRequest *request = dynamic_cast<Rock::ReadRequest*>(r.getRaw());
855 assert(request);
856 IoState::Pointer sio = request->sio;
857
858 if (errflag == DISK_OK && rlen > 0)
859 sio->offset_ += rlen;
860
861 sio->callReaderBack(r->buf, rlen);
862 }
863
864 void
865 Rock::SwapDir::writeCompleted(int errflag, size_t, RefCount< ::WriteRequest> r)
866 {
867 Rock::WriteRequest *request = dynamic_cast<Rock::WriteRequest*>(r.getRaw());
868 assert(request);
869 assert(request->sio != NULL);
870 IoState &sio = *request->sio;
871
872 // quit if somebody called IoState::close() while we were waiting
873 if (!sio.stillWaiting()) {
874 debugs(79, 3, "ignoring closed entry " << sio.swap_filen);
875 noteFreeMapSlice(request->sidNext);
876 return;
877 }
878
879 debugs(79, 7, "errflag=" << errflag << " rlen=" << request->len << " eof=" << request->eof);
880
881 // TODO: Fail if disk dropped one of the previous write requests.
882
883 if (errflag == DISK_OK) {
884 // do not increment sio.offset_ because we do it in sio->write()
885
886 // finalize the shared slice info after writing slice contents to disk
887 Ipc::StoreMap::Slice &slice =
888 map->writeableSlice(sio.swap_filen, request->sidCurrent);
889 slice.size = request->len - sizeof(DbCellHeader);
890 slice.next = request->sidNext;
891
892 if (request->eof) {
893 assert(sio.e);
894 assert(sio.writeableAnchor_);
895 if (sio.touchingStoreEntry()) {
896 sio.e->swap_file_sz = sio.writeableAnchor_->basics.swap_file_sz =
897 sio.offset_;
898
899 // close, the entry gets the read lock
900 map->closeForWriting(sio.swap_filen, true);
901 }
902 sio.writeableAnchor_ = NULL;
903 sio.splicingPoint = request->sidCurrent;
904 sio.finishedWriting(errflag);
905 }
906 } else {
907 noteFreeMapSlice(request->sidNext);
908
909 writeError(sio);
910 sio.finishedWriting(errflag);
911 // and hope that Core will call disconnect() to close the map entry
912 }
913
914 if (sio.touchingStoreEntry())
915 CollapsedForwarding::Broadcast(*sio.e);
916 }
917
918 void
919 Rock::SwapDir::writeError(StoreIOState &sio)
920 {
921 // Do not abortWriting here. The entry should keep the write lock
922 // instead of losing association with the store and confusing core.
923 map->freeEntry(sio.swap_filen); // will mark as unusable, just in case
924
925 if (sio.touchingStoreEntry())
926 Store::Root().transientsAbandon(*sio.e);
927 // else noop: a fresh entry update error does not affect stale entry readers
928
929 // All callers must also call IoState callback, to propagate the error.
930 }
931
932 void
933 Rock::SwapDir::updateHeaders(StoreEntry *updatedE)
934 {
935 if (!map)
936 return;
937
938 Ipc::StoreMapUpdate update(updatedE);
939 if (!map->openForUpdating(update, updatedE->swap_filen))
940 return;
941
942 try {
943 AsyncJob::Start(new HeaderUpdater(this, update));
944 } catch (const std::exception &ex) {
945 debugs(20, 2, "error starting to update entry " << *updatedE << ": " << ex.what());
946 map->abortUpdating(update);
947 }
948 }
949
950 bool
951 Rock::SwapDir::full() const
952 {
953 return freeSlots != NULL && !freeSlots->size();
954 }
955
956 // storeSwapOutFileClosed calls this nethod on DISK_NO_SPACE_LEFT,
957 // but it should not happen for us
958 void
959 Rock::SwapDir::diskFull()
960 {
961 debugs(20, DBG_IMPORTANT, "BUG: No space left with rock cache_dir: " <<
962 filePath);
963 }
964
965 /// purge while full(); it should be sufficient to purge just one
966 void
967 Rock::SwapDir::maintain()
968 {
969 // The Store calls this to free some db space, but there is nothing wrong
970 // with a full() db, except when db has to shrink after reconfigure, and
971 // we do not support shrinking yet (it would have to purge specific slots).
972 // TODO: Disable maintain() requests when they are pointless.
973 }
974
975 void
976 Rock::SwapDir::reference(StoreEntry &e)
977 {
978 debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
979 if (repl && repl->Referenced)
980 repl->Referenced(repl, &e, &e.repl);
981 }
982
983 bool
984 Rock::SwapDir::dereference(StoreEntry &e)
985 {
986 debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
987 if (repl && repl->Dereferenced)
988 repl->Dereferenced(repl, &e, &e.repl);
989
990 // no need to keep e in the global store_table for us; we have our own map
991 return false;
992 }
993
994 bool
995 Rock::SwapDir::unlinkdUseful() const
996 {
997 // no entry-specific files to unlink
998 return false;
999 }
1000
1001 void
1002 Rock::SwapDir::unlink(StoreEntry &e)
1003 {
1004 debugs(47, 5, HERE << e);
1005 ignoreReferences(e);
1006 map->freeEntry(e.swap_filen);
1007 disconnect(e);
1008 }
1009
1010 void
1011 Rock::SwapDir::markForUnlink(StoreEntry &e)
1012 {
1013 debugs(47, 5, e);
1014 map->freeEntry(e.swap_filen);
1015 }
1016
1017 void
1018 Rock::SwapDir::trackReferences(StoreEntry &e)
1019 {
1020 debugs(47, 5, HERE << e);
1021 if (repl)
1022 repl->Add(repl, &e, &e.repl);
1023 }
1024
1025 void
1026 Rock::SwapDir::ignoreReferences(StoreEntry &e)
1027 {
1028 debugs(47, 5, HERE << e);
1029 if (repl)
1030 repl->Remove(repl, &e, &e.repl);
1031 }
1032
1033 void
1034 Rock::SwapDir::statfs(StoreEntry &e) const
1035 {
1036 storeAppendPrintf(&e, "\n");
1037 storeAppendPrintf(&e, "Maximum Size: %" PRIu64 " KB\n", maxSize() >> 10);
1038 storeAppendPrintf(&e, "Current Size: %.2f KB %.2f%%\n",
1039 currentSize() / 1024.0,
1040 Math::doublePercent(currentSize(), maxSize()));
1041
1042 const int entryLimit = entryLimitActual();
1043 const int slotLimit = slotLimitActual();
1044 storeAppendPrintf(&e, "Maximum entries: %9d\n", entryLimit);
1045 if (map && entryLimit > 0) {
1046 const int entryCount = map->entryCount();
1047 storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n",
1048 entryCount, (100.0 * entryCount / entryLimit));
1049 }
1050
1051 storeAppendPrintf(&e, "Maximum slots: %9d\n", slotLimit);
1052 if (map && slotLimit > 0) {
1053 const unsigned int slotsFree = !freeSlots ? 0 : freeSlots->size();
1054 if (slotsFree <= static_cast<const unsigned int>(slotLimit)) {
1055 const int usedSlots = slotLimit - static_cast<const int>(slotsFree);
1056 storeAppendPrintf(&e, "Used slots: %9d %.2f%%\n",
1057 usedSlots, (100.0 * usedSlots / slotLimit));
1058 }
1059 if (slotLimit < 100) { // XXX: otherwise too expensive to count
1060 Ipc::ReadWriteLockStats stats;
1061 map->updateStats(stats);
1062 stats.dump(e);
1063 }
1064 }
1065
1066 storeAppendPrintf(&e, "Pending operations: %d out of %d\n",
1067 store_open_disk_fd, Config.max_open_disk_fds);
1068
1069 storeAppendPrintf(&e, "Flags:");
1070
1071 if (flags.selected)
1072 storeAppendPrintf(&e, " SELECTED");
1073
1074 if (flags.read_only)
1075 storeAppendPrintf(&e, " READ-ONLY");
1076
1077 storeAppendPrintf(&e, "\n");
1078
1079 }
1080
1081 SBuf
1082 Rock::SwapDir::inodeMapPath() const
1083 {
1084 return Ipc::Mem::Segment::Name(SBuf(path), "map");
1085 }
1086
1087 const char *
1088 Rock::SwapDir::freeSlotsPath() const
1089 {
1090 static String spacesPath;
1091 spacesPath = path;
1092 spacesPath.append("_spaces");
1093 return spacesPath.termedBuf();
1094 }
1095
1096 namespace Rock
1097 {
1098 RunnerRegistrationEntry(SwapDirRr);
1099 }
1100
1101 void Rock::SwapDirRr::create()
1102 {
1103 Must(mapOwners.empty() && freeSlotsOwners.empty());
1104 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
1105 if (const Rock::SwapDir *const sd = dynamic_cast<Rock::SwapDir *>(INDEXSD(i))) {
1106 const int64_t capacity = sd->slotLimitActual();
1107
1108 SwapDir::DirMap::Owner *const mapOwner =
1109 SwapDir::DirMap::Init(sd->inodeMapPath(), capacity);
1110 mapOwners.push_back(mapOwner);
1111
1112 // TODO: somehow remove pool id and counters from PageStack?
1113 Ipc::Mem::Owner<Ipc::Mem::PageStack> *const freeSlotsOwner =
1114 shm_new(Ipc::Mem::PageStack)(sd->freeSlotsPath(),
1115 i+1, capacity, 0);
1116 freeSlotsOwners.push_back(freeSlotsOwner);
1117
1118 // TODO: add method to initialize PageStack with no free pages
1119 while (true) {
1120 Ipc::Mem::PageId pageId;
1121 if (!freeSlotsOwner->object()->pop(pageId))
1122 break;
1123 }
1124 }
1125 }
1126 }
1127
1128 Rock::SwapDirRr::~SwapDirRr()
1129 {
1130 for (size_t i = 0; i < mapOwners.size(); ++i) {
1131 delete mapOwners[i];
1132 delete freeSlotsOwners[i];
1133 }
1134 }
1135