]> git.ipfire.org Git - thirdparty/squid.git/blame - src/fs/rock/RockSwapDir.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / fs / rock / RockSwapDir.cc
CommitLineData
e2851fe7 1/*
bbc27441
AJ
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
e2851fe7
AR
7 */
8
bbc27441
AJ
9/* DEBUG: section 47 Store Directory Routines */
10
f7f3304a 11#include "squid.h"
8a01b99e 12#include "cache_cf.h"
807feb1d 13#include "CollapsedForwarding.h"
43ebbac3 14#include "ConfigOption.h"
e2851fe7
AR
15#include "DiskIO/DiskIOModule.h"
16#include "DiskIO/DiskIOStrategy.h"
17#include "DiskIO/ReadRequest.h"
18#include "DiskIO/WriteRequest.h"
e2851fe7 19#include "fs/rock/RockIoRequests.h"
602d9612 20#include "fs/rock/RockIoState.h"
e2851fe7 21#include "fs/rock/RockRebuild.h"
602d9612 22#include "fs/rock/RockSwapDir.h"
67679543 23#include "globals.h"
e0bdae60 24#include "ipc/mem/Pages.h"
f5adb654
AR
25#include "MemObject.h"
26#include "Parsing.h"
4d5904f7 27#include "SquidConfig.h"
f5adb654 28#include "SquidMath.h"
5bed43d6
FC
29#include "tools.h"
30
58373ff8 31#include <cstdlib>
f5adb654 32#include <iomanip>
36c84e19 33#include <limits>
e2851fe7 34
582c2af2
FC
35#if HAVE_SYS_STAT_H
36#include <sys/stat.h>
37#endif
38
e2851fe7
AR
39const int64_t Rock::SwapDir::HeaderSize = 16*1024;
40
9d4e9cfb
AR
41Rock::SwapDir::SwapDir(): ::SwapDir("rock"),
42 slotSize(HeaderSize), filePath(NULL), map(NULL), io(NULL),
43 waitingForPage(NULL)
e2851fe7
AR
44{
45}
46
47Rock::SwapDir::~SwapDir()
48{
49 delete io;
f1eaa254 50 delete map;
e2851fe7
AR
51 safe_free(filePath);
52}
53
54StoreSearch *
55Rock::SwapDir::search(String const url, HttpRequest *)
56{
9199139f
AR
57 assert(false);
58 return NULL; // XXX: implement
e2851fe7
AR
59}
60
79672f4f
AR
61void
62Rock::SwapDir::get(String const key, STOREGETCLIENT cb, void *data)
63{
64 ::SwapDir::get(key, cb, data);
65}
66
f1debb5e
DK
67// called when Squid core needs a StoreEntry with a given key
68StoreEntry *
69Rock::SwapDir::get(const cache_key *key)
70{
0a11e039 71 if (!map || !theFile || !theFile->canRead())
8abe1173
DK
72 return NULL;
73
5b3ea321 74 sfileno filen;
50dc81ec 75 const Ipc::StoreMapAnchor *const slot = map->openForReading(key, filen);
44c95fcf 76 if (!slot)
f1debb5e
DK
77 return NULL;
78
79 // create a brand new store entry and initialize it with stored basics
80 StoreEntry *e = new StoreEntry();
ce49546e
AR
81 anchorEntry(*e, filen, *slot);
82
e1825c5d 83 e->hashInsert(key);
f1debb5e
DK
84 trackReferences(*e);
85
86 return e;
87 // the disk entry remains open for reading, protected from modifications
88}
89
ce49546e 90bool
4475555f 91Rock::SwapDir::anchorCollapsed(StoreEntry &collapsed, bool &inSync)
ce49546e 92{
5296bbd9 93 if (!map || !theFile || !theFile->canRead())
ce49546e
AR
94 return false;
95
96 sfileno filen;
97 const Ipc::StoreMapAnchor *const slot = map->openForReading(
9d4e9cfb 98 reinterpret_cast<cache_key*>(collapsed.key), filen);
ce49546e
AR
99 if (!slot)
100 return false;
101
102 anchorEntry(collapsed, filen, *slot);
4475555f 103 inSync = updateCollapsedWith(collapsed, *slot);
2912daee 104 return true; // even if inSync is false
ce49546e
AR
105}
106
107bool
108Rock::SwapDir::updateCollapsed(StoreEntry &collapsed)
109{
110 if (!map || !theFile || !theFile->canRead())
111 return false;
112
113 if (collapsed.swap_filen < 0) // no longer using a disk cache
114 return true;
115 assert(collapsed.swap_dirn == index);
116
117 const Ipc::StoreMapAnchor &s = map->readableEntry(collapsed.swap_filen);
118 return updateCollapsedWith(collapsed, s);
119}
120
121bool
122Rock::SwapDir::updateCollapsedWith(StoreEntry &collapsed, const Ipc::StoreMapAnchor &anchor)
123{
e6d2c263 124 collapsed.swap_file_sz = anchor.basics.swap_file_sz;
ce49546e
AR
125 return true;
126}
127
128void
129Rock::SwapDir::anchorEntry(StoreEntry &e, const sfileno filen, const Ipc::StoreMapAnchor &anchor)
130{
131 const Ipc::StoreMapAnchor::Basics &basics = anchor.basics;
132
133 e.swap_file_sz = basics.swap_file_sz;
ce49546e
AR
134 e.lastref = basics.lastref;
135 e.timestamp = basics.timestamp;
136 e.expires = basics.expires;
137 e.lastmod = basics.lastmod;
138 e.refcount = basics.refcount;
139 e.flags = basics.flags;
140
2912daee
AR
141 if (anchor.complete()) {
142 e.store_status = STORE_OK;
143 e.swap_status = SWAPOUT_DONE;
144 } else {
145 e.store_status = STORE_PENDING;
146 e.swap_status = SWAPOUT_WRITING; // even though another worker writes?
147 }
148
ce49546e
AR
149 e.ping_status = PING_NONE;
150
ce49546e
AR
151 EBIT_CLR(e.flags, RELEASE_REQUEST);
152 EBIT_CLR(e.flags, KEY_PRIVATE);
153 EBIT_SET(e.flags, ENTRY_VALIDATED);
2912daee
AR
154
155 e.swap_dirn = index;
156 e.swap_filen = filen;
ce49546e
AR
157}
158
f58bb2f4 159void Rock::SwapDir::disconnect(StoreEntry &e)
6d8d05b5 160{
f58bb2f4 161 assert(e.swap_dirn == index);
6d8d05b5 162 assert(e.swap_filen >= 0);
f58bb2f4
AR
163 // cannot have SWAPOUT_NONE entry with swap_filen >= 0
164 assert(e.swap_status != SWAPOUT_NONE);
165
166 // do not rely on e.swap_status here because there is an async delay
167 // before it switches from SWAPOUT_WRITING to SWAPOUT_DONE.
168
4475555f 169 // since e has swap_filen, its slot is locked for reading and/or writing
49769258
AR
170 // but it is difficult to know whether THIS worker is reading or writing e,
171 // especially since we may switch from writing to reading. This code relies
172 // on Rock::IoState::writeableAnchor_ being set when we locked for writing.
173 if (e.mem_obj && e.mem_obj->swapout.sio != NULL &&
9d4e9cfb 174 dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_) {
4475555f 175 map->abortWriting(e.swap_filen);
2912daee
AR
176 e.swap_dirn = -1;
177 e.swap_filen = -1;
178 e.swap_status = SWAPOUT_NONE;
49769258 179 dynamic_cast<IoState&>(*e.mem_obj->swapout.sio).writeableAnchor_ = NULL;
2912daee
AR
180 Store::Root().transientsAbandon(e); // broadcasts after the change
181 } else {
4475555f 182 map->closeForReading(e.swap_filen);
2912daee
AR
183 e.swap_dirn = -1;
184 e.swap_filen = -1;
185 e.swap_status = SWAPOUT_NONE;
186 }
6d8d05b5
DK
187}
188
39c1e1d9
DK
189uint64_t
190Rock::SwapDir::currentSize() const
191{
50dc81ec 192 const uint64_t spaceSize = !freeSlots ?
9d4e9cfb 193 maxSize() : (slotSize * freeSlots->size());
e51ce7da
AR
194 // everything that is not free is in use
195 return maxSize() - spaceSize;
39c1e1d9
DK
196}
197
198uint64_t
199Rock::SwapDir::currentCount() const
200{
201 return map ? map->entryCount() : 0;
202}
203
204/// In SMP mode only the disker process reports stats to avoid
205/// counting the same stats by multiple processes.
206bool
207Rock::SwapDir::doReportStat() const
208{
209 return ::SwapDir::doReportStat() && (!UsingSmp() || IamDiskProcess());
210}
211
da9d3191
DK
212void
213Rock::SwapDir::swappedOut(const StoreEntry &)
214{
215 // stats are not stored but computed when needed
216}
217
b3165da6 218int64_t
36c84e19 219Rock::SwapDir::slotLimitAbsolute() const
b3165da6 220{
36c84e19
AR
221 // the max value is an invalid one; all values must be below the limit
222 assert(std::numeric_limits<Ipc::StoreMapSliceId>::max() ==
223 std::numeric_limits<SlotId>::max());
224 return std::numeric_limits<SlotId>::max();
225}
226
227int64_t
228Rock::SwapDir::slotLimitActual() const
229{
230 const int64_t sWanted = (maxSize() - HeaderSize)/slotSize;
231 const int64_t sLimitLo = map ? map->sliceLimit() : 0; // dynamic shrinking unsupported
232 const int64_t sLimitHi = slotLimitAbsolute();
233 return min(max(sLimitLo, sWanted), sLimitHi);
234}
235
236int64_t
237Rock::SwapDir::entryLimitActual() const
238{
239 return min(slotLimitActual(), entryLimitAbsolute());
b3165da6
DK
240}
241
73656056 242// TODO: encapsulate as a tool
e2851fe7
AR
243void
244Rock::SwapDir::create()
245{
246 assert(path);
247 assert(filePath);
248
984d890b
AR
249 if (UsingSmp() && !IamDiskProcess()) {
250 debugs (47,3, HERE << "disker will create in " << path);
251 return;
252 }
253
e2851fe7
AR
254 debugs (47,3, HERE << "creating in " << path);
255
04632397
AR
256 struct stat dir_sb;
257 if (::stat(path, &dir_sb) == 0) {
258 struct stat file_sb;
259 if (::stat(filePath, &file_sb) == 0) {
260 debugs (47, DBG_IMPORTANT, "Skipping existing Rock db: " << filePath);
261 return;
262 }
263 // else the db file is not there or is not accessible, and we will try
264 // to create it later below, generating a detailed error on failures.
265 } else { // path does not exist or is inaccessible
266 // If path exists but is not accessible, mkdir() below will fail, and
267 // the admin should see the error and act accordingly, so there is
268 // no need to distinguish ENOENT from other possible stat() errors.
51618c6a 269 debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path);
e2851fe7 270 const int res = mkdir(path, 0700);
413f00bd
AR
271 if (res != 0)
272 createError("mkdir");
9199139f 273 }
e2851fe7 274
04632397 275 debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath);
413f00bd
AR
276 const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600);
277 if (swap < 0)
278 createError("create");
279
e2851fe7 280#if SLOWLY_FILL_WITH_ZEROS
cc34568d
DK
281 char block[1024];
282 Must(maxSize() % sizeof(block) == 0);
e2851fe7
AR
283 memset(block, '\0', sizeof(block));
284
cc34568d 285 for (off_t offset = 0; offset < maxSize(); offset += sizeof(block)) {
413f00bd
AR
286 if (write(swap, block, sizeof(block)) != sizeof(block))
287 createError("write");
9199139f 288 }
e2851fe7 289#else
413f00bd
AR
290 if (ftruncate(swap, maxSize()) != 0)
291 createError("truncate");
e2851fe7
AR
292
293 char header[HeaderSize];
294 memset(header, '\0', sizeof(header));
413f00bd
AR
295 if (write(swap, header, sizeof(header)) != sizeof(header))
296 createError("write");
e2851fe7 297#endif
413f00bd
AR
298
299 close(swap);
300}
301
302// report Rock DB creation error and exit
303void
9d4e9cfb
AR
304Rock::SwapDir::createError(const char *const msg)
305{
413f00bd
AR
306 debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " <<
307 filePath << "; " << msg << " error: " << xstrerror());
308 fatal("Rock Store db creation error");
e2851fe7
AR
309}
310
311void
312Rock::SwapDir::init()
313{
314 debugs(47,2, HERE);
315
316 // XXX: SwapDirs aren't refcounted. We make IORequestor calls, which
317 // are refcounted. We up our count once to avoid implicit delete's.
8bf217bd 318 lock();
e2851fe7 319
d26fb333
AR
320 freeSlots = shm_old(Ipc::Mem::PageStack)(freeSlotsPath());
321
902df398 322 Must(!map);
300fd297 323 map = new DirMap(inodeMapPath());
50dc81ec 324 map->cleaner = this;
f7091279 325
3b581957 326 const char *ioModule = needsDiskStrand() ? "IpcIo" : "Blocking";
c03c2bad
AR
327 if (DiskIOModule *m = DiskIOModule::Find(ioModule)) {
328 debugs(47,2, HERE << "Using DiskIO module: " << ioModule);
329 io = m->createStrategy();
330 io->init();
331 } else {
51618c6a
AR
332 debugs(47, DBG_CRITICAL, "FATAL: Rock store is missing DiskIO module: " <<
333 ioModule);
c03c2bad
AR
334 fatal("Rock Store missing a required DiskIO module");
335 }
e2851fe7
AR
336
337 theFile = io->newFile(filePath);
43ebbac3 338 theFile->configure(fileConfig);
e2851fe7 339 theFile->open(O_RDWR, 0644, this);
078274f6
AR
340
341 // Increment early. Otherwise, if one SwapDir finishes rebuild before
342 // others start, storeRebuildComplete() will think the rebuild is over!
343 // TODO: move store_dirs_rebuilding hack to store modules that need it.
344 ++StoreController::store_dirs_rebuilding;
e2851fe7
AR
345}
346
14911a4e
AR
347bool
348Rock::SwapDir::needsDiskStrand() const
349{
3b581957
DK
350 const bool wontEvenWorkWithoutDisker = Config.workers > 1;
351 const bool wouldWorkBetterWithDisker = DiskIOModule::Find("IpcIo");
352 return InDaemonMode() && (wontEvenWorkWithoutDisker ||
e29ccb57 353 wouldWorkBetterWithDisker);
14911a4e
AR
354}
355
e2851fe7
AR
356void
357Rock::SwapDir::parse(int anIndex, char *aPath)
358{
359 index = anIndex;
360
361 path = xstrdup(aPath);
362
363 // cache store is located at path/db
364 String fname(path);
365 fname.append("/rock");
366 filePath = xstrdup(fname.termedBuf());
367
24063512 368 parseSize(false);
e2851fe7
AR
369 parseOptions(0);
370
f428c9c4
AR
371 // Current openForWriting() code overwrites the old slot if needed
372 // and possible, so proactively removing old slots is probably useless.
0e240235 373 assert(!repl); // repl = createRemovalPolicy(Config.replPolicy);
e2851fe7
AR
374
375 validateOptions();
376}
377
378void
c6059970 379Rock::SwapDir::reconfigure()
e2851fe7 380{
24063512 381 parseSize(true);
e2851fe7
AR
382 parseOptions(1);
383 // TODO: can we reconfigure the replacement policy (repl)?
384 validateOptions();
385}
386
387/// parse maximum db disk size
388void
9dca980d 389Rock::SwapDir::parseSize(const bool reconfig)
e2851fe7 390{
cc34568d
DK
391 const int i = GetInteger();
392 if (i < 0)
e2851fe7 393 fatal("negative Rock cache_dir size value");
24063512
DK
394 const uint64_t new_max_size =
395 static_cast<uint64_t>(i) << 20; // MBytes to Bytes
9dca980d 396 if (!reconfig)
24063512
DK
397 max_size = new_max_size;
398 else if (new_max_size != max_size) {
399 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir '" << path << "' size "
400 "cannot be changed dynamically, value left unchanged (" <<
401 (max_size >> 20) << " MB)");
402 }
e2851fe7
AR
403}
404
43ebbac3
AR
405ConfigOption *
406Rock::SwapDir::getOptionTree() const
407{
408 ConfigOptionVector *vector = dynamic_cast<ConfigOptionVector*>(::SwapDir::getOptionTree());
409 assert(vector);
e51ce7da 410 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseSizeOption, &SwapDir::dumpSizeOption));
43ebbac3 411 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseTimeOption, &SwapDir::dumpTimeOption));
df881a0f 412 vector->options.push_back(new ConfigOptionAdapter<SwapDir>(*const_cast<SwapDir *>(this), &SwapDir::parseRateOption, &SwapDir::dumpRateOption));
43ebbac3
AR
413 return vector;
414}
415
24063512
DK
416bool
417Rock::SwapDir::allowOptionReconfigure(const char *const option) const
418{
e51ce7da 419 return strcmp(option, "slot-size") != 0 &&
16fea83b 420 ::SwapDir::allowOptionReconfigure(option);
24063512
DK
421}
422
43ebbac3
AR
423/// parses time-specific options; mimics ::SwapDir::optionObjectSizeParse()
424bool
9dca980d 425Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig)
43ebbac3
AR
426{
427 // TODO: ::SwapDir or, better, Config should provide time-parsing routines,
e51ce7da 428 // including time unit handling. Same for size and rate.
43ebbac3
AR
429
430 time_msec_t *storedTime;
431 if (strcmp(option, "swap-timeout") == 0)
432 storedTime = &fileConfig.ioTimeout;
433 else
434 return false;
435
436 if (!value)
437 self_destruct();
438
df881a0f
AR
439 // TODO: handle time units and detect parsing errors better
440 const int64_t parsedValue = strtoll(value, NULL, 10);
441 if (parsedValue < 0) {
442 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue);
43ebbac3
AR
443 self_destruct();
444 }
445
df881a0f
AR
446 const time_msec_t newTime = static_cast<time_msec_t>(parsedValue);
447
9dca980d 448 if (!reconfig)
7846d084
DK
449 *storedTime = newTime;
450 else if (*storedTime != newTime) {
451 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
452 << " cannot be changed dynamically, value left unchanged: " <<
453 *storedTime);
454 }
43ebbac3
AR
455
456 return true;
457}
458
459/// reports time-specific options; mimics ::SwapDir::optionObjectSizeDump()
460void
461Rock::SwapDir::dumpTimeOption(StoreEntry * e) const
462{
463 if (fileConfig.ioTimeout)
c91ca3ce 464 storeAppendPrintf(e, " swap-timeout=%" PRId64,
43ebbac3
AR
465 static_cast<int64_t>(fileConfig.ioTimeout));
466}
467
df881a0f
AR
468/// parses rate-specific options; mimics ::SwapDir::optionObjectSizeParse()
469bool
470Rock::SwapDir::parseRateOption(char const *option, const char *value, int isaReconfig)
471{
472 int *storedRate;
473 if (strcmp(option, "max-swap-rate") == 0)
474 storedRate = &fileConfig.ioRate;
475 else
476 return false;
477
478 if (!value)
479 self_destruct();
480
481 // TODO: handle time units and detect parsing errors better
482 const int64_t parsedValue = strtoll(value, NULL, 10);
483 if (parsedValue < 0) {
484 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << parsedValue);
485 self_destruct();
486 }
487
488 const int newRate = static_cast<int>(parsedValue);
489
490 if (newRate < 0) {
491 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must not be negative but is: " << newRate);
492 self_destruct();
493 }
494
7846d084
DK
495 if (!isaReconfig)
496 *storedRate = newRate;
497 else if (*storedRate != newRate) {
498 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
499 << " cannot be changed dynamically, value left unchanged: " <<
500 *storedRate);
501 }
df881a0f
AR
502
503 return true;
504}
505
506/// reports rate-specific options; mimics ::SwapDir::optionObjectSizeDump()
507void
508Rock::SwapDir::dumpRateOption(StoreEntry * e) const
509{
510 if (fileConfig.ioRate >= 0)
511 storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate);
512}
513
e51ce7da
AR
514/// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse()
515bool
a57a662c 516Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig)
e51ce7da
AR
517{
518 uint64_t *storedSize;
519 if (strcmp(option, "slot-size") == 0)
520 storedSize = &slotSize;
521 else
522 return false;
523
524 if (!value)
525 self_destruct();
526
527 // TODO: handle size units and detect parsing errors better
528 const uint64_t newSize = strtoll(value, NULL, 10);
529 if (newSize <= 0) {
530 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << newSize);
531 self_destruct();
532 }
533
534 if (newSize <= sizeof(DbCellHeader)) {
535 debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize);
536 self_destruct();
537 }
538
a57a662c 539 if (!reconfig)
e51ce7da
AR
540 *storedSize = newSize;
541 else if (*storedSize != newSize) {
542 debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option
543 << " cannot be changed dynamically, value left unchanged: " <<
544 *storedSize);
545 }
546
547 return true;
548}
549
550/// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump()
551void
552Rock::SwapDir::dumpSizeOption(StoreEntry * e) const
553{
554 storeAppendPrintf(e, " slot-size=%" PRId64, slotSize);
555}
556
e2851fe7
AR
557/// check the results of the configuration; only level-0 debugging works here
558void
559Rock::SwapDir::validateOptions()
560{
e51ce7da
AR
561 if (slotSize <= 0)
562 fatal("Rock store requires a positive slot-size");
e2851fe7 563
9dc492d0 564 const int64_t maxSizeRoundingWaste = 1024 * 1024; // size is configured in MB
e51ce7da 565 const int64_t slotSizeRoundingWaste = slotSize;
9dc492d0 566 const int64_t maxRoundingWaste =
e51ce7da 567 max(maxSizeRoundingWaste, slotSizeRoundingWaste);
36c84e19
AR
568
569 // an entry consumes at least one slot; round up to reduce false warnings
570 const int64_t blockSize = static_cast<int64_t>(slotSize);
571 const int64_t maxObjSize = max(blockSize,
572 ((maxObjectSize()+blockSize-1)/blockSize)*blockSize);
573
574 // Does the "sfileno*max-size" limit match configured db capacity?
575 const double entriesMayOccupy = entryLimitAbsolute()*static_cast<double>(maxObjSize);
576 if (entriesMayOccupy + maxRoundingWaste < maxSize()) {
577 const int64_t diskWasteSize = maxSize() - static_cast<int64_t>(entriesMayOccupy);
578 debugs(47, DBG_CRITICAL, "WARNING: Rock cache_dir " << path << " wastes disk space due to entry limits:" <<
579 "\n\tconfigured db capacity: " << maxSize() << " bytes" <<
580 "\n\tconfigured db slot size: " << slotSize << " bytes" <<
581 "\n\tconfigured maximum entry size: " << maxObjectSize() << " bytes" <<
582 "\n\tmaximum number of cache_dir entries supported by Squid: " << entryLimitAbsolute() <<
583 "\n\tdisk space all entries may use: " << entriesMayOccupy << " bytes" <<
584 "\n\tdisk space wasted: " << diskWasteSize << " bytes");
585 }
586
587 // Does the "absolute slot count" limit match configured db capacity?
588 const double slotsMayOccupy = slotLimitAbsolute()*static_cast<double>(slotSize);
589 if (slotsMayOccupy + maxRoundingWaste < maxSize()) {
590 const int64_t diskWasteSize = maxSize() - static_cast<int64_t>(entriesMayOccupy);
591 debugs(47, DBG_CRITICAL, "WARNING: Rock cache_dir " << path << " wastes disk space due to slot limits:" <<
592 "\n\tconfigured db capacity: " << maxSize() << " bytes" <<
593 "\n\tconfigured db slot size: " << slotSize << " bytes" <<
594 "\n\tmaximum number of rock cache_dir slots supported by Squid: " << slotLimitAbsolute() <<
595 "\n\tdisk space all slots may use: " << slotsMayOccupy << " bytes" <<
596 "\n\tdisk space wasted: " << diskWasteSize << " bytes");
9199139f 597 }
e2851fe7
AR
598}
599
600void
9199139f
AR
601Rock::SwapDir::rebuild()
602{
078274f6
AR
603 //++StoreController::store_dirs_rebuilding; // see Rock::SwapDir::init()
604 AsyncJob::Start(new Rebuild(this));
e2851fe7
AR
605}
606
c728b6f9
AR
607bool
608Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const
e2851fe7 609{
c728b6f9
AR
610 if (!::SwapDir::canStore(e, sizeof(DbCellHeader)+diskSpaceNeeded, load))
611 return false;
e2851fe7 612
c728b6f9
AR
613 if (!theFile || !theFile->canWrite())
614 return false;
8abe1173
DK
615
616 if (!map)
c728b6f9 617 return false;
8abe1173 618
e0f3492c 619 // Do not start I/O transaction if there are less than 10% free pages left.
551f8a18 620 // TODO: reserve page instead
7ef5aa64 621 if (needsDiskStrand() &&
e29ccb57 622 Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) {
551f8a18 623 debugs(47, 5, HERE << "too few shared pages for IPC I/O left");
e0bdae60
DK
624 return false;
625 }
626
e2851fe7 627 if (io->shedLoad())
c728b6f9 628 return false;
e2851fe7 629
c728b6f9
AR
630 load = io->load();
631 return true;
e2851fe7
AR
632}
633
634StoreIOState::Pointer
635Rock::SwapDir::createStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
636{
637 if (!theFile || theFile->error()) {
638 debugs(47,4, HERE << theFile);
639 return NULL;
640 }
641
5b3ea321 642 sfileno filen;
50dc81ec 643 Ipc::StoreMapAnchor *const slot =
5b3ea321 644 map->openForWriting(reinterpret_cast<const cache_key *>(e.key), filen);
44c95fcf 645 if (!slot) {
f5adb654 646 debugs(47, 5, HERE << "map->add failed");
f1eaa254 647 return NULL;
e2851fe7 648 }
93910d5c 649
50dc81ec 650 assert(filen >= 0);
44c95fcf 651 slot->set(e);
e2851fe7 652
c728b6f9 653 // XXX: We rely on our caller, storeSwapOutStart(), to set e.fileno.
dd7ac58b
AR
654 // If that does not happen, the entry will not decrement the read level!
655
50dc81ec
AR
656 Rock::SwapDir::Pointer self(this);
657 IoState *sio = new IoState(self, &e, cbFile, cbIo, data);
e2851fe7
AR
658
659 sio->swap_dirn = index;
5b3ea321 660 sio->swap_filen = filen;
50dc81ec 661 sio->writeableAnchor_ = slot;
e2851fe7 662
5b3ea321 663 debugs(47,5, HERE << "dir " << index << " created new filen " <<
9199139f 664 std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
50dc81ec 665 sio->swap_filen << std::dec << " starting at " <<
93910d5c 666 diskOffset(sio->swap_filen));
e2851fe7
AR
667
668 sio->file(theFile);
669
670 trackReferences(e);
671 return sio;
672}
673
674int64_t
36c84e19 675Rock::SwapDir::diskOffset(const SlotId sid) const
e2851fe7 676{
36c84e19
AR
677 assert(sid >= 0);
678 return HeaderSize + slotSize*sid;
93910d5c
AR
679}
680
681int64_t
682Rock::SwapDir::diskOffset(Ipc::Mem::PageId &pageId) const
683{
684 assert(pageId);
685 return diskOffset(pageId.number - 1);
e2851fe7
AR
686}
687
688int64_t
689Rock::SwapDir::diskOffsetLimit() const
690{
c728b6f9 691 assert(map);
36c84e19 692 return diskOffset(map->sliceLimit());
93910d5c
AR
693}
694
695bool
50dc81ec 696Rock::SwapDir::useFreeSlot(Ipc::Mem::PageId &pageId)
93910d5c 697{
50dc81ec
AR
698 if (freeSlots->pop(pageId)) {
699 debugs(47, 5, "got a previously free slot: " << pageId);
700 return true;
701 }
93910d5c 702
50dc81ec
AR
703 // catch free slots delivered to noteFreeMapSlice()
704 assert(!waitingForPage);
705 waitingForPage = &pageId;
706 if (map->purgeOne()) {
707 assert(!waitingForPage); // noteFreeMapSlice() should have cleared it
708 assert(pageId.set());
709 debugs(47, 5, "got a previously busy slot: " << pageId);
710 return true;
711 }
712 assert(waitingForPage == &pageId);
713 waitingForPage = NULL;
714
715 debugs(47, 3, "cannot get a slot; entries: " << map->entryCount());
716 return false;
93910d5c
AR
717}
718
50dc81ec
AR
719bool
720Rock::SwapDir::validSlotId(const SlotId slotId) const
93910d5c 721{
36c84e19 722 return 0 <= slotId && slotId < slotLimitActual();
93910d5c
AR
723}
724
725void
36c84e19 726Rock::SwapDir::noteFreeMapSlice(const Ipc::StoreMapSliceId sliceId)
50dc81ec
AR
727{
728 Ipc::Mem::PageId pageId;
729 pageId.pool = index+1;
730 pageId.number = sliceId+1;
731 if (waitingForPage) {
732 *waitingForPage = pageId;
733 waitingForPage = NULL;
734 } else {
735 freeSlots->push(pageId);
93910d5c
AR
736 }
737}
738
50dc81ec 739// tries to open an old entry with swap_filen for reading
e2851fe7
AR
740StoreIOState::Pointer
741Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data)
742{
743 if (!theFile || theFile->error()) {
744 debugs(47,4, HERE << theFile);
745 return NULL;
746 }
747
9199139f 748 if (e.swap_filen < 0) {
1adea2a6 749 debugs(47,4, HERE << e);
f1eaa254
DK
750 return NULL;
751 }
752
e0f3492c 753 // Do not start I/O transaction if there are less than 10% free pages left.
551f8a18 754 // TODO: reserve page instead
7ef5aa64 755 if (needsDiskStrand() &&
e29ccb57 756 Ipc::Mem::PageLevel(Ipc::Mem::PageId::ioPage) >= 0.9 * Ipc::Mem::PageLimit(Ipc::Mem::PageId::ioPage)) {
551f8a18 757 debugs(47, 5, HERE << "too few shared pages for IPC I/O left");
e0bdae60
DK
758 return NULL;
759 }
760
c728b6f9
AR
761 // The are two ways an entry can get swap_filen: our get() locked it for
762 // reading or our storeSwapOutStart() locked it for writing. Peeking at our
5296bbd9 763 // locked entry is safe, but no support for reading the entry we swap out.
50dc81ec 764 const Ipc::StoreMapAnchor *slot = map->peekAtReader(e.swap_filen);
44c95fcf 765 if (!slot)
c728b6f9 766 return NULL; // we were writing afterall
1adea2a6 767
50dc81ec
AR
768 Rock::SwapDir::Pointer self(this);
769 IoState *sio = new IoState(self, &e, cbFile, cbIo, data);
e2851fe7
AR
770
771 sio->swap_dirn = index;
772 sio->swap_filen = e.swap_filen;
50dc81ec
AR
773 sio->readableAnchor_ = slot;
774 sio->file(theFile);
c728b6f9 775
5b3ea321 776 debugs(47,5, HERE << "dir " << index << " has old filen: " <<
9199139f
AR
777 std::setfill('0') << std::hex << std::uppercase << std::setw(8) <<
778 sio->swap_filen);
e2851fe7 779
50dc81ec 780 assert(slot->sameKey(static_cast<const cache_key*>(e.key)));
5296bbd9
AR
781 // For collapsed disk hits: e.swap_file_sz and slot->basics.swap_file_sz
782 // may still be zero and basics.swap_file_sz may grow.
783 assert(slot->basics.swap_file_sz >= e.swap_file_sz);
e2851fe7 784
e2851fe7
AR
785 return sio;
786}
787
788void
789Rock::SwapDir::ioCompletedNotification()
790{
51618c6a 791 if (!theFile)
e2851fe7 792 fatalf("Rock cache_dir failed to initialize db file: %s", filePath);
e2851fe7 793
51618c6a
AR
794 if (theFile->error())
795 fatalf("Rock cache_dir at %s failed to open db file: %s", filePath,
796 xstrerror());
e2851fe7 797
51618c6a 798 debugs(47, 2, "Rock cache_dir[" << index << "] limits: " <<
36c84e19
AR
799 std::setw(12) << maxSize() << " disk bytes, " <<
800 std::setw(7) << map->entryLimit() << " entries, and " <<
801 std::setw(7) << map->sliceLimit() << " slots");
9cfba26c
AR
802
803 rebuild();
e2851fe7
AR
804}
805
806void
807Rock::SwapDir::closeCompleted()
808{
809 theFile = NULL;
810}
811
812void
813Rock::SwapDir::readCompleted(const char *buf, int rlen, int errflag, RefCount< ::ReadRequest> r)
814{
815 ReadRequest *request = dynamic_cast<Rock::ReadRequest*>(r.getRaw());
816 assert(request);
817 IoState::Pointer sio = request->sio;
818
c728b6f9
AR
819 if (errflag == DISK_OK && rlen > 0)
820 sio->offset_ += rlen;
e2851fe7 821
5296bbd9 822 sio->callReaderBack(r->buf, rlen);
e2851fe7
AR
823}
824
825void
826Rock::SwapDir::writeCompleted(int errflag, size_t rlen, RefCount< ::WriteRequest> r)
827{
828 Rock::WriteRequest *request = dynamic_cast<Rock::WriteRequest*>(r.getRaw());
829 assert(request);
830 assert(request->sio != NULL);
831 IoState &sio = *request->sio;
1adea2a6 832
50dc81ec
AR
833 // quit if somebody called IoState::close() while we were waiting
834 if (!sio.stillWaiting()) {
835 debugs(79, 3, "ignoring closed entry " << sio.swap_filen);
5296bbd9 836 noteFreeMapSlice(request->sidNext);
50dc81ec
AR
837 return;
838 }
839
5296bbd9
AR
840 // TODO: Fail if disk dropped one of the previous write requests.
841
f58bb2f4 842 if (errflag == DISK_OK) {
c728b6f9 843 // do not increment sio.offset_ because we do it in sio->write()
ce49546e
AR
844
845 // finalize the shared slice info after writing slice contents to disk
846 Ipc::StoreMap::Slice &slice =
847 map->writeableSlice(sio.swap_filen, request->sidCurrent);
848 slice.size = request->len - sizeof(DbCellHeader);
849 slice.next = request->sidNext;
9d4e9cfb 850
5296bbd9
AR
851 if (request->eof) {
852 assert(sio.e);
853 assert(sio.writeableAnchor_);
854 sio.e->swap_file_sz = sio.writeableAnchor_->basics.swap_file_sz =
9d4e9cfb 855 sio.offset_;
5296bbd9 856
50dc81ec
AR
857 // close, the entry gets the read lock
858 map->closeForWriting(sio.swap_filen, true);
49769258 859 sio.writeableAnchor_ = NULL;
93910d5c 860 sio.finishedWriting(errflag);
50dc81ec
AR
861 }
862 } else {
5296bbd9
AR
863 noteFreeMapSlice(request->sidNext);
864
4475555f 865 writeError(*sio.e);
50dc81ec
AR
866 sio.finishedWriting(errflag);
867 // and hope that Core will call disconnect() to close the map entry
868 }
ce49546e 869
99921d9d 870 CollapsedForwarding::Broadcast(*sio.e);
93910d5c 871}
e2851fe7 872
93910d5c 873void
4475555f 874Rock::SwapDir::writeError(StoreEntry &e)
93910d5c
AR
875{
876 // Do not abortWriting here. The entry should keep the write lock
877 // instead of losing association with the store and confusing core.
4475555f
AR
878 map->freeEntry(e.swap_filen); // will mark as unusable, just in case
879
880 Store::Root().transientsAbandon(e);
881
50dc81ec 882 // All callers must also call IoState callback, to propagate the error.
e2851fe7
AR
883}
884
885bool
886Rock::SwapDir::full() const
887{
50dc81ec 888 return freeSlots != NULL && !freeSlots->size();
e2851fe7
AR
889}
890
e2851fe7
AR
891// storeSwapOutFileClosed calls this nethod on DISK_NO_SPACE_LEFT,
892// but it should not happen for us
893void
9199139f
AR
894Rock::SwapDir::diskFull()
895{
f5adb654
AR
896 debugs(20, DBG_IMPORTANT, "BUG: No space left with rock cache_dir: " <<
897 filePath);
e2851fe7
AR
898}
899
900/// purge while full(); it should be sufficient to purge just one
901void
902Rock::SwapDir::maintain()
903{
50dc81ec
AR
904 // The Store calls this to free some db space, but there is nothing wrong
905 // with a full() db, except when db has to shrink after reconfigure, and
906 // we do not support shrinking yet (it would have to purge specific slots).
907 // TODO: Disable maintain() requests when they are pointless.
e2851fe7
AR
908}
909
910void
911Rock::SwapDir::reference(StoreEntry &e)
912{
913 debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
0e240235 914 if (repl && repl->Referenced)
e2851fe7
AR
915 repl->Referenced(repl, &e, &e.repl);
916}
917
4c973beb 918bool
54347cbd 919Rock::SwapDir::dereference(StoreEntry &e, bool)
e2851fe7
AR
920{
921 debugs(47, 5, HERE << &e << ' ' << e.swap_dirn << ' ' << e.swap_filen);
0e240235 922 if (repl && repl->Dereferenced)
e2851fe7 923 repl->Dereferenced(repl, &e, &e.repl);
4c973beb
AR
924
925 // no need to keep e in the global store_table for us; we have our own map
926 return false;
e2851fe7
AR
927}
928
c521ad17
DK
929bool
930Rock::SwapDir::unlinkdUseful() const
931{
932 // no entry-specific files to unlink
933 return false;
934}
935
e2851fe7
AR
936void
937Rock::SwapDir::unlink(StoreEntry &e)
938{
f58bb2f4 939 debugs(47, 5, HERE << e);
e2851fe7 940 ignoreReferences(e);
50dc81ec 941 map->freeEntry(e.swap_filen);
f58bb2f4 942 disconnect(e);
e2851fe7
AR
943}
944
1bfe9ade
AR
945void
946Rock::SwapDir::markForUnlink(StoreEntry &e)
947{
948 debugs(47, 5, e);
949 map->freeEntry(e.swap_filen);
950}
951
e2851fe7
AR
952void
953Rock::SwapDir::trackReferences(StoreEntry &e)
954{
f58bb2f4 955 debugs(47, 5, HERE << e);
0e240235
AR
956 if (repl)
957 repl->Add(repl, &e, &e.repl);
e2851fe7
AR
958}
959
e2851fe7
AR
960void
961Rock::SwapDir::ignoreReferences(StoreEntry &e)
962{
f58bb2f4 963 debugs(47, 5, HERE << e);
0e240235
AR
964 if (repl)
965 repl->Remove(repl, &e, &e.repl);
e2851fe7
AR
966}
967
968void
969Rock::SwapDir::statfs(StoreEntry &e) const
970{
971 storeAppendPrintf(&e, "\n");
c91ca3ce 972 storeAppendPrintf(&e, "Maximum Size: %" PRIu64 " KB\n", maxSize() >> 10);
57f583f1 973 storeAppendPrintf(&e, "Current Size: %.2f KB %.2f%%\n",
cc34568d
DK
974 currentSize() / 1024.0,
975 Math::doublePercent(currentSize(), maxSize()));
e2851fe7 976
36c84e19
AR
977 const int entryLimit = entryLimitActual();
978 const int slotLimit = slotLimitActual();
979 storeAppendPrintf(&e, "Maximum entries: %9d\n", entryLimit);
980 if (map && entryLimit > 0) {
2da4bfe6
A
981 const int entryCount = map->entryCount();
982 storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n",
983 entryCount, (100.0 * entryCount / entryLimit));
36c84e19 984 }
c728b6f9 985
36c84e19
AR
986 storeAppendPrintf(&e, "Maximum slots: %9d\n", slotLimit);
987 if (map && slotLimit > 0) {
2da4bfe6
A
988 const unsigned int slotsFree = !freeSlots ? 0 : freeSlots->size();
989 if (slotsFree <= static_cast<const unsigned int>(slotLimit)) {
990 const int usedSlots = slotLimit - static_cast<const int>(slotsFree);
991 storeAppendPrintf(&e, "Used slots: %9d %.2f%%\n",
992 usedSlots, (100.0 * usedSlots / slotLimit));
993 }
994 if (slotLimit < 100) { // XXX: otherwise too expensive to count
995 Ipc::ReadWriteLockStats stats;
996 map->updateStats(stats);
997 stats.dump(e);
998 }
9199139f 999 }
e2851fe7
AR
1000
1001 storeAppendPrintf(&e, "Pending operations: %d out of %d\n",
9199139f 1002 store_open_disk_fd, Config.max_open_disk_fds);
e2851fe7
AR
1003
1004 storeAppendPrintf(&e, "Flags:");
1005
1006 if (flags.selected)
1007 storeAppendPrintf(&e, " SELECTED");
1008
1009 if (flags.read_only)
1010 storeAppendPrintf(&e, " READ-ONLY");
1011
1012 storeAppendPrintf(&e, "\n");
1013
1014}
902df398 1015
1860fbac 1016SBuf
9d4e9cfb
AR
1017Rock::SwapDir::inodeMapPath() const
1018{
1860fbac 1019 return Ipc::Mem::Segment::Name(SBuf(path), "map");
300fd297
AR
1020}
1021
1022const char *
9d4e9cfb
AR
1023Rock::SwapDir::freeSlotsPath() const
1024{
300fd297
AR
1025 static String spacesPath;
1026 spacesPath = path;
1027 spacesPath.append("_spaces");
1028 return spacesPath.termedBuf();
1029}
1030
9bb01611 1031namespace Rock
902df398 1032{
21b7990f 1033RunnerRegistrationEntry(SwapDirRr);
9bb01611 1034}
902df398 1035
21b7990f 1036void Rock::SwapDirRr::create()
902df398 1037{
50dc81ec 1038 Must(mapOwners.empty() && freeSlotsOwners.empty());
4404f1c5
DK
1039 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
1040 if (const Rock::SwapDir *const sd = dynamic_cast<Rock::SwapDir *>(INDEXSD(i))) {
36c84e19 1041 const int64_t capacity = sd->slotLimitActual();
e51ce7da 1042
93910d5c 1043 SwapDir::DirMap::Owner *const mapOwner =
300fd297 1044 SwapDir::DirMap::Init(sd->inodeMapPath(), capacity);
93910d5c
AR
1045 mapOwners.push_back(mapOwner);
1046
e6d2c263 1047 // TODO: somehow remove pool id and counters from PageStack?
50dc81ec
AR
1048 Ipc::Mem::Owner<Ipc::Mem::PageStack> *const freeSlotsOwner =
1049 shm_new(Ipc::Mem::PageStack)(sd->freeSlotsPath(),
636b913c 1050 i+1, capacity, 0);
50dc81ec 1051 freeSlotsOwners.push_back(freeSlotsOwner);
93910d5c 1052
e6d2c263 1053 // TODO: add method to initialize PageStack with no free pages
93910d5c
AR
1054 while (true) {
1055 Ipc::Mem::PageId pageId;
50dc81ec 1056 if (!freeSlotsOwner->object()->pop(pageId))
93910d5c
AR
1057 break;
1058 }
902df398
DK
1059 }
1060 }
1061}
1062
9bb01611 1063Rock::SwapDirRr::~SwapDirRr()
902df398 1064{
93910d5c
AR
1065 for (size_t i = 0; i < mapOwners.size(); ++i) {
1066 delete mapOwners[i];
50dc81ec 1067 delete freeSlotsOwners[i];
93910d5c 1068 }
902df398 1069}