1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 47 Store Directory Routines */
10
11 #include "squid.h"
12 #include "Debug.h"
13 #include "globals.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
16 #include "Store.h"
17 #include "store/Disk.h"
18 #include "store/Disks.h"
19 #include "swap_log_op.h"
20 #include "util.h" // for tvSubDsec() which should be in SquidTime.h
21
22 static STDIRSELECT storeDirSelectSwapDirRoundRobin;
23 static STDIRSELECT storeDirSelectSwapDirLeastLoad;
24 /*
25 * This function pointer is set according to 'store_dir_select_algorithm'
26 * in squid.conf.
27 */
28 STDIRSELECT *storeDirSelectSwapDir = storeDirSelectSwapDirLeastLoad;
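/*
 * Illustrative squid.conf usage (a sketch, not taken from this file):
 *
 *   store_dir_select_algorithm round-robin
 *
 * When the directive is absent or set to anything else, init() below keeps
 * the least-load policy assigned here.
 */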
29
30 /*
31 * This new selection scheme simply does round-robin on all SwapDirs.
32 * A SwapDir is skipped if it is over the max_size (100%) limit, or
33 * overloaded.
34 */
35 static int
36 storeDirSelectSwapDirRoundRobin(const StoreEntry * e)
37 {
38 // e->objectLen() is negative at this point when we are still STORE_PENDING
39 ssize_t objsize = e->mem_obj->expectedReplySize();
40 if (objsize != -1)
41 objsize += e->mem_obj->swap_hdr_sz;
42
43 // Increment the first candidate once per selection (not once per
44 // iteration) to reduce bias when some disk(s) attract more entries.
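// For example: with three configured cache_dirs, successive selections begin
// their scan at dir 1, then dir 2, then dir 0, and so on, rather than always
// probing dir 0 first.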
45 static int firstCandidate = 0;
46 if (++firstCandidate >= Config.cacheSwap.n_configured)
47 firstCandidate = 0;
48
49 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
50 const int dirn = (firstCandidate + i) % Config.cacheSwap.n_configured;
51 const SwapDir *sd = dynamic_cast<SwapDir*>(INDEXSD(dirn));
52
53 int load = 0;
54 if (!sd->canStore(*e, objsize, load))
55 continue;
56
57 if (load < 0 || load > 1000) {
58 continue;
59 }
60
61 return dirn;
62 }
63
64 return -1;
65 }
66
67 /*
68 * Spread load across all of the store directories
69 *
70 * Note: We should modify this later to prefer placing objects in
71 * the *tightest fit* swapdir to conserve space, taking actual swapdir
72 * usage into account as well. But for now, this hack will do while
73 * testing, so you should order your swapdirs in the config file
74 * from smallest max-size= to largest max-size=.
75 *
76 * We also have to choose nleast == nconf since we need to consider
77 * ALL swapdirs, regardless of state. Again, this is a hack while
78 * we sort out the real usefulness of this algorithm.
79 */
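/*
 * In short, as implemented below: candidates are ranked by their reported
 * load; ties are broken by the smallest max-size among dirs that can store
 * the object, and then by the largest amount of free space.
 */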
80 static int
81 storeDirSelectSwapDirLeastLoad(const StoreEntry * e)
82 {
83 int64_t most_free = 0;
84 ssize_t least_objsize = -1;
85 int least_load = INT_MAX;
86 int load;
87 int dirn = -1;
88 int i;
89 RefCount<SwapDir> SD;
90
91 // e->objectLen() is negative at this point when we are still STORE_PENDING
92 ssize_t objsize = e->mem_obj->expectedReplySize();
93
94 if (objsize != -1)
95 objsize += e->mem_obj->swap_hdr_sz;
96
97 for (i = 0; i < Config.cacheSwap.n_configured; ++i) {
98 SD = dynamic_cast<SwapDir *>(INDEXSD(i));
99 SD->flags.selected = false;
100
101 if (!SD->canStore(*e, objsize, load))
102 continue;
103
104 if (load < 0 || load > 1000)
105 continue;
106
107 if (load > least_load)
108 continue;
109
110 const int64_t cur_free = SD->maxSize() - SD->currentSize();
111
112 /* If the load is equal, then look in more detail */
113 if (load == least_load) {
114 /* closest max-size fit */
115
116 if (least_objsize != -1)
117 if (SD->maxObjectSize() > least_objsize)
118 continue;
119
120 /* most free */
121 if (cur_free < most_free)
122 continue;
123 }
124
125 least_load = load;
126 least_objsize = SD->maxObjectSize();
127 most_free = cur_free;
128 dirn = i;
129 }
130
131 if (dirn >= 0)
132 dynamic_cast<SwapDir *>(INDEXSD(dirn))->flags.selected = true;
133
134 return dirn;
135 }
136
137 SwapDir *
138 Store::Disks::store(int const x) const
139 {
140 return INDEXSD(x);
141 }
142
143 SwapDir &
144 Store::Disks::dir(const int i) const
145 {
146 SwapDir *sd = INDEXSD(i);
147 assert(sd);
148 return *sd;
149 }
150
151 int
152 Store::Disks::callback()
153 {
154 int result = 0;
155 int j;
156 static int ndir = 0;
157
158 do {
159 j = 0;
160
161 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
162 if (ndir >= Config.cacheSwap.n_configured)
163 ndir = ndir % Config.cacheSwap.n_configured;
164
165 int temp_result = store(ndir)->callback();
166
167 ++ndir;
168
169 j += temp_result;
170
171 result += temp_result;
172
173 if (j > 100)
174 fatal ("too much io\n");
175 }
176 } while (j > 0);
177
178 ++ndir;
179
180 return result;
181 }
182
183 void
184 Store::Disks::create()
185 {
186 if (Config.cacheSwap.n_configured == 0) {
187 debugs(0, DBG_PARSE_NOTE(DBG_CRITICAL), "No cache_dir stores are configured.");
188 }
189
190 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
191 if (dir(i).active())
192 store(i)->create();
193 }
194 }
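/*
 * For context (an illustrative sketch, not part of this file): each
 * cache_dir line in squid.conf contributes one entry to Config.cacheSwap,
 * e.g.
 *
 *   cache_dir ufs /var/spool/squid 10240 16 256
 *
 * so the n_configured count used above reflects those lines.
 */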
195
196 StoreEntry *
197 Store::Disks::get(const cache_key *key)
198 {
199 if (const int cacheDirs = Config.cacheSwap.n_configured) {
200 // ask each cache_dir until the entry is found; use static starting
201 // point to avoid asking the same subset of disks more often
202 // TODO: coordinate with put() to be able to guess the right disk often
203 static int idx = 0;
204 for (int n = 0; n < cacheDirs; ++n) {
205 idx = (idx + 1) % cacheDirs;
206 SwapDir *sd = dynamic_cast<SwapDir*>(INDEXSD(idx));
207 if (!sd->active())
208 continue;
209
210 if (StoreEntry *e = sd->get(key)) {
211 debugs(20, 7, "cache_dir " << idx << " has: " << *e);
212 return e;
213 }
214 }
215 }
216
217 debugs(20, 6, "none of " << Config.cacheSwap.n_configured <<
218 " cache_dirs have " << storeKeyText(key));
219 return nullptr;
220 }
221
222 void
223 Store::Disks::init()
224 {
225 if (Config.Store.objectsPerBucket <= 0)
226 fatal("'store_objects_per_bucket' should be larger than 0.");
227
228 if (Config.Store.avgObjectSize <= 0)
229 fatal("'store_avg_object_size' should be larger than 0.");
230
231 /* Calculate size of hash table (maximum currently 64k buckets). */
232 /* this is very bogus: it's specific to any Store maintaining an
233 * in-core index, not global */
234 size_t buckets = (Store::Root().maxSize() + Config.memMaxSize) / Config.Store.avgObjectSize;
235 debugs(20, DBG_IMPORTANT, "Swap maxSize " << (Store::Root().maxSize() >> 10) <<
236 " + " << ( Config.memMaxSize >> 10) << " KB, estimated " << buckets << " objects");
237 buckets /= Config.Store.objectsPerBucket;
238 debugs(20, DBG_IMPORTANT, "Target number of buckets: " << buckets);
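/*
 * Worked example with assumed, illustrative values: a 10 GB disk cache plus
 * a 256 MB memory cache and a 13 KB store_avg_object_size estimate roughly
 * (10485760 + 262144) / 13 ~= 827,000 objects; with 20 objects per bucket
 * that targets roughly 41,000 buckets before storeKeyHashBuckets() picks
 * the final value.
 */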
239 /* ideally the full scan period should be configurable; for the
240 * moment it remains at approximately 24 hours. */
241 store_hash_buckets = storeKeyHashBuckets(buckets);
242 debugs(20, DBG_IMPORTANT, "Using " << store_hash_buckets << " Store buckets");
243 debugs(20, DBG_IMPORTANT, "Max Mem size: " << ( Config.memMaxSize >> 10) << " KB" <<
244 (Config.memShared ? " [shared]" : ""));
245 debugs(20, DBG_IMPORTANT, "Max Swap size: " << (Store::Root().maxSize() >> 10) << " KB");
246
247 store_table = hash_create(storeKeyHashCmp,
248 store_hash_buckets, storeKeyHashHash);
249
250 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
251 /* this starts a search of the store dirs, loading their
252 * index. Under the new Store API this should be
253 * driven by the StoreHashIndex, not by each store.
254 *
255 * That is, the HashIndex should perform a search of each dir it is
256 * indexing to do the hash insertions. The search is then able to
257 * decide 'from-memory', or 'from-clean-log' or 'from-dirty-log' or
258 * 'from-no-log'.
259 *
260 * Step 1: make the store rebuilds use a search internally
261 * Step 2: change the search logic to use the four modes described
262 * above
263 * Step 3: have the hash index walk the searches itself.
264 */
265 if (dir(i).active())
266 store(i)->init();
267 }
268
269 if (strcasecmp(Config.store_dir_select_algorithm, "round-robin") == 0) {
270 storeDirSelectSwapDir = storeDirSelectSwapDirRoundRobin;
271 debugs(47, DBG_IMPORTANT, "Using Round Robin store dir selection");
272 } else {
273 storeDirSelectSwapDir = storeDirSelectSwapDirLeastLoad;
274 debugs(47, DBG_IMPORTANT, "Using Least Load store dir selection");
275 }
276 }
277
278 uint64_t
279 Store::Disks::maxSize() const
280 {
281 uint64_t result = 0;
282
283 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
284 if (dir(i).doReportStat())
285 result += store(i)->maxSize();
286 }
287
288 return result;
289 }
290
291 uint64_t
292 Store::Disks::minSize() const
293 {
294 uint64_t result = 0;
295
296 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
297 if (dir(i).doReportStat())
298 result += store(i)->minSize();
299 }
300
301 return result;
302 }
303
304 uint64_t
305 Store::Disks::currentSize() const
306 {
307 uint64_t result = 0;
308
309 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
310 if (dir(i).doReportStat())
311 result += store(i)->currentSize();
312 }
313
314 return result;
315 }
316
317 uint64_t
318 Store::Disks::currentCount() const
319 {
320 uint64_t result = 0;
321
322 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
323 if (dir(i).doReportStat())
324 result += store(i)->currentCount();
325 }
326
327 return result;
328 }
329
330 int64_t
331 Store::Disks::maxObjectSize() const
332 {
333 int64_t result = -1;
334
335 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
336 if (dir(i).active() && store(i)->maxObjectSize() > result)
337 result = store(i)->maxObjectSize();
338 }
339
340 return result;
341 }
342
343 void
344 Store::Disks::getStats(StoreInfoStats &stats) const
345 {
346 // accumulate per-disk cache stats
347 for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
348 StoreInfoStats dirStats;
349 store(i)->getStats(dirStats);
350 stats += dirStats;
351 }
352
353 // common to all disks
354 stats.swap.open_disk_fd = store_open_disk_fd;
355
356 // memory cache stats are collected in StoreController::getStats(), for now
357 }
358
359 void
360 Store::Disks::stat(StoreEntry & output) const
361 {
362 int i;
363
364 /* Now go through each store, calling its stat routine */
365
366 for (i = 0; i < Config.cacheSwap.n_configured; ++i) {
367 storeAppendPrintf(&output, "\n");
368 store(i)->stat(output);
369 }
370 }
371
372 void
373 Store::Disks::reference(StoreEntry &e)
374 {
375 e.disk().reference(e);
376 }
377
378 bool
379 Store::Disks::dereference(StoreEntry &e)
380 {
381 return e.disk().dereference(e);
382 }
383
384 void
385 Store::Disks::maintain()
386 {
387 int i;
388 /* walk each fs */
389
390 for (i = 0; i < Config.cacheSwap.n_configured; ++i) {
391 /* XXX FixMe: This should be done "in parallel" on the different
392 * cache_dirs, not one at a time.
393 */
394 /* call the maintain function .. */
395 store(i)->maintain();
396 }
397 }
398
399 void
400 Store::Disks::sync()
401 {
402 for (int i = 0; i < Config.cacheSwap.n_configured; ++i)
403 store(i)->sync();
404 }
405
406 void
407 Store::Disks::markForUnlink(StoreEntry &e) {
408 if (e.swap_filen >= 0)
409 store(e.swap_dirn)->markForUnlink(e);
410 }
411
412 void
413 Store::Disks::unlink(StoreEntry &e) {
414 if (e.swap_filen >= 0)
415 store(e.swap_dirn)->unlink(e);
416 }
417
418 bool
419 Store::Disks::anchorCollapsed(StoreEntry &collapsed, bool &inSync)
420 {
421 if (const int cacheDirs = Config.cacheSwap.n_configured) {
422 // ask each cache_dir until the entry is found; use static starting
423 // point to avoid asking the same subset of disks more often
424 // TODO: coordinate with put() to be able to guess the right disk often
425 static int idx = 0;
426 for (int n = 0; n < cacheDirs; ++n) {
427 idx = (idx + 1) % cacheDirs;
428 SwapDir &sd = dir(idx);
429 if (!sd.active())
430 continue;
431
432 if (sd.anchorCollapsed(collapsed, inSync)) {
433 debugs(20, 3, "cache_dir " << idx << " anchors " << collapsed);
434 return true;
435 }
436 }
437 }
438
439 debugs(20, 4, "none of " << Config.cacheSwap.n_configured <<
440 " cache_dirs have " << collapsed);
441 return false;
442 }
443
444 bool
445 Store::Disks::updateCollapsed(StoreEntry &collapsed)
446 {
447 return collapsed.swap_filen >= 0 &&
448 dir(collapsed.swap_dirn).updateCollapsed(collapsed);
449 }
450
451
452 /* Store::Disks globals that should be converted to use RegisteredRunner */
453
454 void
455 storeDirOpenSwapLogs()
456 {
457 for (int dirn = 0; dirn < Config.cacheSwap.n_configured; ++dirn)
458 INDEXSD(dirn)->openLog();
459 }
460
461 void
462 storeDirCloseSwapLogs()
463 {
464 for (int dirn = 0; dirn < Config.cacheSwap.n_configured; ++dirn)
465 INDEXSD(dirn)->closeLog();
466 }
467
468 /*
469 * storeDirWriteCleanLogs
470 *
471 * Writes a "clean" swap log file from in-memory metadata.
472 * This is a rewrite of the original function to walk each
473 * StoreDir and write the logs, flushing at the end of
474 * the run. Thanks go to Eric Stern, since this solution
475 * came out of his COSS code.
476 */
477 int
478 storeDirWriteCleanLogs(int reopen)
479 {
480 const StoreEntry *e = NULL;
481 int n = 0;
482
483 struct timeval start;
484 double dt;
485 RefCount<SwapDir> sd;
486 int dirn;
487 int notdone = 1;
488
489 // Check for store_dirs_rebuilding because fatal() often calls us in early
490 // initialization phases, before store log is initialized and ready. Also,
491 // some stores probably do not support log cleanup during Store rebuilding.
492 if (StoreController::store_dirs_rebuilding) {
493 debugs(20, DBG_IMPORTANT, "Not currently OK to rewrite swap log.");
494 debugs(20, DBG_IMPORTANT, "storeDirWriteCleanLogs: Operation aborted.");
495 return 0;
496 }
497
498 debugs(20, DBG_IMPORTANT, "storeDirWriteCleanLogs: Starting...");
499 getCurrentTime();
500 start = current_time;
501
502 for (dirn = 0; dirn < Config.cacheSwap.n_configured; ++dirn) {
503 sd = dynamic_cast<SwapDir *>(INDEXSD(dirn));
504
505 if (sd->writeCleanStart() < 0) {
506 debugs(20, DBG_IMPORTANT, "log.clean.start() failed for dir #" << sd->index);
507 continue;
508 }
509 }
510
511 /*
512 * This may look inefficient: CPU-wise it would be cheaper to do this
513 * sequentially, but I/O-wise the parallelism helps as it allows more
514 * hdd spindles to be active.
515 */
516 while (notdone) {
517 notdone = 0;
518
519 for (dirn = 0; dirn < Config.cacheSwap.n_configured; ++dirn) {
520 sd = dynamic_cast<SwapDir *>(INDEXSD(dirn));
521
522 if (NULL == sd->cleanLog)
523 continue;
524
525 e = sd->cleanLog->nextEntry();
526
527 if (!e)
528 continue;
529
530 notdone = 1;
531
532 if (!sd->canLog(*e))
533 continue;
534
535 sd->cleanLog->write(*e);
536
537 if ((++n & 0xFFFF) == 0) {
538 getCurrentTime();
539 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << n <<
540 " entries written so far.");
541 }
542 }
543 }
544
545 /* Flush */
546 for (dirn = 0; dirn < Config.cacheSwap.n_configured; ++dirn)
547 dynamic_cast<SwapDir *>(INDEXSD(dirn))->writeCleanDone();
548
549 if (reopen)
550 storeDirOpenSwapLogs();
551
552 getCurrentTime();
553
554 dt = tvSubDsec(start, current_time);
555
556 debugs(20, DBG_IMPORTANT, " Finished. Wrote " << n << " entries.");
557 debugs(20, DBG_IMPORTANT, " Took "<< std::setw(3)<< std::setprecision(2) << dt <<
558 " seconds ("<< std::setw(6) << ((double) n / (dt > 0.0 ? dt : 1.0)) << " entries/sec).");
559
560 return n;
561 }
562
563 /* Globals that should be converted to static Store::Disks methods */
564
565 void
566 allocate_new_swapdir(Store::DiskConfig *swap)
567 {
568 if (swap->swapDirs == NULL) {
569 swap->n_allocated = 4;
570 swap->swapDirs = static_cast<SwapDir::Pointer *>(xcalloc(swap->n_allocated, sizeof(SwapDir::Pointer)));
571 }
572
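/* The block below grows the array geometrically: double the allocated
 * capacity and copy the existing SwapDir pointers into the new array. */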
573 if (swap->n_allocated == swap->n_configured) {
574 swap->n_allocated <<= 1;
575 SwapDir::Pointer *const tmp = static_cast<SwapDir::Pointer *>(xcalloc(swap->n_allocated, sizeof(SwapDir::Pointer)));
576 memcpy(tmp, swap->swapDirs, swap->n_configured * sizeof(SwapDir *));
577 xfree(swap->swapDirs);
578 swap->swapDirs = tmp;
579 }
580 }
581
582 void
583 free_cachedir(Store::DiskConfig *swap)
584 {
585 int i;
586 /* DON'T FREE THESE FOR RECONFIGURE */
587
588 if (reconfiguring)
589 return;
590
591 for (i = 0; i < swap->n_configured; ++i) {
592 /* TODO XXX: something like swap->swapDirs[i]->deactivate() would let
593 * the swapdir free its resources asynchronously here, but such a
594 * means may exist already.
595 * RBC 20041225
596 */
597 swap->swapDirs[i] = NULL;
598 }
599
600 safe_free(swap->swapDirs);
601 swap->swapDirs = NULL;
602 swap->n_allocated = 0;
603 swap->n_configured = 0;
604 }
605
606 /* Globals that should be moved to some Store::UFS-specific logging module */
607
608 /*
609 * An entry written to the swap log MUST have the following
610 * properties.
611 * 1. It MUST be a public key. It does no good to log
612 * a public ADD, change the key, then log a private
613 * DEL. So we need to log a DEL before we change a
614 * key from public to private.
615 * 2. It MUST have a valid (> -1) swap_filen.
616 */
617 void
618 storeDirSwapLog(const StoreEntry * e, int op)
619 {
620 assert (e);
621 assert(!EBIT_TEST(e->flags, KEY_PRIVATE));
622 assert(e->swap_filen >= 0);
623 /*
624 * icons and such; don't write them to the swap log
625 */
626
627 if (EBIT_TEST(e->flags, ENTRY_SPECIAL))
628 return;
629
630 assert(op > SWAP_LOG_NOP && op < SWAP_LOG_MAX);
631
632 debugs(20, 3, "storeDirSwapLog: " <<
633 swap_log_op_str[op] << " " <<
634 e->getMD5Text() << " " <<
635 e->swap_dirn << " " <<
636 std::hex << std::uppercase << std::setfill('0') << std::setw(8) << e->swap_filen);
637
638 dynamic_cast<SwapDir *>(INDEXSD(e->swap_dirn))->logEntry(*e, op);
639 }