]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/cache.c
xfs_repair: fix libxfs namespace problems
[thirdparty/xfsprogs-dev.git] / libxfs / cache.c
1 /*
2 * Copyright (c) 2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <pthread.h>
24
25 #include "libxfs_priv.h"
26 #include "xfs_fs.h"
27 #include "xfs_shared.h"
28 #include "xfs_format.h"
29 #include "xfs_trans_resv.h"
30 #include "xfs_mount.h"
31 #include "xfs_bit.h"
32
33 #define CACHE_DEBUG 1
34 #undef CACHE_DEBUG
35 #define CACHE_DEBUG 1
36 #undef CACHE_ABORT
37 /* #define CACHE_ABORT 1 */
38
39 #define CACHE_SHAKE_COUNT 64
40
41 static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
42
43 struct cache *
44 cache_init(
45 int flags,
46 unsigned int hashsize,
47 struct cache_operations *cache_operations)
48 {
49 struct cache * cache;
50 unsigned int i, maxcount;
51
52 maxcount = hashsize * HASH_CACHE_RATIO;
53
54 if (!(cache = malloc(sizeof(struct cache))))
55 return NULL;
56 if (!(cache->c_hash = calloc(hashsize, sizeof(struct cache_hash)))) {
57 free(cache);
58 return NULL;
59 }
60
61 cache->c_flags = flags;
62 cache->c_count = 0;
63 cache->c_max = 0;
64 cache->c_hits = 0;
65 cache->c_misses = 0;
66 cache->c_maxcount = maxcount;
67 cache->c_hashsize = hashsize;
68 cache->c_hashshift = libxfs_highbit32(hashsize);
69 cache->hash = cache_operations->hash;
70 cache->alloc = cache_operations->alloc;
71 cache->flush = cache_operations->flush;
72 cache->relse = cache_operations->relse;
73 cache->compare = cache_operations->compare;
74 cache->bulkrelse = cache_operations->bulkrelse ?
75 cache_operations->bulkrelse : cache_generic_bulkrelse;
76 pthread_mutex_init(&cache->c_mutex, NULL);
77
78 for (i = 0; i < hashsize; i++) {
79 list_head_init(&cache->c_hash[i].ch_list);
80 cache->c_hash[i].ch_count = 0;
81 pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL);
82 }
83
84 for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
85 list_head_init(&cache->c_mrus[i].cm_list);
86 cache->c_mrus[i].cm_count = 0;
87 pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL);
88 }
89 return cache;
90 }
91
92 void
93 cache_expand(
94 struct cache * cache)
95 {
96 pthread_mutex_lock(&cache->c_mutex);
97 #ifdef CACHE_DEBUG
98 fprintf(stderr, "doubling cache size to %d\n", 2 * cache->c_maxcount);
99 #endif
100 cache->c_maxcount *= 2;
101 pthread_mutex_unlock(&cache->c_mutex);
102 }
103
104 void
105 cache_walk(
106 struct cache * cache,
107 cache_walk_t visit)
108 {
109 struct cache_hash * hash;
110 struct list_head * head;
111 struct list_head * pos;
112 unsigned int i;
113
114 for (i = 0; i < cache->c_hashsize; i++) {
115 hash = &cache->c_hash[i];
116 head = &hash->ch_list;
117 pthread_mutex_lock(&hash->ch_mutex);
118 for (pos = head->next; pos != head; pos = pos->next)
119 visit((struct cache_node *)pos);
120 pthread_mutex_unlock(&hash->ch_mutex);
121 }
122 }
123
124 #ifdef CACHE_ABORT
125 #define cache_abort() abort()
126 #else
127 #define cache_abort() do { } while (0)
128 #endif
129
130 #ifdef CACHE_DEBUG
131 static void
132 cache_zero_check(
133 struct cache_node * node)
134 {
135 if (node->cn_count > 0) {
136 fprintf(stderr, "%s: refcount is %u, not zero (node=%p)\n",
137 __FUNCTION__, node->cn_count, node);
138 cache_abort();
139 }
140 }
141 #define cache_destroy_check(c) cache_walk((c), cache_zero_check)
142 #else
143 #define cache_destroy_check(c) do { } while (0)
144 #endif
145
146 void
147 cache_destroy(
148 struct cache * cache)
149 {
150 unsigned int i;
151
152 cache_destroy_check(cache);
153 for (i = 0; i < cache->c_hashsize; i++) {
154 list_head_destroy(&cache->c_hash[i].ch_list);
155 pthread_mutex_destroy(&cache->c_hash[i].ch_mutex);
156 }
157 for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
158 list_head_destroy(&cache->c_mrus[i].cm_list);
159 pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex);
160 }
161 pthread_mutex_destroy(&cache->c_mutex);
162 free(cache->c_hash);
163 free(cache);
164 }
165
166 static unsigned int
167 cache_generic_bulkrelse(
168 struct cache * cache,
169 struct list_head * list)
170 {
171 struct cache_node * node;
172 unsigned int count = 0;
173
174 while (!list_empty(list)) {
175 node = list_entry(list->next, struct cache_node, cn_mru);
176 pthread_mutex_destroy(&node->cn_mutex);
177 list_del_init(&node->cn_mru);
178 cache->relse(node);
179 count++;
180 }
181
182 return count;
183 }
184
185 /*
186 * Park unflushable nodes on their own special MRU so that cache_shake() doesn't
187 * end up repeatedly scanning them in the futile attempt to clean them before
188 * reclaim.
189 */
190 static void
191 cache_add_to_dirty_mru(
192 struct cache *cache,
193 struct cache_node *node)
194 {
195 struct cache_mru *mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY];
196
197 pthread_mutex_lock(&mru->cm_mutex);
198 node->cn_old_priority = node->cn_priority;
199 node->cn_priority = CACHE_DIRTY_PRIORITY;
200 list_add(&node->cn_mru, &mru->cm_list);
201 mru->cm_count++;
202 pthread_mutex_unlock(&mru->cm_mutex);
203 }
204
205 /*
206 * We've hit the limit on cache size, so we need to start reclaiming nodes we've
207 * used. The MRU specified by the priority is shaken. Returns new priority at
208 * end of the call (in case we call again). We are not allowed to reclaim dirty
209 * objects, so we have to flush them first. If flushing fails, we move them to
210 * the "dirty, unreclaimable" list.
211 *
212 * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we
213 * park unflushable (and hence unreclaimable) buffers at these priorities.
214 * Trying to shake unreclaimable buffer lists when there is memory pressure is a
215 * waste of time and CPU and greatly slows down cache node recycling operations.
216 * Hence we only try to free them if we are being asked to purge the cache of
217 * all entries.
218 */
219 static unsigned int
220 cache_shake(
221 struct cache * cache,
222 unsigned int priority,
223 bool purge)
224 {
225 struct cache_mru *mru;
226 struct cache_hash * hash;
227 struct list_head temp;
228 struct list_head * head;
229 struct list_head * pos;
230 struct list_head * n;
231 struct cache_node * node;
232 unsigned int count;
233
234 ASSERT(priority <= CACHE_DIRTY_PRIORITY);
235 if (priority > CACHE_MAX_PRIORITY && !purge)
236 priority = 0;
237
238 mru = &cache->c_mrus[priority];
239 count = 0;
240 list_head_init(&temp);
241 head = &mru->cm_list;
242
243 pthread_mutex_lock(&mru->cm_mutex);
244 for (pos = head->prev, n = pos->prev; pos != head;
245 pos = n, n = pos->prev) {
246 node = list_entry(pos, struct cache_node, cn_mru);
247
248 if (pthread_mutex_trylock(&node->cn_mutex) != 0)
249 continue;
250
251 /* memory pressure is not allowed to release dirty objects */
252 if (cache->flush(node) && !purge) {
253 list_del(&node->cn_mru);
254 mru->cm_count--;
255 node->cn_priority = -1;
256 pthread_mutex_unlock(&node->cn_mutex);
257 cache_add_to_dirty_mru(cache, node);
258 continue;
259 }
260
261 hash = cache->c_hash + node->cn_hashidx;
262 if (pthread_mutex_trylock(&hash->ch_mutex) != 0) {
263 pthread_mutex_unlock(&node->cn_mutex);
264 continue;
265 }
266 ASSERT(node->cn_count == 0);
267 ASSERT(node->cn_priority == priority);
268 node->cn_priority = -1;
269
270 list_move(&node->cn_mru, &temp);
271 list_del_init(&node->cn_hash);
272 hash->ch_count--;
273 mru->cm_count--;
274 pthread_mutex_unlock(&hash->ch_mutex);
275 pthread_mutex_unlock(&node->cn_mutex);
276
277 count++;
278 if (!purge && count == CACHE_SHAKE_COUNT)
279 break;
280 }
281 pthread_mutex_unlock(&mru->cm_mutex);
282
283 if (count > 0) {
284 cache->bulkrelse(cache, &temp);
285
286 pthread_mutex_lock(&cache->c_mutex);
287 cache->c_count -= count;
288 pthread_mutex_unlock(&cache->c_mutex);
289 }
290
291 return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
292 }
293
294 /*
295 * Allocate a new hash node (updating atomic counter in the process),
296 * unless doing so will push us over the maximum cache size.
297 */
298 static struct cache_node *
299 cache_node_allocate(
300 struct cache * cache,
301 cache_key_t key)
302 {
303 unsigned int nodesfree;
304 struct cache_node * node;
305
306 pthread_mutex_lock(&cache->c_mutex);
307 nodesfree = (cache->c_count < cache->c_maxcount);
308 if (nodesfree) {
309 cache->c_count++;
310 if (cache->c_count > cache->c_max)
311 cache->c_max = cache->c_count;
312 }
313 cache->c_misses++;
314 pthread_mutex_unlock(&cache->c_mutex);
315 if (!nodesfree)
316 return NULL;
317 node = cache->alloc(key);
318 if (node == NULL) { /* uh-oh */
319 pthread_mutex_lock(&cache->c_mutex);
320 cache->c_count--;
321 pthread_mutex_unlock(&cache->c_mutex);
322 return NULL;
323 }
324 pthread_mutex_init(&node->cn_mutex, NULL);
325 list_head_init(&node->cn_mru);
326 node->cn_count = 1;
327 node->cn_priority = 0;
328 node->cn_old_priority = -1;
329 return node;
330 }
331
332 int
333 cache_overflowed(
334 struct cache * cache)
335 {
336 return cache->c_maxcount == cache->c_max;
337 }
338
339
340 static int
341 __cache_node_purge(
342 struct cache * cache,
343 struct cache_node * node)
344 {
345 int count;
346 struct cache_mru * mru;
347
348 pthread_mutex_lock(&node->cn_mutex);
349 count = node->cn_count;
350 if (count != 0) {
351 pthread_mutex_unlock(&node->cn_mutex);
352 return count;
353 }
354
355 /* can't purge dirty objects */
356 if (cache->flush(node)) {
357 pthread_mutex_unlock(&node->cn_mutex);
358 return 1;
359 }
360
361 mru = &cache->c_mrus[node->cn_priority];
362 pthread_mutex_lock(&mru->cm_mutex);
363 list_del_init(&node->cn_mru);
364 mru->cm_count--;
365 pthread_mutex_unlock(&mru->cm_mutex);
366
367 pthread_mutex_unlock(&node->cn_mutex);
368 pthread_mutex_destroy(&node->cn_mutex);
369 list_del_init(&node->cn_hash);
370 cache->relse(node);
371 return 0;
372 }
373
374 /*
375 * Lookup in the cache hash table. With any luck we'll get a cache
376 * hit, in which case this will all be over quickly and painlessly.
377 * Otherwise, we allocate a new node, taking care not to expand the
378 * cache beyond the requested maximum size (shrink it if it would).
379 * Returns one if hit in cache, otherwise zero. A node is _always_
380 * returned, however.
381 */
382 int
383 cache_node_get(
384 struct cache * cache,
385 cache_key_t key,
386 struct cache_node ** nodep)
387 {
388 struct cache_node * node = NULL;
389 struct cache_hash * hash;
390 struct cache_mru * mru;
391 struct list_head * head;
392 struct list_head * pos;
393 struct list_head * n;
394 unsigned int hashidx;
395 int priority = 0;
396 int purged = 0;
397
398 hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift);
399 hash = cache->c_hash + hashidx;
400 head = &hash->ch_list;
401
402 for (;;) {
403 pthread_mutex_lock(&hash->ch_mutex);
404 for (pos = head->next, n = pos->next; pos != head;
405 pos = n, n = pos->next) {
406 int result;
407
408 node = list_entry(pos, struct cache_node, cn_hash);
409 result = cache->compare(node, key);
410 switch (result) {
411 case CACHE_HIT:
412 break;
413 case CACHE_PURGE:
414 if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) &&
415 !__cache_node_purge(cache, node)) {
416 purged++;
417 hash->ch_count--;
418 }
419 /* FALL THROUGH */
420 case CACHE_MISS:
421 goto next_object;
422 }
423
424 /*
425 * node found, bump node's reference count, remove it
426 * from its MRU list, and update stats.
427 */
428 pthread_mutex_lock(&node->cn_mutex);
429
430 if (node->cn_count == 0) {
431 ASSERT(node->cn_priority >= 0);
432 ASSERT(!list_empty(&node->cn_mru));
433 mru = &cache->c_mrus[node->cn_priority];
434 pthread_mutex_lock(&mru->cm_mutex);
435 mru->cm_count--;
436 list_del_init(&node->cn_mru);
437 pthread_mutex_unlock(&mru->cm_mutex);
438 if (node->cn_old_priority != -1) {
439 ASSERT(node->cn_priority ==
440 CACHE_DIRTY_PRIORITY);
441 node->cn_priority = node->cn_old_priority;
442 node->cn_old_priority = -1;
443 }
444 }
445 node->cn_count++;
446
447 pthread_mutex_unlock(&node->cn_mutex);
448 pthread_mutex_unlock(&hash->ch_mutex);
449
450 pthread_mutex_lock(&cache->c_mutex);
451 cache->c_hits++;
452 pthread_mutex_unlock(&cache->c_mutex);
453
454 *nodep = node;
455 return 0;
456 next_object:
457 continue; /* what the hell, gcc? */
458 }
459 pthread_mutex_unlock(&hash->ch_mutex);
460 /*
461 * not found, allocate a new entry
462 */
463 node = cache_node_allocate(cache, key);
464 if (node)
465 break;
466 priority = cache_shake(cache, priority, false);
467 /*
468 * We start at 0; if we free CACHE_SHAKE_COUNT we get
469 * back the same priority, if not we get back priority+1.
470 * If we exceed CACHE_MAX_PRIORITY all slots are full; grow it.
471 */
472 if (priority > CACHE_MAX_PRIORITY) {
473 priority = 0;
474 cache_expand(cache);
475 }
476 }
477
478 node->cn_hashidx = hashidx;
479
480 /* add new node to appropriate hash */
481 pthread_mutex_lock(&hash->ch_mutex);
482 hash->ch_count++;
483 list_add(&node->cn_hash, &hash->ch_list);
484 pthread_mutex_unlock(&hash->ch_mutex);
485
486 if (purged) {
487 pthread_mutex_lock(&cache->c_mutex);
488 cache->c_count -= purged;
489 pthread_mutex_unlock(&cache->c_mutex);
490 }
491
492 *nodep = node;
493 return 1;
494 }
495
496 void
497 cache_node_put(
498 struct cache * cache,
499 struct cache_node * node)
500 {
501 struct cache_mru * mru;
502
503 pthread_mutex_lock(&node->cn_mutex);
504 #ifdef CACHE_DEBUG
505 if (node->cn_count < 1) {
506 fprintf(stderr, "%s: node put on refcount %u (node=%p)\n",
507 __FUNCTION__, node->cn_count, node);
508 cache_abort();
509 }
510 if (!list_empty(&node->cn_mru)) {
511 fprintf(stderr, "%s: node put on node (%p) in MRU list\n",
512 __FUNCTION__, node);
513 cache_abort();
514 }
515 #endif
516 node->cn_count--;
517
518 if (node->cn_count == 0) {
519 /* add unreferenced node to appropriate MRU for shaker */
520 mru = &cache->c_mrus[node->cn_priority];
521 pthread_mutex_lock(&mru->cm_mutex);
522 mru->cm_count++;
523 list_add(&node->cn_mru, &mru->cm_list);
524 pthread_mutex_unlock(&mru->cm_mutex);
525 }
526
527 pthread_mutex_unlock(&node->cn_mutex);
528 }
529
530 void
531 cache_node_set_priority(
532 struct cache * cache,
533 struct cache_node * node,
534 int priority)
535 {
536 if (priority < 0)
537 priority = 0;
538 else if (priority > CACHE_MAX_PRIORITY)
539 priority = CACHE_MAX_PRIORITY;
540
541 pthread_mutex_lock(&node->cn_mutex);
542 ASSERT(node->cn_count > 0);
543 node->cn_priority = priority;
544 node->cn_old_priority = -1;
545 pthread_mutex_unlock(&node->cn_mutex);
546 }
547
548 int
549 cache_node_get_priority(
550 struct cache_node * node)
551 {
552 int priority;
553
554 pthread_mutex_lock(&node->cn_mutex);
555 priority = node->cn_priority;
556 pthread_mutex_unlock(&node->cn_mutex);
557
558 return priority;
559 }
560
561
562 /*
563 * Purge a specific node from the cache. Reference count must be zero.
564 */
565 int
566 cache_node_purge(
567 struct cache * cache,
568 cache_key_t key,
569 struct cache_node * node)
570 {
571 struct list_head * head;
572 struct list_head * pos;
573 struct list_head * n;
574 struct cache_hash * hash;
575 int count = -1;
576
577 hash = cache->c_hash + cache->hash(key, cache->c_hashsize,
578 cache->c_hashshift);
579 head = &hash->ch_list;
580 pthread_mutex_lock(&hash->ch_mutex);
581 for (pos = head->next, n = pos->next; pos != head;
582 pos = n, n = pos->next) {
583 if ((struct cache_node *)pos != node)
584 continue;
585
586 count = __cache_node_purge(cache, node);
587 if (!count)
588 hash->ch_count--;
589 break;
590 }
591 pthread_mutex_unlock(&hash->ch_mutex);
592
593 if (count == 0) {
594 pthread_mutex_lock(&cache->c_mutex);
595 cache->c_count--;
596 pthread_mutex_unlock(&cache->c_mutex);
597 }
598 #ifdef CACHE_DEBUG
599 if (count >= 1) {
600 fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n",
601 __FUNCTION__, count, node);
602 cache_abort();
603 }
604 if (count == -1) {
605 fprintf(stderr, "%s: purge node not found! (node=%p)\n",
606 __FUNCTION__, node);
607 cache_abort();
608 }
609 #endif
610 return count == 0;
611 }
612
613 /*
614 * Purge all nodes from the cache. All reference counts must be zero.
615 */
616 void
617 cache_purge(
618 struct cache * cache)
619 {
620 int i;
621
622 for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
623 cache_shake(cache, i, true);
624
625 #ifdef CACHE_DEBUG
626 if (cache->c_count != 0) {
627 /* flush referenced nodes to disk */
628 cache_flush(cache);
629 fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n",
630 __FUNCTION__, cache, cache->c_count);
631 cache_abort();
632 }
633 #endif
634 }
635
636 /*
637 * Flush all nodes in the cache to disk.
638 */
639 void
640 cache_flush(
641 struct cache * cache)
642 {
643 struct cache_hash * hash;
644 struct list_head * head;
645 struct list_head * pos;
646 struct cache_node * node;
647 int i;
648
649 if (!cache->flush)
650 return;
651
652 for (i = 0; i < cache->c_hashsize; i++) {
653 hash = &cache->c_hash[i];
654
655 pthread_mutex_lock(&hash->ch_mutex);
656 head = &hash->ch_list;
657 for (pos = head->next; pos != head; pos = pos->next) {
658 node = (struct cache_node *)pos;
659 pthread_mutex_lock(&node->cn_mutex);
660 cache->flush(node);
661 pthread_mutex_unlock(&node->cn_mutex);
662 }
663 pthread_mutex_unlock(&hash->ch_mutex);
664 }
665 }
666
667 #define HASH_REPORT (3 * HASH_CACHE_RATIO)
668 void
669 cache_report(
670 FILE *fp,
671 const char *name,
672 struct cache *cache)
673 {
674 int i;
675 unsigned long count, index, total;
676 unsigned long hash_bucket_lengths[HASH_REPORT + 2];
677
678 if ((cache->c_hits + cache->c_misses) == 0)
679 return;
680
681 /* report cache summary */
682 fprintf(fp, "%s: %p\n"
683 "Max supported entries = %u\n"
684 "Max utilized entries = %u\n"
685 "Active entries = %u\n"
686 "Hash table size = %u\n"
687 "Hits = %llu\n"
688 "Misses = %llu\n"
689 "Hit ratio = %5.2f\n",
690 name, cache,
691 cache->c_maxcount,
692 cache->c_max,
693 cache->c_count,
694 cache->c_hashsize,
695 cache->c_hits,
696 cache->c_misses,
697 (double)cache->c_hits * 100 /
698 (cache->c_hits + cache->c_misses)
699 );
700
701 for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
702 fprintf(fp, "MRU %d entries = %6u (%3u%%)\n",
703 i, cache->c_mrus[i].cm_count,
704 cache->c_mrus[i].cm_count * 100 / cache->c_count);
705
706 i = CACHE_DIRTY_PRIORITY;
707 fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n",
708 i, cache->c_mrus[i].cm_count,
709 cache->c_mrus[i].cm_count * 100 / cache->c_count);
710
711 /* report hash bucket lengths */
712 bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));
713
714 for (i = 0; i < cache->c_hashsize; i++) {
715 count = cache->c_hash[i].ch_count;
716 if (count > HASH_REPORT)
717 index = HASH_REPORT + 1;
718 else
719 index = count;
720 hash_bucket_lengths[index]++;
721 }
722
723 total = 0;
724 for (i = 0; i < HASH_REPORT + 1; i++) {
725 total += i * hash_bucket_lengths[i];
726 if (hash_bucket_lengths[i] == 0)
727 continue;
728 fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n",
729 i, hash_bucket_lengths[i],
730 (i * hash_bucket_lengths[i] * 100) / cache->c_count);
731 }
732 if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */
733 fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n",
734 i - 1, hash_bucket_lengths[i],
735 ((cache->c_count - total) * 100) / cache->c_count);
736 }