// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>

#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_bit.h"

#define CACHE_DEBUG 1
#undef CACHE_DEBUG
#define CACHE_DEBUG 1
#undef CACHE_ABORT
/* #define CACHE_ABORT 1 */

#define CACHE_SHAKE_COUNT	64

static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);

struct cache *
cache_init(
	int			flags,
	unsigned int		hashsize,
	struct cache_operations	*cache_operations)
{
	struct cache *		cache;
	unsigned int		i, maxcount;

	maxcount = hashsize * HASH_CACHE_RATIO;

	if (!(cache = malloc(sizeof(struct cache))))
		return NULL;
	if (!(cache->c_hash = calloc(hashsize, sizeof(struct cache_hash)))) {
		free(cache);
		return NULL;
	}

	cache->c_flags = flags;
	cache->c_count = 0;
	cache->c_max = 0;
	cache->c_hits = 0;
	cache->c_misses = 0;
	cache->c_maxcount = maxcount;
	cache->c_hashsize = hashsize;
	cache->c_hashshift = libxfs_highbit32(hashsize);
	cache->hash = cache_operations->hash;
	cache->alloc = cache_operations->alloc;
	cache->flush = cache_operations->flush;
	cache->relse = cache_operations->relse;
	cache->compare = cache_operations->compare;
	cache->bulkrelse = cache_operations->bulkrelse ?
		cache_operations->bulkrelse : cache_generic_bulkrelse;
	pthread_mutex_init(&cache->c_mutex, NULL);

	for (i = 0; i < hashsize; i++) {
		list_head_init(&cache->c_hash[i].ch_list);
		cache->c_hash[i].ch_count = 0;
		pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL);
	}

	for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
		list_head_init(&cache->c_mrus[i].cm_list);
		cache->c_mrus[i].cm_count = 0;
		pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL);
	}
	return cache;
}
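
/*
 * Illustrative sketch only (not part of this file): a client of this cache
 * fills in a cache_operations table and sizes the hash table when calling
 * cache_init().  The callback signatures are inferred from how they are
 * invoked in this file -- hash(key, hashsize, hashshift), alloc(key),
 * flush(node), relse(node), compare(node, key) and the optional
 * bulkrelse(cache, list) -- and the my_* names are hypothetical:
 *
 *	static struct cache_operations my_cache_ops = {
 *		.hash		= my_hash,	// map a key to a bucket index
 *		.alloc		= my_alloc,	// allocate a node for this key
 *		.flush		= my_flush,	// write back; non-zero means still dirty
 *		.relse		= my_relse,	// free a reclaimed node
 *		.compare	= my_compare,	// CACHE_HIT, CACHE_MISS or CACHE_PURGE
 *		.bulkrelse	= NULL,		// defaults to cache_generic_bulkrelse
 *	};
 *
 *	struct cache	*c = cache_init(0, 512, &my_cache_ops);
 *
 * With hashsize 512 the initial node limit is 512 * HASH_CACHE_RATIO;
 * cache_expand() later doubles that limit whenever shaking cannot free a slot.
 */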

void
cache_expand(
	struct cache *		cache)
{
	pthread_mutex_lock(&cache->c_mutex);
#ifdef CACHE_DEBUG
	fprintf(stderr, "doubling cache size to %d\n", 2 * cache->c_maxcount);
#endif
	cache->c_maxcount *= 2;
	pthread_mutex_unlock(&cache->c_mutex);
}

void
cache_walk(
	struct cache *		cache,
	cache_walk_t		visit)
{
	struct cache_hash *	hash;
	struct list_head *	head;
	struct list_head *	pos;
	unsigned int		i;

	for (i = 0; i < cache->c_hashsize; i++) {
		hash = &cache->c_hash[i];
		head = &hash->ch_list;
		pthread_mutex_lock(&hash->ch_mutex);
		for (pos = head->next; pos != head; pos = pos->next)
			visit((struct cache_node *)pos);
		pthread_mutex_unlock(&hash->ch_mutex);
	}
}

#ifdef CACHE_ABORT
#define cache_abort()	abort()
#else
#define cache_abort()	do { } while (0)
#endif

#ifdef CACHE_DEBUG
static void
cache_zero_check(
	struct cache_node *	node)
{
	if (node->cn_count > 0) {
		fprintf(stderr, "%s: refcount is %u, not zero (node=%p)\n",
			__FUNCTION__, node->cn_count, node);
		cache_abort();
	}
}
#define cache_destroy_check(c)	cache_walk((c), cache_zero_check)
#else
#define cache_destroy_check(c)	do { } while (0)
#endif

void
cache_destroy(
	struct cache *		cache)
{
	unsigned int		i;

	cache_destroy_check(cache);
	for (i = 0; i < cache->c_hashsize; i++) {
		list_head_destroy(&cache->c_hash[i].ch_list);
		pthread_mutex_destroy(&cache->c_hash[i].ch_mutex);
	}
	for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
		list_head_destroy(&cache->c_mrus[i].cm_list);
		pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex);
	}
	pthread_mutex_destroy(&cache->c_mutex);
	free(cache->c_hash);
	free(cache);
}

static unsigned int
cache_generic_bulkrelse(
	struct cache *		cache,
	struct list_head *	list)
{
	struct cache_node *	node;
	unsigned int		count = 0;

	while (!list_empty(list)) {
		node = list_entry(list->next, struct cache_node, cn_mru);
		pthread_mutex_destroy(&node->cn_mutex);
		list_del_init(&node->cn_mru);
		cache->relse(node);
		count++;
	}

	return count;
}

/*
 * Park unflushable nodes on their own special MRU so that cache_shake() doesn't
 * end up repeatedly scanning them in the futile attempt to clean them before
 * reclaim.
 */
static void
cache_add_to_dirty_mru(
	struct cache		*cache,
	struct cache_node	*node)
{
	struct cache_mru	*mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY];

	pthread_mutex_lock(&mru->cm_mutex);
	node->cn_old_priority = node->cn_priority;
	node->cn_priority = CACHE_DIRTY_PRIORITY;
	list_add(&node->cn_mru, &mru->cm_list);
	mru->cm_count++;
	pthread_mutex_unlock(&mru->cm_mutex);
}

/*
 * We've hit the limit on cache size, so we need to start reclaiming nodes we've
 * used. The MRU specified by the priority is shaken. Returns new priority at
 * end of the call (in case we call again). We are not allowed to reclaim dirty
 * objects, so we have to flush them first. If flushing fails, we move them to
 * the "dirty, unreclaimable" list.
 *
 * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we
 * park unflushable (and hence unreclaimable) buffers at these priorities.
 * Trying to shake unreclaimable buffer lists when there is memory pressure is a
 * waste of time and CPU and greatly slows down cache node recycling operations.
 * Hence we only try to free them if we are being asked to purge the cache of
 * all entries.
 */
static unsigned int
cache_shake(
	struct cache *		cache,
	unsigned int		priority,
	bool			purge)
{
	struct cache_mru	*mru;
	struct cache_hash *	hash;
	struct list_head	temp;
	struct list_head *	head;
	struct list_head *	pos;
	struct list_head *	n;
	struct cache_node *	node;
	unsigned int		count;

	ASSERT(priority <= CACHE_DIRTY_PRIORITY);
	if (priority > CACHE_MAX_PRIORITY && !purge)
		priority = 0;

	mru = &cache->c_mrus[priority];
	count = 0;
	list_head_init(&temp);
	head = &mru->cm_list;

	pthread_mutex_lock(&mru->cm_mutex);
	for (pos = head->prev, n = pos->prev; pos != head;
				pos = n, n = pos->prev) {
		node = list_entry(pos, struct cache_node, cn_mru);

		if (pthread_mutex_trylock(&node->cn_mutex) != 0)
			continue;

		/* memory pressure is not allowed to release dirty objects */
		if (cache->flush(node) && !purge) {
			list_del(&node->cn_mru);
			mru->cm_count--;
			node->cn_priority = -1;
			pthread_mutex_unlock(&node->cn_mutex);
			cache_add_to_dirty_mru(cache, node);
			continue;
		}

		hash = cache->c_hash + node->cn_hashidx;
		if (pthread_mutex_trylock(&hash->ch_mutex) != 0) {
			pthread_mutex_unlock(&node->cn_mutex);
			continue;
		}
		ASSERT(node->cn_count == 0);
		ASSERT(node->cn_priority == priority);
		node->cn_priority = -1;

		list_move(&node->cn_mru, &temp);
		list_del_init(&node->cn_hash);
		hash->ch_count--;
		mru->cm_count--;
		pthread_mutex_unlock(&hash->ch_mutex);
		pthread_mutex_unlock(&node->cn_mutex);

		count++;
		if (!purge && count == CACHE_SHAKE_COUNT)
			break;
	}
	pthread_mutex_unlock(&mru->cm_mutex);

	if (count > 0) {
		cache->bulkrelse(cache, &temp);

		pthread_mutex_lock(&cache->c_mutex);
		cache->c_count -= count;
		pthread_mutex_unlock(&cache->c_mutex);
	}

	return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
}

/*
 * Allocate a new hash node (updating atomic counter in the process),
 * unless doing so will push us over the maximum cache size.
 */
static struct cache_node *
cache_node_allocate(
	struct cache *		cache,
	cache_key_t		key)
{
	unsigned int		nodesfree;
	struct cache_node *	node;

	pthread_mutex_lock(&cache->c_mutex);
	nodesfree = (cache->c_count < cache->c_maxcount);
	if (nodesfree) {
		cache->c_count++;
		if (cache->c_count > cache->c_max)
			cache->c_max = cache->c_count;
	}
	cache->c_misses++;
	pthread_mutex_unlock(&cache->c_mutex);
	if (!nodesfree)
		return NULL;
	node = cache->alloc(key);
	if (node == NULL) {	/* uh-oh */
		pthread_mutex_lock(&cache->c_mutex);
		cache->c_count--;
		pthread_mutex_unlock(&cache->c_mutex);
		return NULL;
	}
	pthread_mutex_init(&node->cn_mutex, NULL);
	list_head_init(&node->cn_mru);
	node->cn_count = 1;
	node->cn_priority = 0;
	node->cn_old_priority = -1;
	return node;
}

int
cache_overflowed(
	struct cache *		cache)
{
	return cache->c_maxcount == cache->c_max;
}


static int
__cache_node_purge(
	struct cache *		cache,
	struct cache_node *	node)
{
	int			count;
	struct cache_mru *	mru;

	pthread_mutex_lock(&node->cn_mutex);
	count = node->cn_count;
	if (count != 0) {
		pthread_mutex_unlock(&node->cn_mutex);
		return count;
	}

	/* can't purge dirty objects */
	if (cache->flush(node)) {
		pthread_mutex_unlock(&node->cn_mutex);
		return 1;
	}

	mru = &cache->c_mrus[node->cn_priority];
	pthread_mutex_lock(&mru->cm_mutex);
	list_del_init(&node->cn_mru);
	mru->cm_count--;
	pthread_mutex_unlock(&mru->cm_mutex);

	pthread_mutex_unlock(&node->cn_mutex);
	pthread_mutex_destroy(&node->cn_mutex);
	list_del_init(&node->cn_hash);
	cache->relse(node);
	return 0;
}

/*
 * Lookup in the cache hash table.  With any luck we'll get a cache
 * hit, in which case this will all be over quickly and painlessly.
 * Otherwise, we allocate a new node, taking care not to expand the
 * cache beyond the requested maximum size (shrink it if it would).
 * Returns zero on a cache hit and one if a new node had to be
 * allocated.  A node is _always_ returned, however.
 */
int
cache_node_get(
	struct cache *		cache,
	cache_key_t		key,
	struct cache_node **	nodep)
{
	struct cache_node *	node = NULL;
	struct cache_hash *	hash;
	struct cache_mru *	mru;
	struct list_head *	head;
	struct list_head *	pos;
	struct list_head *	n;
	unsigned int		hashidx;
	int			priority = 0;
	int			purged = 0;

	hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift);
	hash = cache->c_hash + hashidx;
	head = &hash->ch_list;

	for (;;) {
		pthread_mutex_lock(&hash->ch_mutex);
		for (pos = head->next, n = pos->next; pos != head;
						pos = n, n = pos->next) {
			int result;

			node = list_entry(pos, struct cache_node, cn_hash);
			result = cache->compare(node, key);
			switch (result) {
			case CACHE_HIT:
				break;
			case CACHE_PURGE:
				if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) &&
				    !__cache_node_purge(cache, node)) {
					purged++;
					hash->ch_count--;
				}
				/* FALL THROUGH */
			case CACHE_MISS:
				goto next_object;
			}

			/*
			 * node found, bump node's reference count, remove it
			 * from its MRU list, and update stats.
			 */
			pthread_mutex_lock(&node->cn_mutex);

			if (node->cn_count == 0) {
				ASSERT(node->cn_priority >= 0);
				ASSERT(!list_empty(&node->cn_mru));
				mru = &cache->c_mrus[node->cn_priority];
				pthread_mutex_lock(&mru->cm_mutex);
				mru->cm_count--;
				list_del_init(&node->cn_mru);
				pthread_mutex_unlock(&mru->cm_mutex);
				if (node->cn_old_priority != -1) {
					ASSERT(node->cn_priority ==
							CACHE_DIRTY_PRIORITY);
					node->cn_priority = node->cn_old_priority;
					node->cn_old_priority = -1;
				}
			}
			node->cn_count++;

			pthread_mutex_unlock(&node->cn_mutex);
			pthread_mutex_unlock(&hash->ch_mutex);

			pthread_mutex_lock(&cache->c_mutex);
			cache->c_hits++;
			pthread_mutex_unlock(&cache->c_mutex);

			*nodep = node;
			return 0;
next_object:
			continue;	/* what the hell, gcc? */
		}
		pthread_mutex_unlock(&hash->ch_mutex);
		/*
		 * not found, allocate a new entry
		 */
		node = cache_node_allocate(cache, key);
		if (node)
			break;
		priority = cache_shake(cache, priority, false);
		/*
		 * We start at 0; if we free CACHE_SHAKE_COUNT we get
		 * back the same priority, if not we get back priority+1.
		 * If we exceed CACHE_MAX_PRIORITY all slots are full; grow it.
		 */
		if (priority > CACHE_MAX_PRIORITY) {
			priority = 0;
			cache_expand(cache);
		}
	}

	node->cn_hashidx = hashidx;

	/* add new node to appropriate hash */
	pthread_mutex_lock(&hash->ch_mutex);
	hash->ch_count++;
	list_add(&node->cn_hash, &hash->ch_list);
	pthread_mutex_unlock(&hash->ch_mutex);

	if (purged) {
		pthread_mutex_lock(&cache->c_mutex);
		cache->c_count -= purged;
		pthread_mutex_unlock(&cache->c_mutex);
	}

	*nodep = node;
	return 1;
}

void
cache_node_put(
	struct cache *		cache,
	struct cache_node *	node)
{
	struct cache_mru *	mru;

	pthread_mutex_lock(&node->cn_mutex);
#ifdef CACHE_DEBUG
	if (node->cn_count < 1) {
		fprintf(stderr, "%s: node put on refcount %u (node=%p)\n",
				__FUNCTION__, node->cn_count, node);
		cache_abort();
	}
	if (!list_empty(&node->cn_mru)) {
		fprintf(stderr, "%s: node put on node (%p) in MRU list\n",
				__FUNCTION__, node);
		cache_abort();
	}
#endif
	node->cn_count--;

	if (node->cn_count == 0) {
		/* add unreferenced node to appropriate MRU for shaker */
		mru = &cache->c_mrus[node->cn_priority];
		pthread_mutex_lock(&mru->cm_mutex);
		mru->cm_count++;
		list_add(&node->cn_mru, &mru->cm_list);
		pthread_mutex_unlock(&mru->cm_mutex);
	}

	pthread_mutex_unlock(&node->cn_mutex);
}
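
/*
 * Illustrative sketch only: the usual lookup pattern pairs cache_node_get()
 * with cache_node_put().  cache_node_get() returns 0 on a hit and 1 when a
 * new node had to be allocated, and either way hands back a referenced node
 * that cache_shake() cannot reclaim until the matching put drops cn_count
 * back to zero.  "c" and "key" stand for a cache and lookup key set up by
 * the caller:
 *
 *	struct cache_node	*node;
 *	int			miss;
 *
 *	miss = cache_node_get(c, key, &node);
 *	if (miss) {
 *		// initialise the freshly allocated node
 *	}
 *	// ... use the node while it is pinned ...
 *	cache_node_put(c, node);	// back onto its priority MRU once idle
 */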

void
cache_node_set_priority(
	struct cache *		cache,
	struct cache_node *	node,
	int			priority)
{
	if (priority < 0)
		priority = 0;
	else if (priority > CACHE_MAX_PRIORITY)
		priority = CACHE_MAX_PRIORITY;

	pthread_mutex_lock(&node->cn_mutex);
	ASSERT(node->cn_count > 0);
	node->cn_priority = priority;
	node->cn_old_priority = -1;
	pthread_mutex_unlock(&node->cn_mutex);
}

int
cache_node_get_priority(
	struct cache_node *	node)
{
	int			priority;

	pthread_mutex_lock(&node->cn_mutex);
	priority = node->cn_priority;
	pthread_mutex_unlock(&node->cn_mutex);

	return priority;
}


/*
 * Purge a specific node from the cache.  Reference count must be zero.
 */
int
cache_node_purge(
	struct cache *		cache,
	cache_key_t		key,
	struct cache_node *	node)
{
	struct list_head *	head;
	struct list_head *	pos;
	struct list_head *	n;
	struct cache_hash *	hash;
	int			count = -1;

	hash = cache->c_hash + cache->hash(key, cache->c_hashsize,
					   cache->c_hashshift);
	head = &hash->ch_list;
	pthread_mutex_lock(&hash->ch_mutex);
	for (pos = head->next, n = pos->next; pos != head;
						pos = n, n = pos->next) {
		if ((struct cache_node *)pos != node)
			continue;

		count = __cache_node_purge(cache, node);
		if (!count)
			hash->ch_count--;
		break;
	}
	pthread_mutex_unlock(&hash->ch_mutex);

	if (count == 0) {
		pthread_mutex_lock(&cache->c_mutex);
		cache->c_count--;
		pthread_mutex_unlock(&cache->c_mutex);
	}
#ifdef CACHE_DEBUG
	if (count >= 1) {
		fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n",
				__FUNCTION__, count, node);
		cache_abort();
	}
	if (count == -1) {
		fprintf(stderr, "%s: purge node not found! (node=%p)\n",
			__FUNCTION__, node);
		cache_abort();
	}
#endif
	return count == 0;
}

/*
 * Purge all nodes from the cache.  All reference counts must be zero.
 */
void
cache_purge(
	struct cache *		cache)
{
	int			i;

	for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
		cache_shake(cache, i, true);

#ifdef CACHE_DEBUG
	if (cache->c_count != 0) {
		/* flush referenced nodes to disk */
		cache_flush(cache);
		fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n",
				__FUNCTION__, cache, cache->c_count);
		cache_abort();
	}
#endif
}

/*
 * Flush all nodes in the cache to disk.
 */
void
cache_flush(
	struct cache *		cache)
{
	struct cache_hash *	hash;
	struct list_head *	head;
	struct list_head *	pos;
	struct cache_node *	node;
	int			i;

	if (!cache->flush)
		return;

	for (i = 0; i < cache->c_hashsize; i++) {
		hash = &cache->c_hash[i];

		pthread_mutex_lock(&hash->ch_mutex);
		head = &hash->ch_list;
		for (pos = head->next; pos != head; pos = pos->next) {
			node = (struct cache_node *)pos;
			pthread_mutex_lock(&node->cn_mutex);
			cache->flush(node);
			pthread_mutex_unlock(&node->cn_mutex);
		}
		pthread_mutex_unlock(&hash->ch_mutex);
	}
}

#define	HASH_REPORT	(3 * HASH_CACHE_RATIO)
void
cache_report(
	FILE		*fp,
	const char	*name,
	struct cache	*cache)
{
	int		i;
	unsigned long	count, index, total;
	unsigned long	hash_bucket_lengths[HASH_REPORT + 2];

	if ((cache->c_hits + cache->c_misses) == 0)
		return;

	/* report cache summary */
	fprintf(fp, "%s: %p\n"
			"Max supported entries = %u\n"
			"Max utilized entries = %u\n"
			"Active entries = %u\n"
			"Hash table size = %u\n"
			"Hits = %llu\n"
			"Misses = %llu\n"
			"Hit ratio = %5.2f\n",
			name, cache,
			cache->c_maxcount,
			cache->c_max,
			cache->c_count,
			cache->c_hashsize,
			cache->c_hits,
			cache->c_misses,
			(double)cache->c_hits * 100 /
				(cache->c_hits + cache->c_misses)
	);

	for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
		fprintf(fp, "MRU %d entries = %6u (%3u%%)\n",
			i, cache->c_mrus[i].cm_count,
			cache->c_mrus[i].cm_count * 100 / cache->c_count);

	i = CACHE_DIRTY_PRIORITY;
	fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n",
		i, cache->c_mrus[i].cm_count,
		cache->c_mrus[i].cm_count * 100 / cache->c_count);

	/* report hash bucket lengths */
	bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));

	for (i = 0; i < cache->c_hashsize; i++) {
		count = cache->c_hash[i].ch_count;
		if (count > HASH_REPORT)
			index = HASH_REPORT + 1;
		else
			index = count;
		hash_bucket_lengths[index]++;
	}

	total = 0;
	for (i = 0; i < HASH_REPORT + 1; i++) {
		total += i * hash_bucket_lengths[i];
		if (hash_bucket_lengths[i] == 0)
			continue;
		fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n",
			i, hash_bucket_lengths[i],
			(i * hash_bucket_lengths[i] * 100) / cache->c_count);
	}
	if (hash_bucket_lengths[i])	/* last report bucket is the overflow bucket */
		fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n",
			i - 1, hash_bucket_lengths[i],
			((cache->c_count - total) * 100) / cache->c_count);
}
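
/*
 * Illustrative sketch only: teardown, as suggested by the routines above,
 * reports statistics if desired, purges every MRU (including the dirty one)
 * and finally frees the cache.  All reference counts must already be zero;
 * CACHE_DEBUG builds will complain about anything still referenced.  "c" is
 * the handle obtained from cache_init():
 *
 *	cache_report(stderr, "my_cache", c);	// optional usage statistics
 *	cache_purge(c);				// shake all MRUs with purge == true
 *	cache_destroy(c);			// free hash chains, MRUs and mutexes
 */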