git.ipfire.org Git - thirdparty/linux.git/commitdiff
bcachefs: Tweak btree key cache shrinker so it actually frees
Author: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 20 Apr 2024 19:35:40 +0000 (15:35 -0400)
Committer: Kent Overstreet <kent.overstreet@linux.dev>
Commit date: Sat, 20 Apr 2024 21:06:20 +0000 (17:06 -0400)
Freeing key cache items is a multi-stage process; we need to wait for an
SRCU grace period to elapse, and we handle this ourselves - partially to
avoid callback overhead, but primarily so that when allocating we can
first allocate from the freed items waiting for an SRCU grace period.

Previously, the shrinker was counting the items on the 'waiting for SRCU
grace period' lists as items being scanned, but this meant that too many
items waiting for an SRCU grace period could prevent it from doing any
work at all.

After this, we're seeing that items skipped due to the accessed bit are
the main cause of the shrinker not making any progress, and we actually
want the key cache shrinker to run quite aggressively because reclaimed
items will still generally be found (more compactly) in the btree node
cache - so we also tweak the shrinker to not count those against
nr_to_scan.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c

index 88a3582a32757e34a28eb37143f9ff78a88a4085..e8c1c530cd95f5bb1c34cb39f848cd842b0a88c6 100644 (file)
@@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
         * Newest freed entries are at the end of the list - once we hit one
         * that's too new to be freed, we can bail out:
         */
-       scanned += bc->nr_freed_nonpcpu;
-
        list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                bc->nr_freed_nonpcpu--;
        }
 
-       if (scanned >= nr)
-               goto out;
-
-       scanned += bc->nr_freed_pcpu;
-
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                bc->nr_freed_pcpu--;
        }
 
-       if (scanned >= nr)
-               goto out;
-
        rcu_read_lock();
        tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
        if (bc->shrink_iter >= tbl->size)
@@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                        next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
                        ck = container_of(pos, struct bkey_cached, hash);
 
-                       if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+                       if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                                goto next;
-
-                       if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+                       } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
                                clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-                       else if (bkey_cached_lock_for_evict(ck)) {
+                               goto next;
+                       } else if (bkey_cached_lock_for_evict(ck)) {
                                bkey_cached_evict(bc, ck);
                                bkey_cached_free(bc, ck);
                        }
@@ -916,7 +906,6 @@ next:
        } while (scanned < nr && bc->shrink_iter != start);
 
        rcu_read_unlock();
-out:
        memalloc_nofs_restore(flags);
        srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
        mutex_unlock(&bc->lock);