]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
btrfs: avoid space_info locking when checking if tickets are served
authorFilipe Manana <fdmanana@suse.com>
Tue, 21 Oct 2025 15:35:19 +0000 (16:35 +0100)
committerDavid Sterba <dsterba@suse.com>
Mon, 24 Nov 2025 21:18:21 +0000 (22:18 +0100)
When checking if a ticket was served, we take the space_info's spinlock.
If the ticket was served (its ->bytes is 0) or had an error (its ->error
it not 0) then we just unlock the space_info and return.

This however causes contention on the space_info's spinlock, which is
heavily used (space reservation, space flushing, allocating and
deallocating an extent from a block group (btrfs_update_block_group()),
etc).

Instead of using the space_info's spinlock to check if a ticket was
served, use a per ticket spinlock which isn't used by anyone other than
the task that created the ticket (stack allocated) and the task that
serves the ticket (a reclaim task or any task deallocating space that
ends up at btrfs_try_granting_tickets()).

After applying this patch and all previous patches from the same patchset
(many attempt to reduce space_info critical sections), lockstat showed
some improvements for a fs_mark test regarding the space_info's spinlock
'lock'. The lockstat results:

Before patchset:

  con-bounces:     13733858
  contentions:     15902322
  waittime-total:  264902529.72
  acq-bounces:     28161791
  acquisitions:    38679282

After patchset:

  con-bounces:     12032220
  contentions:     13598034
  waittime-total:  221806127.28
  acq-bounces:     24717947
  acquisitions:    34103281

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/space-info.c
fs/btrfs/space-info.h

index 86cd87c5884a8010aca982503ec02ae654fc6788..50704e38d1336aa0ded2924d93eb212092a18be3 100644 (file)
@@ -517,18 +517,27 @@ bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes,
 static void remove_ticket(struct btrfs_space_info *space_info,
                          struct reserve_ticket *ticket, int error)
 {
+       lockdep_assert_held(&space_info->lock);
+
        if (!list_empty(&ticket->list)) {
                list_del_init(&ticket->list);
                ASSERT(space_info->reclaim_size >= ticket->bytes);
                space_info->reclaim_size -= ticket->bytes;
        }
 
-       if (error)
+       spin_lock(&ticket->lock);
+       /*
+        * If we are called from a task waiting on the ticket, it may happen
+        * that before it sets an error on the ticket, a reclaim task was able
+        * to satisfy the ticket. In that case ignore the error.
+        */
+       if (error && ticket->bytes > 0)
                ticket->error = error;
        else
                ticket->bytes = 0;
 
        wake_up(&ticket->wait);
+       spin_unlock(&ticket->lock);
 }
 
 /*
@@ -1495,6 +1504,17 @@ static const enum btrfs_flush_state evict_flush_states[] = {
        RESET_ZONES,
 };
 
+static bool is_ticket_served(struct reserve_ticket *ticket)
+{
+       bool ret;
+
+       spin_lock(&ticket->lock);
+       ret = (ticket->bytes == 0);
+       spin_unlock(&ticket->lock);
+
+       return ret;
+}
+
 static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info,
                                            struct reserve_ticket *ticket,
                                            const enum btrfs_flush_state *states,
@@ -1504,31 +1524,27 @@ static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info,
        u64 to_reclaim;
        int flush_state = 0;
 
-       spin_lock(&space_info->lock);
        /*
         * This is the priority reclaim path, so to_reclaim could be >0 still
         * because we may have only satisfied the priority tickets and still
         * left non priority tickets on the list.  We would then have
         * to_reclaim but ->bytes == 0.
         */
-       if (ticket->bytes == 0) {
-               spin_unlock(&space_info->lock);
+       if (is_ticket_served(ticket))
                return;
-       }
 
+       spin_lock(&space_info->lock);
        to_reclaim = btrfs_calc_reclaim_metadata_size(space_info);
+       spin_unlock(&space_info->lock);
 
        while (flush_state < states_nr) {
-               spin_unlock(&space_info->lock);
                flush_space(space_info, to_reclaim, states[flush_state], false);
-               flush_state++;
-               spin_lock(&space_info->lock);
-               if (ticket->bytes == 0) {
-                       spin_unlock(&space_info->lock);
+               if (is_ticket_served(ticket))
                        return;
-               }
+               flush_state++;
        }
 
+       spin_lock(&space_info->lock);
        /*
         * Attempt to steal from the global rsv if we can, except if the fs was
         * turned into error mode due to a transaction abort when flushing space
@@ -1554,22 +1570,17 @@ static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info,
 static void priority_reclaim_data_space(struct btrfs_space_info *space_info,
                                        struct reserve_ticket *ticket)
 {
-       spin_lock(&space_info->lock);
-
        /* We could have been granted before we got here. */
-       if (ticket->bytes == 0) {
-               spin_unlock(&space_info->lock);
+       if (is_ticket_served(ticket))
                return;
-       }
 
+       spin_lock(&space_info->lock);
        while (!space_info->full) {
                spin_unlock(&space_info->lock);
                flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
-               spin_lock(&space_info->lock);
-               if (ticket->bytes == 0) {
-                       spin_unlock(&space_info->lock);
+               if (is_ticket_served(ticket))
                        return;
-               }
+               spin_lock(&space_info->lock);
        }
 
        remove_ticket(space_info, ticket, -ENOSPC);
@@ -1582,11 +1593,13 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
 
 {
        DEFINE_WAIT(wait);
-       int ret = 0;
 
-       spin_lock(&space_info->lock);
+       spin_lock(&ticket->lock);
        while (ticket->bytes > 0 && ticket->error == 0) {
+               int ret;
+
                ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+               spin_unlock(&ticket->lock);
                if (ret) {
                        /*
                         * Delete us from the list. After we unlock the space
@@ -1596,17 +1609,18 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info,
                         * despite getting an error, resulting in a space leak
                         * (bytes_may_use counter of our space_info).
                         */
+                       spin_lock(&space_info->lock);
                        remove_ticket(space_info, ticket, -EINTR);
-                       break;
+                       spin_unlock(&space_info->lock);
+                       return;
                }
-               spin_unlock(&space_info->lock);
 
                schedule();
 
                finish_wait(&ticket->wait, &wait);
-               spin_lock(&space_info->lock);
+               spin_lock(&ticket->lock);
        }
-       spin_unlock(&space_info->lock);
+       spin_unlock(&ticket->lock);
 }
 
 /*
@@ -1804,6 +1818,7 @@ static int reserve_bytes(struct btrfs_space_info *space_info, u64 orig_bytes,
                ticket.error = 0;
                space_info->reclaim_size += ticket.bytes;
                init_waitqueue_head(&ticket.wait);
+               spin_lock_init(&ticket.lock);
                ticket.steal = can_steal(flush);
                if (trace_btrfs_reserve_ticket_enabled())
                        start_ns = ktime_get_ns();
index 7e16d4c116c8ff78f6a17def619e826788e2f758..a4c2a3c8b388094d3e754f3dbda90209620e483e 100644 (file)
@@ -230,6 +230,7 @@ struct reserve_ticket {
        bool steal;
        struct list_head list;
        wait_queue_head_t wait;
+       spinlock_t lock;
 };
 
 static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)