From: Filipe Manana Date: Tue, 21 Oct 2025 15:35:19 +0000 (+0100) Subject: btrfs: avoid space_info locking when checking if tickets are served X-Git-Tag: v6.19-rc1~167^2~75 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f912f0af13aebfd5634ba68c1a077e9a59fca47a;p=thirdparty%2Fkernel%2Flinux.git btrfs: avoid space_info locking when checking if tickets are served When checking if a ticket was served, we take the space_info's spinlock. If the ticket was served (its ->bytes is 0) or had an error (its ->error it not 0) then we just unlock the space_info and return. This however causes contention on the space_info's spinlock, which is heavily used (space reservation, space flushing, allocating and deallocating an extent from a block group (btrfs_update_block_group()), etc). Instead of using the space_info's spinlock to check if a ticket was served, use a per ticket spinlock which isn't used by anyone other than the task that created the ticket (stack allocated) and the task that serves the ticket (a reclaim task or any task deallocating space that ends up at btrfs_try_granting_tickets()). After applying this patch and all previous patches from the same patchset (many attempt to reduce space_info critical sections), lockstat showed some improvements for a fs_mark test regarding the space_info's spinlock 'lock'. The lockstat results: Before patchset: con-bounces: 13733858 contentions: 15902322 waittime-total: 264902529.72 acq-bounces: 28161791 acquisitions: 38679282 After patchset: con-bounces: 12032220 contentions: 13598034 waittime-total: 221806127.28 acq-bounces: 24717947 acquisitions: 34103281 Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 86cd87c5884a8..50704e38d1336 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -517,18 +517,27 @@ bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes, static void remove_ticket(struct btrfs_space_info *space_info, struct reserve_ticket *ticket, int error) { + lockdep_assert_held(&space_info->lock); + if (!list_empty(&ticket->list)) { list_del_init(&ticket->list); ASSERT(space_info->reclaim_size >= ticket->bytes); space_info->reclaim_size -= ticket->bytes; } - if (error) + spin_lock(&ticket->lock); + /* + * If we are called from a task waiting on the ticket, it may happen + * that before it sets an error on the ticket, a reclaim task was able + * to satisfy the ticket. In that case ignore the error. + */ + if (error && ticket->bytes > 0) ticket->error = error; else ticket->bytes = 0; wake_up(&ticket->wait); + spin_unlock(&ticket->lock); } /* @@ -1495,6 +1504,17 @@ static const enum btrfs_flush_state evict_flush_states[] = { RESET_ZONES, }; +static bool is_ticket_served(struct reserve_ticket *ticket) +{ + bool ret; + + spin_lock(&ticket->lock); + ret = (ticket->bytes == 0); + spin_unlock(&ticket->lock); + + return ret; +} + static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info, struct reserve_ticket *ticket, const enum btrfs_flush_state *states, @@ -1504,31 +1524,27 @@ static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info, u64 to_reclaim; int flush_state = 0; - spin_lock(&space_info->lock); /* * This is the priority reclaim path, so to_reclaim could be >0 still * because we may have only satisfied the priority tickets and still * left non priority tickets on the list. We would then have * to_reclaim but ->bytes == 0. */ - if (ticket->bytes == 0) { - spin_unlock(&space_info->lock); + if (is_ticket_served(ticket)) return; - } + spin_lock(&space_info->lock); to_reclaim = btrfs_calc_reclaim_metadata_size(space_info); + spin_unlock(&space_info->lock); while (flush_state < states_nr) { - spin_unlock(&space_info->lock); flush_space(space_info, to_reclaim, states[flush_state], false); - flush_state++; - spin_lock(&space_info->lock); - if (ticket->bytes == 0) { - spin_unlock(&space_info->lock); + if (is_ticket_served(ticket)) return; - } + flush_state++; } + spin_lock(&space_info->lock); /* * Attempt to steal from the global rsv if we can, except if the fs was * turned into error mode due to a transaction abort when flushing space @@ -1554,22 +1570,17 @@ static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info, static void priority_reclaim_data_space(struct btrfs_space_info *space_info, struct reserve_ticket *ticket) { - spin_lock(&space_info->lock); - /* We could have been granted before we got here. */ - if (ticket->bytes == 0) { - spin_unlock(&space_info->lock); + if (is_ticket_served(ticket)) return; - } + spin_lock(&space_info->lock); while (!space_info->full) { spin_unlock(&space_info->lock); flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false); - spin_lock(&space_info->lock); - if (ticket->bytes == 0) { - spin_unlock(&space_info->lock); + if (is_ticket_served(ticket)) return; - } + spin_lock(&space_info->lock); } remove_ticket(space_info, ticket, -ENOSPC); @@ -1582,11 +1593,13 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info, { DEFINE_WAIT(wait); - int ret = 0; - spin_lock(&space_info->lock); + spin_lock(&ticket->lock); while (ticket->bytes > 0 && ticket->error == 0) { + int ret; + ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); + spin_unlock(&ticket->lock); if (ret) { /* * Delete us from the list. After we unlock the space @@ -1596,17 +1609,18 @@ static void wait_reserve_ticket(struct btrfs_space_info *space_info, * despite getting an error, resulting in a space leak * (bytes_may_use counter of our space_info). */ + spin_lock(&space_info->lock); remove_ticket(space_info, ticket, -EINTR); - break; + spin_unlock(&space_info->lock); + return; } - spin_unlock(&space_info->lock); schedule(); finish_wait(&ticket->wait, &wait); - spin_lock(&space_info->lock); + spin_lock(&ticket->lock); } - spin_unlock(&space_info->lock); + spin_unlock(&ticket->lock); } /* @@ -1804,6 +1818,7 @@ static int reserve_bytes(struct btrfs_space_info *space_info, u64 orig_bytes, ticket.error = 0; space_info->reclaim_size += ticket.bytes; init_waitqueue_head(&ticket.wait); + spin_lock_init(&ticket.lock); ticket.steal = can_steal(flush); if (trace_btrfs_reserve_ticket_enabled()) start_ns = ktime_get_ns(); diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 7e16d4c116c8f..a4c2a3c8b3880 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -230,6 +230,7 @@ struct reserve_ticket { bool steal; struct list_head list; wait_queue_head_t wait; + spinlock_t lock; }; static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)