git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: add tracepoint for search slot restart tracking
author: Leo Martins <loemra.dev@gmail.com>
Thu, 26 Feb 2026 09:51:08 +0000 (01:51 -0800)
committer: David Sterba <dsterba@suse.com>
Tue, 7 Apr 2026 16:56:00 +0000 (18:56 +0200)
Add a btrfs_search_slot_restart tracepoint that fires at each restart
site in btrfs_search_slot(), recording the root, tree level, and
reason for the restart. This enables tracking search slot restarts
which contribute to COW amplification under memory pressure.

The four restart reasons are:
 - write_lock: insufficient write lock level, need to restart with
   higher lock
 - setup_nodes: node setup returned -EAGAIN
 - slot_zero: insertion at slot 0 requires higher write lock level
 - read_block: read_block_for_search returned -EAGAIN (block not
   cached or lock contention)

COW counts are already tracked by the existing trace_btrfs_cow_block()
tracepoint. The per-restart-site tracepoint avoids counter overhead
in the critical path when tracepoints are disabled, and provides
richer per-event information that bpftrace scripts can aggregate into
counts, histograms, and per-root breakdowns.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Leo Martins <loemra.dev@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.c
include/trace/events/btrfs.h

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e8d260ecdcf6aecf08b9514d27a9a9ac10e9be3b..71e7ada954777512ed015eb66a22cb9d83a123b1 100644 (file)
@@ -2102,6 +2102,7 @@ again:
                            p->nodes[level + 1])) {
                                write_lock_level = level + 1;
                                btrfs_release_path(p);
+                               trace_btrfs_search_slot_restart(root, level, "write_lock");
                                goto again;
                        }
 
@@ -2164,8 +2165,10 @@ cow_done:
                p->slots[level] = slot;
                ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len,
                                              &write_lock_level);
-               if (ret2 == -EAGAIN)
+               if (ret2 == -EAGAIN) {
+                       trace_btrfs_search_slot_restart(root, level, "setup_nodes");
                        goto again;
+               }
                if (ret2) {
                        ret = ret2;
                        goto done;
@@ -2181,6 +2184,7 @@ cow_done:
                if (slot == 0 && ins_len && write_lock_level < level + 1) {
                        write_lock_level = level + 1;
                        btrfs_release_path(p);
+                       trace_btrfs_search_slot_restart(root, level, "slot_zero");
                        goto again;
                }
 
@@ -2194,8 +2198,10 @@ cow_done:
                }
 
                ret2 = read_block_for_search(root, p, &b, slot, key);
-               if (ret2 == -EAGAIN && !p->nowait)
+               if (ret2 == -EAGAIN && !p->nowait) {
+                       trace_btrfs_search_slot_restart(root, level, "read_block");
                        goto again;
+               }
                if (ret2) {
                        ret = ret2;
                        goto done;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0864700f76e0a14c5ae8e73a7c3c4852ddb4909c..8ad7a2d76c1d57a92f225f0fae437b977decbc05 100644 (file)
@@ -1113,6 +1113,30 @@ TRACE_EVENT(btrfs_cow_block,
                  __entry->cow_level)
 );
 
+TRACE_EVENT(btrfs_search_slot_restart,
+       /* Emitted at each "goto again" restart site in btrfs_search_slot(). */
+       TP_PROTO(const struct btrfs_root *root, int level,
+                const char *reason),
+
+       TP_ARGS(root, level, reason),
+       /* Records the root's id, the tree level, and a copy of the reason tag. */
+       TP_STRUCT__entry_btrfs(
+               __field(        u64,    root_objectid           )
+               __field(        int,    level                   )
+               __string(       reason, reason                  )
+       ),
+
+       TP_fast_assign_btrfs(root->fs_info,
+               __entry->root_objectid  = btrfs_root_id(root);
+               __entry->level          = level;
+               __assign_str(reason);
+       ),
+
+       TP_printk_btrfs("root=%llu(%s) level=%d reason=%s",
+                 show_root_type(__entry->root_objectid),
+                 __entry->level, __get_str(reason))
+);
+
 TRACE_EVENT(btrfs_space_reservation,
 
        TP_PROTO(const struct btrfs_fs_info *fs_info, const char *type, u64 val,