git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
perf lock contention: Constify control data for BPF
author Namhyung Kim <namhyung@kernel.org>
Mon, 2 Sep 2024 20:05:14 +0000 (13:05 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 3 Sep 2024 14:53:15 +0000 (11:53 -0300)
The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.

Committer testing:

  root@x1:~# perf lock contention --use-bpf
   contended   total wait     max wait     avg wait         type   caller

           5     31.57 us     14.93 us      6.31 us        mutex   btrfs_delayed_update_inode+0x43
           1     16.91 us     16.91 us     16.91 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1     15.13 us     15.13 us     15.13 us     spinlock   btrfs_getattr+0xd1
           1      6.65 us      6.65 us      6.65 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1      4.34 us      4.34 us      4.34 us     spinlock   process_one_work+0x1a9
  root@x1:~#
  root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf
       0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8)          = -1 EOPNOTSUPP (Operation not supported)
       0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16
       0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16
      26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148)     = 16
      26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80)                      = 16
      87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40)       = 16
      87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40)       = 16
      88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40)       = 16
      88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148)     = 17
      88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40)       = 16
  root@x1:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/bpf_lock_contention.c
tools/perf/util/bpf_skel/lock_contention.bpf.c

index bc4e92c0c08b8b202c6e0c35d02d346c7343285a..41a1ad08789511c39da6d9b83a030fbb6164ae3d 100644 (file)
@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
        else
                bpf_map__set_max_entries(skel->maps.stacks, 1);
 
-       if (target__has_cpu(target))
+       if (target__has_cpu(target)) {
+               skel->rodata->has_cpu = 1;
                ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
-       if (target__has_task(target))
+       }
+       if (target__has_task(target)) {
+               skel->rodata->has_task = 1;
                ntasks = perf_thread_map__nr(evlist->core.threads);
-       if (con->filters->nr_types)
+       }
+       if (con->filters->nr_types) {
+               skel->rodata->has_type = 1;
                ntypes = con->filters->nr_types;
-       if (con->filters->nr_cgrps)
+       }
+       if (con->filters->nr_cgrps) {
+               skel->rodata->has_cgroup = 1;
                ncgrps = con->filters->nr_cgrps;
+       }
 
        /* resolve lock name filters to addr */
        if (con->filters->nr_syms) {
@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
                        con->filters->addrs = addrs;
                }
                naddrs = con->filters->nr_addrs;
+               skel->rodata->has_addr = 1;
        }
 
        bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
        bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
        bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
 
+       skel->rodata->stack_skip = con->stack_skip;
+       skel->rodata->aggr_mode = con->aggr_mode;
+       skel->rodata->needs_callstack = con->save_callstack;
+       skel->rodata->lock_owner = con->owner;
+
+       if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
+               if (cgroup_is_v2("perf_event"))
+                       skel->rodata->use_cgroup_v2 = 1;
+       }
+
        if (lock_contention_bpf__load(skel) < 0) {
                pr_err("Failed to load lock-contention BPF skeleton\n");
                return -1;
@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
                u32 cpu;
                u8 val = 1;
 
-               skel->bss->has_cpu = 1;
                fd = bpf_map__fd(skel->maps.cpu_filter);
 
                for (i = 0; i < ncpus; i++) {
@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
                u32 pid;
                u8 val = 1;
 
-               skel->bss->has_task = 1;
                fd = bpf_map__fd(skel->maps.task_filter);
 
                for (i = 0; i < ntasks; i++) {
@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
                u32 pid = evlist->workload.pid;
                u8 val = 1;
 
-               skel->bss->has_task = 1;
                fd = bpf_map__fd(skel->maps.task_filter);
                bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
        }
@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
        if (con->filters->nr_types) {
                u8 val = 1;
 
-               skel->bss->has_type = 1;
                fd = bpf_map__fd(skel->maps.type_filter);
 
                for (i = 0; i < con->filters->nr_types; i++)
@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
        if (con->filters->nr_addrs) {
                u8 val = 1;
 
-               skel->bss->has_addr = 1;
                fd = bpf_map__fd(skel->maps.addr_filter);
 
                for (i = 0; i < con->filters->nr_addrs; i++)
@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
        if (con->filters->nr_cgrps) {
                u8 val = 1;
 
-               skel->bss->has_cgroup = 1;
                fd = bpf_map__fd(skel->maps.cgroup_filter);
 
                for (i = 0; i < con->filters->nr_cgrps; i++)
                        bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
        }
 
-       /* these don't work well if in the rodata section */
-       skel->bss->stack_skip = con->stack_skip;
-       skel->bss->aggr_mode = con->aggr_mode;
-       skel->bss->needs_callstack = con->save_callstack;
-       skel->bss->lock_owner = con->owner;
-
-       if (con->aggr_mode == LOCK_AGGR_CGROUP) {
-               if (cgroup_is_v2("perf_event"))
-                       skel->bss->use_cgroup_v2 = 1;
-
+       if (con->aggr_mode == LOCK_AGGR_CGROUP)
                read_all_cgroups(&con->cgroups);
-       }
 
        bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
 
index 52a876b4269917fef33d56427c90b15051dec78f..1069bda5d733887fa290b5aba99999a7e47b1f93 100644 (file)
@@ -117,21 +117,22 @@ struct mm_struct___new {
 } __attribute__((preserve_access_index));
 
 /* control flags */
-int enabled;
-int has_cpu;
-int has_task;
-int has_type;
-int has_addr;
-int has_cgroup;
-int needs_callstack;
-int stack_skip;
-int lock_owner;
-
-int use_cgroup_v2;
-int perf_subsys_id = -1;
+const volatile int has_cpu;
+const volatile int has_task;
+const volatile int has_type;
+const volatile int has_addr;
+const volatile int has_cgroup;
+const volatile int needs_callstack;
+const volatile int stack_skip;
+const volatile int lock_owner;
+const volatile int use_cgroup_v2;
 
 /* determine the key of lock stat */
-int aggr_mode;
+const volatile int aggr_mode;
+
+int enabled;
+
+int perf_subsys_id = -1;
 
 __u64 end_ts;