libperf cpumap: Reduce allocations and sorting in intersect
author Ian Rogers <irogers@google.com>
Thu, 13 Nov 2025 18:05:07 +0000 (10:05 -0800)
committer Namhyung Kim <namhyung@kernel.org>
Tue, 18 Nov 2025 02:43:08 +0000 (18:43 -0800)
On hybrid platforms the CPU maps are often disjoint. Rather than copying
CPUs and trimming, first compute the number of common CPUs; if there are
none, exit early, otherwise copy them in sorted order (see the sketch
after the sign-offs). This avoids any memory allocation in the disjoint
case, and avoids the second malloc and redundant sort that the previous
trim path incurred.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
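
A minimal standalone sketch of the same count-then-copy pattern, reduced
to plain sorted int arrays; intersect_sorted and the sample CPU lists are
illustrative only, not part of the libperf API:

#include <stdio.h>
#include <stdlib.h>

/*
 * Intersect two sorted arrays. Returns a malloc'd array of the common
 * values and stores its length in *out_len, or NULL when the inputs are
 * disjoint (or on allocation failure).
 */
static int *intersect_sorted(const int *a, int a_len,
                             const int *b, int b_len, int *out_len)
{
        int i = 0, j = 0, k = 0;
        int *out;

        /* Pass 1: count matches so the result can be sized exactly. */
        while (i < a_len && j < b_len) {
                if (a[i] < b[j])
                        i++;
                else if (a[i] > b[j])
                        j++;
                else {  /* Values match. */
                        i++;
                        j++;
                        k++;
                }
        }
        if (k == 0) {   /* Disjoint: return without allocating. */
                *out_len = 0;
                return NULL;
        }

        out = malloc(k * sizeof(*out));
        if (!out) {
                *out_len = 0;
                return NULL;
        }
        /* Pass 2: entries are copied in sorted order, so no sort needed. */
        i = j = k = 0;
        while (i < a_len && j < b_len) {
                if (a[i] < b[j])
                        i++;
                else if (a[i] > b[j])
                        j++;
                else {
                        j++;
                        out[k++] = a[i++];
                }
        }
        *out_len = k;
        return out;
}

int main(void)
{
        int a[] = {0, 1, 2, 3};         /* e.g. one PMU's CPUs */
        int b[] = {2, 3, 8, 9};         /* e.g. another PMU's CPUs */
        int n, x;
        int *common = intersect_sorted(a, 4, b, 4, &n);

        for (x = 0; x < n; x++)         /* prints 2 and 3 */
                printf("%d\n", common[x]);
        free(common);
        return 0;
}

Sizing the result exactly in the first pass is what lets the disjoint
case return without touching the allocator, and filling in array order is
what makes the trailing sort unnecessary.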
tools/lib/perf/cpumap.c

index b20a5280f2b333d46db72821d2ef30e9e97787bb..7e88417ba84d125b95b7e2744a1681702560f23d 100644 (file)
@@ -453,21 +453,33 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
 struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
                                             struct perf_cpu_map *other)
 {
-       struct perf_cpu *tmp_cpus;
-       int tmp_len;
        int i, j, k;
-       struct perf_cpu_map *merged = NULL;
+       struct perf_cpu_map *merged;
 
        if (perf_cpu_map__is_subset(other, orig))
                return perf_cpu_map__get(orig);
        if (perf_cpu_map__is_subset(orig, other))
                return perf_cpu_map__get(other);
 
-       tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
-       tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
-       if (!tmp_cpus)
+       i = j = k = 0;
+       while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+               if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+                       i++;
+               else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+                       j++;
+               else { /* CPUs match. */
+                       i++;
+                       j++;
+                       k++;
+               }
+       }
+       if (k == 0) /* Maps are completely disjoint. */
                return NULL;
 
+       merged = perf_cpu_map__alloc(k);
+       if (!merged)
+               return NULL;
+       /* Entries are added to merged in sorted order, so no need to sort again. */
        i = j = k = 0;
        while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
                if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
@@ -476,11 +488,8 @@ struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
                        j++;
                else {
                        j++;
-                       tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+                       RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++);
                }
        }
-       if (k)
-               merged = cpu_map__trim_new(k, tmp_cpus);
-       free(tmp_cpus);
        return merged;
 }