From 9ff885ef8b428febbf41f13a511755d74704949e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Feb 2026 12:05:52 -0800 Subject: [PATCH] drm/xe: Convert GT stats to per-cpu counters Current GT statistics use atomic64_t counters. Atomic operations incur a global coherency penalty. Transition to dynamic per-cpu counters using alloc_percpu(). This allows stats to be incremented via this_cpu_add(), which compiles to a single non-locking instruction. This approach keeps the hot-path updates local to the CPU, avoiding expensive cross-core cache invalidation traffic. Use for_each_possible_cpu() during aggregation and clear operations to ensure data consistency across CPU hotplug events. Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260217200552.596718-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 5 ++ drivers/gpu/drm/xe/xe_gt_stats.c | 63 +++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_stats.h | 6 +++ drivers/gpu/drm/xe/xe_gt_stats_types.h | 19 ++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 5 +- 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 68c4771de040b..1203d087b68f0 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -33,6 +33,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" +#include "xe_gt_stats.h" #include "xe_gt_sysfs.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" @@ -455,6 +456,10 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_stats_init(gt); + if (err) + return err; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (!fw_ref.domains) return -ETIMEDOUT; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 37506434d7a36..8ed0160a60417 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -3,12 +3,37 @@ * Copyright © 2024 Intel 
Corporation */ -#include <linux/atomic.h> - +#include <drm/drm_managed.h> #include <drm/drm_print.h> +#include "xe_device.h" #include "xe_gt_stats.h" -#include "xe_gt_types.h" + +static void xe_gt_stats_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + free_percpu(gt->stats); +} + +/** + * xe_gt_stats_init() - Initialize GT statistics + * @gt: GT structure + * + * Allocate per-CPU GT statistics. Using per-CPU stats allows increments + * to occur without cross-CPU atomics. + * + * Return: 0 on success, -ENOMEM on failure. + */ +int xe_gt_stats_init(struct xe_gt *gt) +{ + gt->stats = alloc_percpu(struct xe_gt_stats); + if (!gt->stats) + return -ENOMEM; + + return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, xe_gt_stats_fini, + gt); +} /** * xe_gt_stats_incr - Increments the specified stats counter @@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) if (id >= __XE_GT_STATS_NUM_IDS) return; - atomic64_add(incr, &gt->stats.counters[id]); + this_cpu_add(gt->stats->counters[id], incr); } #define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name @@ -94,23 +119,37 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) { enum xe_gt_stats_id id; - for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) - drm_printf(p, "%s: %lld\n", stat_description[id], - atomic64_read(&gt->stats.counters[id])); + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) { + u64 total = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); + + total += s->counters[id]; + } + + drm_printf(p, "%s: %lld\n", stat_description[id], total); + } return 0; } /** - * xe_gt_stats_clear - Clear the GT stats + * xe_gt_stats_clear() - Clear the GT stats * @gt: GT structure * - * This clear (zeros) all the available GT stats. + * Clear (zero) all available GT stats. Note that if the stats are being + * updated while this function is running, the results may be unpredictable. + * Intended to be called on an idle GPU. 
*/ void xe_gt_stats_clear(struct xe_gt *gt) { - int id; + int cpu; + + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); - for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) - atomic64_set(&gt->stats.counters[id], 0); + memset(s, 0, sizeof(*s)); + } } diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 59a7bf60e242c..3d0defab9b30e 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -14,10 +14,16 @@ struct xe_gt; struct drm_printer; #ifdef CONFIG_DEBUG_FS +int xe_gt_stats_init(struct xe_gt *gt); int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else +static inline int xe_gt_stats_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index b8accdbc54eb3..79568591bd676 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_STATS_TYPES_H_ #define _XE_GT_STATS_TYPES_H_ +#include <linux/types.h> + enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, @@ -58,4 +60,21 @@ enum xe_gt_stats_id { __XE_GT_STATS_NUM_IDS, }; +/** + * struct xe_gt_stats - Per-CPU GT statistics counters + * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id + * + * This structure is used for high-frequency, per-CPU statistics collection + * in the Xe driver. By using a per-CPU allocation and ensuring the structure + * is cache-line aligned, we avoid the performance-heavy atomics and cache + * coherency traffic. 
+ * + * Updates to these counters should be performed using the this_cpu_add() + * macro to ensure they are atomic with respect to local interrupts and + * preemption-safe without the overhead of explicit locking. + */ +struct xe_gt_stats { + u64 counters[__XE_GT_STATS_NUM_IDS]; +} ____cacheline_aligned; + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index caf7e7e78be90..8b55cf25a75fa 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -158,10 +158,7 @@ struct xe_gt { #if IS_ENABLED(CONFIG_DEBUG_FS) /** @stats: GT stats */ - struct { - /** @stats.counters: counters for various GT stats */ - atomic64_t counters[__XE_GT_STATS_NUM_IDS]; - } stats; + struct xe_gt_stats __percpu *stats; #endif /** -- 2.47.3