* Copyright © 2024 Intel Corporation
*/
-#include <linux/atomic.h>
-
+#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include "xe_device.h"
#include "xe_gt_stats.h"
-#include "xe_gt_types.h"
+
+/*
+ * DRM-managed release action registered by xe_gt_stats_init(): frees the
+ * per-CPU stats block hanging off the GT passed in @arg. @drm is unused.
+ */
+static void xe_gt_stats_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_gt *owner = arg;
+
+ free_percpu(owner->stats);
+}
+
+/**
+ * xe_gt_stats_init() - Initialize GT statistics
+ * @gt: GT structure
+ *
+ * Allocate per-CPU GT statistics. Using per-CPU stats allows increments
+ * to occur without cross-CPU atomics. The allocation is freed by
+ * xe_gt_stats_fini(), which is registered here as a DRM-managed action on
+ * the device that owns @gt.
+ *
+ * Return: 0 on success, negative error code (-ENOMEM if the per-CPU
+ * allocation fails) on failure.
+ */
+int xe_gt_stats_init(struct xe_gt *gt)
+{
+ gt->stats = alloc_percpu(struct xe_gt_stats);
+ if (!gt->stats)
+ return -ENOMEM;
+
+ /* Pass the address of the embedded drm_device, not the device by value. */
+ return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, xe_gt_stats_fini,
+ gt);
+}
/**
* xe_gt_stats_incr - Increments the specified stats counter
if (id >= __XE_GT_STATS_NUM_IDS)
return;
- atomic64_add(incr, &gt->stats.counters[id]);
+ this_cpu_add(gt->stats->counters[id], incr);
}
#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
{
enum xe_gt_stats_id id;
- for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
- drm_printf(p, "%s: %lld\n", stat_description[id],
- atomic64_read(&gt->stats.counters[id]));
+ for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) {
+ u64 total = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
+
+ total += s->counters[id];
+ }
+
+ drm_printf(p, "%s: %lld\n", stat_description[id], total);
+ }
return 0;
}
/**
- * xe_gt_stats_clear - Clear the GT stats
+ * xe_gt_stats_clear() - Clear the GT stats
* @gt: GT structure
*
- * This clear (zeros) all the available GT stats.
+ * Clear (zero) all available GT stats. Note that if the stats are being
+ * updated while this function is running, the results may be unpredictable.
+ * Intended to be called on an idle GPU.
*/
void xe_gt_stats_clear(struct xe_gt *gt)
{
- int id;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
- for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
- atomic64_set(&gt->stats.counters[id], 0);
+ /* Zero this CPU's entire counter block in a single pass. */
+ memset(s, 0, sizeof(*s));
+ }
}
#ifndef _XE_GT_STATS_TYPES_H_
#define _XE_GT_STATS_TYPES_H_
+#include <linux/types.h>
+
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
__XE_GT_STATS_NUM_IDS,
};
+/**
+ * struct xe_gt_stats - Per-CPU GT statistics counters
+ * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id;
+ *            sized by the enum's __XE_GT_STATS_NUM_IDS terminator
+ *
+ * This structure is used for high-frequency, per-CPU statistics collection
+ * in the Xe driver. By using a per-CPU allocation and ensuring the structure
+ * is cache-line aligned, we avoid performance-heavy atomics and cache
+ * coherency traffic.
+ *
+ * Updates to these counters should be performed using the this_cpu_add()
+ * macro to ensure they are atomic with respect to local interrupts and
+ * preemption-safe without the overhead of explicit locking.
+ *
+ * Because every CPU has its own copy, a reader obtains a counter's total
+ * by summing that counter across every possible CPU.
+ */
+struct xe_gt_stats {
+ u64 counters[__XE_GT_STATS_NUM_IDS];
+} ____cacheline_aligned;
+
#endif