+++ /dev/null
-From 1cbfdf7ff79d7d72299f37f928d319e8b081e99b Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 22 Oct 2024 13:03:48 -0700
-Subject: drm/xe/oa: Add input fence dependencies
-
-From: Ashutosh Dixit <ashutosh.dixit@intel.com>
-
-[ Upstream commit 2fb4350a283af03a5ee34ba765783a941f942b82 ]
-
-Add input fence dependencies which will make OA configuration wait till
-these dependencies are met (till input fences signal).
-
-v2: Change add_deps arg to xe_oa_submit_bb from bool to enum (Matt Brost)
-
-Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
-Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-4-ashutosh.dixit@intel.com
-Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/gpu/drm/xe/xe_oa.c | 25 +++++++++++++++++++++----
- 1 file changed, 21 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
-index 94c558d949e1..fd14d62bfb54 100644
---- a/drivers/gpu/drm/xe/xe_oa.c
-+++ b/drivers/gpu/drm/xe/xe_oa.c
-@@ -42,6 +42,11 @@
- #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
- #define XE_OA_UNIT_INVALID U32_MAX
-
-+enum xe_oa_submit_deps {
-+ XE_OA_SUBMIT_NO_DEPS,
-+ XE_OA_SUBMIT_ADD_DEPS,
-+};
-+
- struct xe_oa_reg {
- struct xe_reg addr;
- u32 value;
-@@ -572,7 +577,8 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
- return ret;
- }
-
--static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
-+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
-+ struct xe_bb *bb)
- {
- struct xe_sched_job *job;
- struct dma_fence *fence;
-@@ -585,11 +591,22 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_
- goto exit;
- }
-
-+ if (deps == XE_OA_SUBMIT_ADD_DEPS) {
-+ for (int i = 0; i < stream->num_syncs && !err; i++)
-+ err = xe_sync_entry_add_deps(&stream->syncs[i], job);
-+ if (err) {
-+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
-+ goto err_put_job;
-+ }
-+ }
-+
- xe_sched_job_arm(job);
- fence = dma_fence_get(&job->drm.s_fence->finished);
- xe_sched_job_push(job);
-
- return fence;
-+err_put_job:
-+ xe_sched_job_put(job);
- exit:
- return ERR_PTR(err);
- }
-@@ -667,7 +684,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
-
- xe_oa_store_flex(stream, lrc, bb, flex, count);
-
-- fence = xe_oa_submit_bb(stream, bb);
-+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- goto free_bb;
-@@ -696,7 +713,7 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
-
- write_cs_mi_lri(bb, reg_lri, 1);
-
-- fence = xe_oa_submit_bb(stream, bb);
-+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- goto free_bb;
-@@ -944,7 +961,7 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
- goto exit;
- }
-
-- fence = xe_oa_submit_bb(stream, oa_bo->bb);
-+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- goto exit;
---
-2.39.5
-
+++ /dev/null
-From 9aeced687e728b9de067a502a0780f8029e61763 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 22 Oct 2024 13:03:46 -0700
-Subject: drm/xe/oa: Separate batch submission from waiting for completion
-
-From: Ashutosh Dixit <ashutosh.dixit@intel.com>
-
-[ Upstream commit dddcb19ad4d4bbe943a72a1fb3266c6e8aa8d541 ]
-
-When we introduce xe_syncs, we don't wait for internal OA programming
-batches to complete. That is, xe_syncs are signaled asynchronously. In
-anticipation for this, separate out batch submission from waiting for
-completion of those batches.
-
-v2: Change return type of xe_oa_submit_bb to "struct dma_fence *" (Matt B)
-v3: Retain init "int err = 0;" in xe_oa_submit_bb (Jose)
-
-Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
-Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-2-ashutosh.dixit@intel.com
-Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/gpu/drm/xe/xe_oa.c | 57 +++++++++++++++++++++++++++++---------
- 1 file changed, 44 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
-index 78823f53d290..4962c9eb9a81 100644
---- a/drivers/gpu/drm/xe/xe_oa.c
-+++ b/drivers/gpu/drm/xe/xe_oa.c
-@@ -567,11 +567,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
- return ret;
- }
-
--static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
-+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
- {
- struct xe_sched_job *job;
- struct dma_fence *fence;
-- long timeout;
- int err = 0;
-
- /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
-@@ -585,14 +584,9 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
- fence = dma_fence_get(&job->drm.s_fence->finished);
- xe_sched_job_push(job);
-
-- timeout = dma_fence_wait_timeout(fence, false, HZ);
-- dma_fence_put(fence);
-- if (timeout < 0)
-- err = timeout;
-- else if (!timeout)
-- err = -ETIME;
-+ return fence;
- exit:
-- return err;
-+ return ERR_PTR(err);
- }
-
- static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
-@@ -656,6 +650,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
- static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
- const struct flex *flex, u32 count)
- {
-+ struct dma_fence *fence;
- struct xe_bb *bb;
- int err;
-
-@@ -667,7 +662,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
-
- xe_oa_store_flex(stream, lrc, bb, flex, count);
-
-- err = xe_oa_submit_bb(stream, bb);
-+ fence = xe_oa_submit_bb(stream, bb);
-+ if (IS_ERR(fence)) {
-+ err = PTR_ERR(fence);
-+ goto free_bb;
-+ }
-+ xe_bb_free(bb, fence);
-+ dma_fence_put(fence);
-+
-+ return 0;
-+free_bb:
- xe_bb_free(bb, NULL);
- exit:
- return err;
-@@ -675,6 +679,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
-
- static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
- {
-+ struct dma_fence *fence;
- struct xe_bb *bb;
- int err;
-
-@@ -686,7 +691,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
-
- write_cs_mi_lri(bb, reg_lri, 1);
-
-- err = xe_oa_submit_bb(stream, bb);
-+ fence = xe_oa_submit_bb(stream, bb);
-+ if (IS_ERR(fence)) {
-+ err = PTR_ERR(fence);
-+ goto free_bb;
-+ }
-+ xe_bb_free(bb, fence);
-+ dma_fence_put(fence);
-+
-+ return 0;
-+free_bb:
- xe_bb_free(bb, NULL);
- exit:
- return err;
-@@ -914,15 +928,32 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
- {
- #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
- struct xe_oa_config_bo *oa_bo;
-- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
-+ int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
-+ struct dma_fence *fence;
-+ long timeout;
-
-+ /* Emit OA configuration batch */
- oa_bo = xe_oa_alloc_config_buffer(stream, config);
- if (IS_ERR(oa_bo)) {
- err = PTR_ERR(oa_bo);
- goto exit;
- }
-
-- err = xe_oa_submit_bb(stream, oa_bo->bb);
-+ fence = xe_oa_submit_bb(stream, oa_bo->bb);
-+ if (IS_ERR(fence)) {
-+ err = PTR_ERR(fence);
-+ goto exit;
-+ }
-+
-+ /* Wait till all previous batches have executed */
-+ timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
-+ dma_fence_put(fence);
-+ if (timeout < 0)
-+ err = timeout;
-+ else if (!timeout)
-+ err = -ETIME;
-+ if (err)
-+ drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
-
- /* Additional empirical delay needed for NOA programming after registers are written */
- usleep_range(us, 2 * us);
---
-2.39.5
-
+++ /dev/null
-From 756233c8ca6cada8855f9f98aeadce3a60799ab3 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 22 Oct 2024 13:03:47 -0700
-Subject: drm/xe/oa/uapi: Define and parse OA sync properties
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Ashutosh Dixit <ashutosh.dixit@intel.com>
-
-[ Upstream commit c8507a25cebd179db935dd266a33c51bef1b1e80 ]
-
-Now that we have laid the groundwork, introduce OA sync properties in the
-uapi and parse the input xe_sync array as is done elsewhere in the
-driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace.
-
-v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B)
- Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose)
-
-Acked-by: José Roberto de Souza <jose.souza@intel.com>
-Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
-Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com
-Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/gpu/drm/xe/xe_oa.c | 83 +++++++++++++++++++++++++++++++-
- drivers/gpu/drm/xe/xe_oa_types.h | 6 +++
- drivers/gpu/drm/xe/xe_query.c | 2 +-
- include/uapi/drm/xe_drm.h | 17 +++++++
- 4 files changed, 106 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
-index 4962c9eb9a81..94c558d949e1 100644
---- a/drivers/gpu/drm/xe/xe_oa.c
-+++ b/drivers/gpu/drm/xe/xe_oa.c
-@@ -36,6 +36,7 @@
- #include "xe_pm.h"
- #include "xe_sched_job.h"
- #include "xe_sriov.h"
-+#include "xe_sync.h"
-
- #define DEFAULT_POLL_FREQUENCY_HZ 200
- #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
-@@ -70,6 +71,7 @@ struct flex {
- };
-
- struct xe_oa_open_param {
-+ struct xe_file *xef;
- u32 oa_unit_id;
- bool sample;
- u32 metric_set;
-@@ -81,6 +83,9 @@ struct xe_oa_open_param {
- struct xe_exec_queue *exec_q;
- struct xe_hw_engine *hwe;
- bool no_preempt;
-+ struct drm_xe_sync __user *syncs_user;
-+ int num_syncs;
-+ struct xe_sync_entry *syncs;
- };
-
- struct xe_oa_config_bo {
-@@ -1393,6 +1398,9 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
- stream->period_exponent = param->period_exponent;
- stream->no_preempt = param->no_preempt;
-
-+ stream->num_syncs = param->num_syncs;
-+ stream->syncs = param->syncs;
-+
- /*
- * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
- * of buffer, making the OA buffer effectively a non-power-of-2 size circular
-@@ -1743,6 +1751,20 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
- return 0;
- }
-
-+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
-+ struct xe_oa_open_param *param)
-+{
-+ param->num_syncs = value;
-+ return 0;
-+}
-+
-+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
-+ struct xe_oa_open_param *param)
-+{
-+ param->syncs_user = u64_to_user_ptr(value);
-+ return 0;
-+}
-+
- typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param);
- static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
-@@ -1755,6 +1777,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
- [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
- [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
- [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
-+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
-+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
- };
-
- static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
-@@ -1814,6 +1838,49 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number
- return 0;
- }
-
-+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
-+{
-+ int ret, num_syncs, num_ufence = 0;
-+
-+ if (param->num_syncs && !param->syncs_user) {
-+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
-+ ret = -EINVAL;
-+ goto exit;
-+ }
-+
-+ if (param->num_syncs) {
-+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
-+ if (!param->syncs) {
-+ ret = -ENOMEM;
-+ goto exit;
-+ }
-+ }
-+
-+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
-+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
-+ &param->syncs_user[num_syncs], 0);
-+ if (ret)
-+ goto err_syncs;
-+
-+ if (xe_sync_is_ufence(&param->syncs[num_syncs]))
-+ num_ufence++;
-+ }
-+
-+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
-+ ret = -EINVAL;
-+ goto err_syncs;
-+ }
-+
-+ return 0;
-+
-+err_syncs:
-+ while (num_syncs--)
-+ xe_sync_entry_cleanup(&param->syncs[num_syncs]);
-+ kfree(param->syncs);
-+exit:
-+ return ret;
-+}
-+
- /**
- * xe_oa_stream_open_ioctl - Opens an OA stream
- * @dev: @drm_device
-@@ -1839,6 +1906,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
- return -ENODEV;
- }
-
-+ param.xef = xef;
- ret = xe_oa_user_extensions(oa, data, 0, &param);
- if (ret)
- return ret;
-@@ -1907,11 +1975,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
- drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
- }
-
-+ ret = xe_oa_parse_syncs(oa, &param);
-+ if (ret)
-+ goto err_exec_q;
-+
- mutex_lock(&param.hwe->gt->oa.gt_lock);
- ret = xe_oa_stream_open_ioctl_locked(oa, &param);
- mutex_unlock(&param.hwe->gt->oa.gt_lock);
-+ if (ret < 0)
-+ goto err_sync_cleanup;
-+
-+ return ret;
-+
-+err_sync_cleanup:
-+ while (param.num_syncs--)
-+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
-+ kfree(param.syncs);
- err_exec_q:
-- if (ret < 0 && param.exec_q)
-+ if (param.exec_q)
- xe_exec_queue_put(param.exec_q);
- return ret;
- }
-diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
-index 8862eca73fbe..99f4b2d4bdcf 100644
---- a/drivers/gpu/drm/xe/xe_oa_types.h
-+++ b/drivers/gpu/drm/xe/xe_oa_types.h
-@@ -238,5 +238,11 @@ struct xe_oa_stream {
-
- /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
- u32 no_preempt;
-+
-+ /** @num_syncs: size of @syncs array */
-+ u32 num_syncs;
-+
-+ /** @syncs: syncs to wait on and to signal */
-+ struct xe_sync_entry *syncs;
- };
- #endif
-diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
-index 1c96375bd7df..6fec5d1a1eb4 100644
---- a/drivers/gpu/drm/xe/xe_query.c
-+++ b/drivers/gpu/drm/xe/xe_query.c
-@@ -679,7 +679,7 @@ static int query_oa_units(struct xe_device *xe,
- du->oa_unit_id = u->oa_unit_id;
- du->oa_unit_type = u->type;
- du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
-- du->capabilities = DRM_XE_OA_CAPS_BASE;
-+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
-
- j = 0;
- for_each_hw_engine(hwe, gt, hwe_id) {
-diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
-index c4182e95a619..4a8a4a63e99c 100644
---- a/include/uapi/drm/xe_drm.h
-+++ b/include/uapi/drm/xe_drm.h
-@@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit {
- /** @capabilities: OA capabilities bit-mask */
- __u64 capabilities;
- #define DRM_XE_OA_CAPS_BASE (1 << 0)
-+#define DRM_XE_OA_CAPS_SYNCS (1 << 1)
-
- /** @oa_timestamp_freq: OA timestamp freq */
- __u64 oa_timestamp_freq;
-@@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id {
- * to be disabled for the stream exec queue.
- */
- DRM_XE_OA_PROPERTY_NO_PREEMPT,
-+
-+ /**
-+ * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array
-+ * specified in @DRM_XE_OA_PROPERTY_SYNCS
-+ */
-+ DRM_XE_OA_PROPERTY_NUM_SYNCS,
-+
-+ /**
-+ * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array
-+ * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA
-+ * configuration will wait till input fences signal. Output fences
-+ * will signal after the new OA configuration takes effect. For
-+ * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar
-+ * to the VM bind case.
-+ */
-+ DRM_XE_OA_PROPERTY_SYNCS,
- };
-
- /**
---
-2.39.5
-
block-bfq-fix-waker_bfqq-uaf-after-bfq_split_bfqq.patch
arm64-dts-rockchip-add-hevc-power-domain-clock-to-rk.patch
firewall-remove-misplaced-semicolon-from-stm32_firew.patch
-drm-xe-oa-separate-batch-submission-from-waiting-for.patch
-drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch
-drm-xe-oa-add-input-fence-dependencies.patch
-xe-oa-fix-query-mode-of-operation-for-oar-oac.patch
drm-mediatek-only-touch-disp_reg_ovl_pitch_msb-if-af.patch
io_uring-don-t-touch-sqd-thread-off-tw-add.patch
iio-imu-inv_icm42600-fix-spi-burst-write-not-supported.patch
+++ /dev/null
-From a65d438e587efaac9af626908a555e536361984b Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 20 Dec 2024 09:19:18 -0800
-Subject: xe/oa: Fix query mode of operation for OAR/OAC
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
-
-[ Upstream commit f0ed39830e6064d62f9c5393505677a26569bb56 ]
-
-This is a set of squashed commits to facilitate smooth applying to
-stable. Each commit message is retained for reference.
-
-1) Allow a GGTT mapped batch to be submitted to user exec queue
-
-For an OA use case, one of the HW registers needs to be modified by
-submitting an MI_LOAD_REGISTER_IMM command to the user's exec queue, so
-that the register is modified in the user's hardware context. In order
-to do this a batch that is mapped in GGTT, needs to be submitted to the
-user exec queue. Since all user submissions use q->vm and hence PPGTT,
-add some plumbing to enable submission of batches mapped in GGTT.
-
-v2: ggtt is zero-initialized, so no need to set it false (Matt Brost)
-
-2) xe/oa: Use MI_LOAD_REGISTER_IMMEDIATE to enable OAR/OAC
-
-To enable OAR/OAC, a bit in RING_CONTEXT_CONTROL needs to be set.
-Setting this bit causes the context image size to change and, if not done
-correctly, can cause undesired hangs.
-
-Current code uses a separate exec_queue to modify this bit and is
-error-prone. As per HW recommendation, submit MI_LOAD_REGISTER_IMM to
-the target hardware context to modify the relevant bit.
-
-In v2 version, an attempt to submit everything to the user-queue was
-made, but it failed the unprivileged-single-ctx-counters test. It
-appears that the OACTXCONTROL must be modified from a remote context.
-
-In v3 version, all context specific register configurations were moved
-to use LOAD_REGISTER_IMMEDIATE and that seems to work well. This is a
-cleaner way, since we can now submit all configuration to user
-exec_queue and the fence handling is simplified.
-
-v2:
-(Matt)
-- set job->ggtt to true if create job is successful
-- unlock vm on job error
-
-(Ashutosh)
-- don't wait on job submission
-- use kernel exec queue where possible
-
-v3:
-(Ashutosh)
-- Fix checkpatch issues
-- Remove extra spaces/new-lines
-- Add Fixes: and Cc: tags
-- Reset context control bit when OA stream is closed
-- Submit all config via MI_LOAD_REGISTER_IMMEDIATE
-
-(Umesh)
-- Update commit message for v3 experiment
-- Squash patches for easier port to stable
-
-v4:
-(Ashutosh)
-- No need to pass q to xe_oa_submit_bb
-- Do not support exec queues with width > 1
-- Fix disabling of CTX_CTRL_OAC_CONTEXT_ENABLE
-
-v5:
-(Ashutosh)
-- Drop reg_lri related comments
-- Use XE_OA_SUBMIT_NO_DEPS in xe_oa_load_with_lri
-
-Fixes: 8135f1c09dd2 ("drm/xe/oa: Don't reset OAC_CONTEXT_ENABLE on OA stream close")
-Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
-Reviewed-by: Matthew Brost <matthew.brost@intel.com> # commit 1
-Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Cc: stable@vger.kernel.org
-Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
-Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
-Link: https://patchwork.freedesktop.org/patch/msgid/20241220171919.571528-2-umesh.nerlige.ramappa@intel.com
-(cherry picked from commit 55039832f98c7e05f1cf9e0d8c12b2490abd0f16)
-Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/gpu/drm/xe/xe_oa.c | 134 ++++++++----------------
- drivers/gpu/drm/xe/xe_ring_ops.c | 5 +-
- drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +
- 3 files changed, 51 insertions(+), 90 deletions(-)
-
-diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
-index fd14d62bfb54..d81f0b05b2df 100644
---- a/drivers/gpu/drm/xe/xe_oa.c
-+++ b/drivers/gpu/drm/xe/xe_oa.c
-@@ -69,12 +69,6 @@ struct xe_oa_config {
- struct rcu_head rcu;
- };
-
--struct flex {
-- struct xe_reg reg;
-- u32 offset;
-- u32 value;
--};
--
- struct xe_oa_open_param {
- struct xe_file *xef;
- u32 oa_unit_id;
-@@ -577,19 +571,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
- return ret;
- }
-
-+static void xe_oa_lock_vma(struct xe_exec_queue *q)
-+{
-+ if (q->vm) {
-+ down_read(&q->vm->lock);
-+ xe_vm_lock(q->vm, false);
-+ }
-+}
-+
-+static void xe_oa_unlock_vma(struct xe_exec_queue *q)
-+{
-+ if (q->vm) {
-+ xe_vm_unlock(q->vm);
-+ up_read(&q->vm->lock);
-+ }
-+}
-+
- static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
- struct xe_bb *bb)
- {
-+ struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q;
- struct xe_sched_job *job;
- struct dma_fence *fence;
- int err = 0;
-
-- /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
-- job = xe_bb_create_job(stream->k_exec_q, bb);
-+ xe_oa_lock_vma(q);
-+
-+ job = xe_bb_create_job(q, bb);
- if (IS_ERR(job)) {
- err = PTR_ERR(job);
- goto exit;
- }
-+ job->ggtt = true;
-
- if (deps == XE_OA_SUBMIT_ADD_DEPS) {
- for (int i = 0; i < stream->num_syncs && !err; i++)
-@@ -604,10 +617,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa
- fence = dma_fence_get(&job->drm.s_fence->finished);
- xe_sched_job_push(job);
-
-+ xe_oa_unlock_vma(q);
-+
- return fence;
- err_put_job:
- xe_sched_job_put(job);
- exit:
-+ xe_oa_unlock_vma(q);
- return ERR_PTR(err);
- }
-
-@@ -655,63 +671,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
- free_oa_config_bo(oa_bo);
- }
-
--static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
-- struct xe_bb *bb, const struct flex *flex, u32 count)
--{
-- u32 offset = xe_bo_ggtt_addr(lrc->bo);
--
-- do {
-- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
-- bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
-- bb->cs[bb->len++] = 0;
-- bb->cs[bb->len++] = flex->value;
--
-- } while (flex++, --count);
--}
--
--static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
-- const struct flex *flex, u32 count)
--{
-- struct dma_fence *fence;
-- struct xe_bb *bb;
-- int err;
--
-- bb = xe_bb_new(stream->gt, 4 * count, false);
-- if (IS_ERR(bb)) {
-- err = PTR_ERR(bb);
-- goto exit;
-- }
--
-- xe_oa_store_flex(stream, lrc, bb, flex, count);
--
-- fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
-- if (IS_ERR(fence)) {
-- err = PTR_ERR(fence);
-- goto free_bb;
-- }
-- xe_bb_free(bb, fence);
-- dma_fence_put(fence);
--
-- return 0;
--free_bb:
-- xe_bb_free(bb, NULL);
--exit:
-- return err;
--}
--
--static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
-+static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count)
- {
- struct dma_fence *fence;
- struct xe_bb *bb;
- int err;
-
-- bb = xe_bb_new(stream->gt, 3, false);
-+ bb = xe_bb_new(stream->gt, 2 * count + 1, false);
- if (IS_ERR(bb)) {
- err = PTR_ERR(bb);
- goto exit;
- }
-
-- write_cs_mi_lri(bb, reg_lri, 1);
-+ write_cs_mi_lri(bb, reg_lri, count);
-
- fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
- if (IS_ERR(fence)) {
-@@ -731,70 +703,54 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
- static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
- {
- const struct xe_oa_format *format = stream->oa_buffer.format;
-- struct xe_lrc *lrc = stream->exec_q->lrc[0];
-- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
- u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
- (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
-
-- struct flex regs_context[] = {
-+ struct xe_oa_reg reg_lri[] = {
- {
- OACTXCONTROL(stream->hwe->mmio_base),
-- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
- enable ? OA_COUNTER_RESUME : 0,
- },
-+ {
-+ OAR_OACONTROL,
-+ oacontrol,
-+ },
- {
- RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
-- regs_offset + CTX_CONTEXT_CONTROL,
-- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE),
-+ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
-+ enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
- },
- };
-- struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol };
-- int err;
--
-- /* Modify stream hwe context image with regs_context */
-- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
-- regs_context, ARRAY_SIZE(regs_context));
-- if (err)
-- return err;
-
-- /* Apply reg_lri using LRI */
-- return xe_oa_load_with_lri(stream, &reg_lri);
-+ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
- }
-
- static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
- {
- const struct xe_oa_format *format = stream->oa_buffer.format;
-- struct xe_lrc *lrc = stream->exec_q->lrc[0];
-- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
- u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
- (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
-- struct flex regs_context[] = {
-+ struct xe_oa_reg reg_lri[] = {
- {
- OACTXCONTROL(stream->hwe->mmio_base),
-- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1,
- enable ? OA_COUNTER_RESUME : 0,
- },
-+ {
-+ OAC_OACONTROL,
-+ oacontrol
-+ },
- {
- RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
-- regs_offset + CTX_CONTEXT_CONTROL,
-- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) |
-+ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
-+ enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
- _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
- },
- };
-- struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol };
-- int err;
-
- /* Set ccs select to enable programming of OAC_OACONTROL */
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
-
-- /* Modify stream hwe context image with regs_context */
-- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
-- regs_context, ARRAY_SIZE(regs_context));
-- if (err)
-- return err;
--
-- /* Apply reg_lri using LRI */
-- return xe_oa_load_with_lri(stream, &reg_lri);
-+ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
- }
-
- static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
-@@ -1933,8 +1889,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
- if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
- return -ENOENT;
-
-- if (param.exec_q->width > 1)
-- drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
-+ if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
-+ return -EOPNOTSUPP;
- }
-
- /*
-diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
-index 0be4f489d3e1..9f327f27c072 100644
---- a/drivers/gpu/drm/xe/xe_ring_ops.c
-+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
-@@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
-
- static u32 get_ppgtt_flag(struct xe_sched_job *job)
- {
-- return job->q->vm ? BIT(8) : 0;
-+ if (job->q->vm && !job->ggtt)
-+ return BIT(8);
-+
-+ return 0;
- }
-
- static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
-diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
-index 0d3f76fb05ce..c207361bf43e 100644
---- a/drivers/gpu/drm/xe/xe_sched_job_types.h
-+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
-@@ -57,6 +57,8 @@ struct xe_sched_job {
- u32 migrate_flush_flags;
- /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
- bool ring_ops_flush_tlb;
-+ /** @ggtt: mapped in ggtt. */
-+ bool ggtt;
- /** @ptrs: per instance pointers. */
- struct xe_job_ptrs ptrs[];
- };
---
-2.39.5
-