From: Greg Kroah-Hartman Date: Fri, 17 Jan 2025 12:21:19 +0000 (+0100) Subject: drop some drm patches from 6.12 X-Git-Tag: v6.1.125~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d479a835bbd294b20a0b5a41f5b5b0f033a8ecbd;p=thirdparty%2Fkernel%2Fstable-queue.git drop some drm patches from 6.12 --- diff --git a/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch b/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch deleted file mode 100644 index 778f606792..0000000000 --- a/queue-6.12/drm-xe-oa-add-input-fence-dependencies.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 1cbfdf7ff79d7d72299f37f928d319e8b081e99b Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 22 Oct 2024 13:03:48 -0700 -Subject: drm/xe/oa: Add input fence dependencies - -From: Ashutosh Dixit - -[ Upstream commit 2fb4350a283af03a5ee34ba765783a941f942b82 ] - -Add input fence dependencies which will make OA configuration wait till -these dependencies are met (till input fences signal). - -v2: Change add_deps arg to xe_oa_submit_bb from bool to enum (Matt Brost) - -Reviewed-by: Jonathan Cavitt -Signed-off-by: Ashutosh Dixit -Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-4-ashutosh.dixit@intel.com -Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") -Signed-off-by: Sasha Levin ---- - drivers/gpu/drm/xe/xe_oa.c | 25 +++++++++++++++++++++---- - 1 file changed, 21 insertions(+), 4 deletions(-) - -diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c -index 94c558d949e1..fd14d62bfb54 100644 ---- a/drivers/gpu/drm/xe/xe_oa.c -+++ b/drivers/gpu/drm/xe/xe_oa.c -@@ -42,6 +42,11 @@ - #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) - #define XE_OA_UNIT_INVALID U32_MAX - -+enum xe_oa_submit_deps { -+ XE_OA_SUBMIT_NO_DEPS, -+ XE_OA_SUBMIT_ADD_DEPS, -+}; -+ - struct xe_oa_reg { - struct xe_reg addr; - u32 value; -@@ -572,7 +577,8 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) - return 
ret; - } - --static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) -+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, -+ struct xe_bb *bb) - { - struct xe_sched_job *job; - struct dma_fence *fence; -@@ -585,11 +591,22 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_ - goto exit; - } - -+ if (deps == XE_OA_SUBMIT_ADD_DEPS) { -+ for (int i = 0; i < stream->num_syncs && !err; i++) -+ err = xe_sync_entry_add_deps(&stream->syncs[i], job); -+ if (err) { -+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); -+ goto err_put_job; -+ } -+ } -+ - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - return fence; -+err_put_job: -+ xe_sched_job_put(job); - exit: - return ERR_PTR(err); - } -@@ -667,7 +684,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr - - xe_oa_store_flex(stream, lrc, bb, flex, count); - -- fence = xe_oa_submit_bb(stream, bb); -+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_bb; -@@ -696,7 +713,7 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re - - write_cs_mi_lri(bb, reg_lri, 1); - -- fence = xe_oa_submit_bb(stream, bb); -+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_bb; -@@ -944,7 +961,7 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config - goto exit; - } - -- fence = xe_oa_submit_bb(stream, oa_bo->bb); -+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto exit; --- -2.39.5 - diff --git a/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch b/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch deleted file mode 100644 
index 5c58c38348..0000000000 --- a/queue-6.12/drm-xe-oa-separate-batch-submission-from-waiting-for.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 9aeced687e728b9de067a502a0780f8029e61763 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 22 Oct 2024 13:03:46 -0700 -Subject: drm/xe/oa: Separate batch submission from waiting for completion - -From: Ashutosh Dixit - -[ Upstream commit dddcb19ad4d4bbe943a72a1fb3266c6e8aa8d541 ] - -When we introduce xe_syncs, we don't wait for internal OA programming -batches to complete. That is, xe_syncs are signaled asynchronously. In -anticipation for this, separate out batch submission from waiting for -completion of those batches. - -v2: Change return type of xe_oa_submit_bb to "struct dma_fence *" (Matt B) -v3: Retain init "int err = 0;" in xe_oa_submit_bb (Jose) - -Reviewed-by: Jonathan Cavitt -Signed-off-by: Ashutosh Dixit -Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-2-ashutosh.dixit@intel.com -Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") -Signed-off-by: Sasha Levin ---- - drivers/gpu/drm/xe/xe_oa.c | 57 +++++++++++++++++++++++++++++--------- - 1 file changed, 44 insertions(+), 13 deletions(-) - -diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c -index 78823f53d290..4962c9eb9a81 100644 ---- a/drivers/gpu/drm/xe/xe_oa.c -+++ b/drivers/gpu/drm/xe/xe_oa.c -@@ -567,11 +567,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) - return ret; - } - --static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) -+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) - { - struct xe_sched_job *job; - struct dma_fence *fence; -- long timeout; - int err = 0; - - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ -@@ -585,14 +584,9 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) - fence = dma_fence_get(&job->drm.s_fence->finished); - 
xe_sched_job_push(job); - -- timeout = dma_fence_wait_timeout(fence, false, HZ); -- dma_fence_put(fence); -- if (timeout < 0) -- err = timeout; -- else if (!timeout) -- err = -ETIME; -+ return fence; - exit: -- return err; -+ return ERR_PTR(err); - } - - static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) -@@ -656,6 +650,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) - { -+ struct dma_fence *fence; - struct xe_bb *bb; - int err; - -@@ -667,7 +662,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr - - xe_oa_store_flex(stream, lrc, bb, flex, count); - -- err = xe_oa_submit_bb(stream, bb); -+ fence = xe_oa_submit_bb(stream, bb); -+ if (IS_ERR(fence)) { -+ err = PTR_ERR(fence); -+ goto free_bb; -+ } -+ xe_bb_free(bb, fence); -+ dma_fence_put(fence); -+ -+ return 0; -+free_bb: - xe_bb_free(bb, NULL); - exit: - return err; -@@ -675,6 +679,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr - - static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) - { -+ struct dma_fence *fence; - struct xe_bb *bb; - int err; - -@@ -686,7 +691,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re - - write_cs_mi_lri(bb, reg_lri, 1); - -- err = xe_oa_submit_bb(stream, bb); -+ fence = xe_oa_submit_bb(stream, bb); -+ if (IS_ERR(fence)) { -+ err = PTR_ERR(fence); -+ goto free_bb; -+ } -+ xe_bb_free(bb, fence); -+ dma_fence_put(fence); -+ -+ return 0; -+free_bb: - xe_bb_free(bb, NULL); - exit: - return err; -@@ -914,15 +928,32 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config - { - #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 - struct xe_oa_config_bo *oa_bo; -- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; -+ int err = 0, us = 
NOA_PROGRAM_ADDITIONAL_DELAY_US; -+ struct dma_fence *fence; -+ long timeout; - -+ /* Emit OA configuration batch */ - oa_bo = xe_oa_alloc_config_buffer(stream, config); - if (IS_ERR(oa_bo)) { - err = PTR_ERR(oa_bo); - goto exit; - } - -- err = xe_oa_submit_bb(stream, oa_bo->bb); -+ fence = xe_oa_submit_bb(stream, oa_bo->bb); -+ if (IS_ERR(fence)) { -+ err = PTR_ERR(fence); -+ goto exit; -+ } -+ -+ /* Wait till all previous batches have executed */ -+ timeout = dma_fence_wait_timeout(fence, false, 5 * HZ); -+ dma_fence_put(fence); -+ if (timeout < 0) -+ err = timeout; -+ else if (!timeout) -+ err = -ETIME; -+ if (err) -+ drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err); - - /* Additional empirical delay needed for NOA programming after registers are written */ - usleep_range(us, 2 * us); --- -2.39.5 - diff --git a/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch b/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch deleted file mode 100644 index 15c144f932..0000000000 --- a/queue-6.12/drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch +++ /dev/null @@ -1,253 +0,0 @@ -From 756233c8ca6cada8855f9f98aeadce3a60799ab3 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 22 Oct 2024 13:03:47 -0700 -Subject: drm/xe/oa/uapi: Define and parse OA sync properties -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From: Ashutosh Dixit - -[ Upstream commit c8507a25cebd179db935dd266a33c51bef1b1e80 ] - -Now that we have laid the groundwork, introduce OA sync properties in the -uapi and parse the input xe_sync array as is done elsewhere in the -driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace. 
- -v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B) - Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose) - -Acked-by: José Roberto de Souza -Reviewed-by: Jonathan Cavitt -Signed-off-by: Ashutosh Dixit -Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com -Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") -Signed-off-by: Sasha Levin ---- - drivers/gpu/drm/xe/xe_oa.c | 83 +++++++++++++++++++++++++++++++- - drivers/gpu/drm/xe/xe_oa_types.h | 6 +++ - drivers/gpu/drm/xe/xe_query.c | 2 +- - include/uapi/drm/xe_drm.h | 17 +++++++ - 4 files changed, 106 insertions(+), 2 deletions(-) - -diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c -index 4962c9eb9a81..94c558d949e1 100644 ---- a/drivers/gpu/drm/xe/xe_oa.c -+++ b/drivers/gpu/drm/xe/xe_oa.c -@@ -36,6 +36,7 @@ - #include "xe_pm.h" - #include "xe_sched_job.h" - #include "xe_sriov.h" -+#include "xe_sync.h" - - #define DEFAULT_POLL_FREQUENCY_HZ 200 - #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) -@@ -70,6 +71,7 @@ struct flex { - }; - - struct xe_oa_open_param { -+ struct xe_file *xef; - u32 oa_unit_id; - bool sample; - u32 metric_set; -@@ -81,6 +83,9 @@ struct xe_oa_open_param { - struct xe_exec_queue *exec_q; - struct xe_hw_engine *hwe; - bool no_preempt; -+ struct drm_xe_sync __user *syncs_user; -+ int num_syncs; -+ struct xe_sync_entry *syncs; - }; - - struct xe_oa_config_bo { -@@ -1393,6 +1398,9 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, - stream->period_exponent = param->period_exponent; - stream->no_preempt = param->no_preempt; - -+ stream->num_syncs = param->num_syncs; -+ stream->syncs = param->syncs; -+ - /* - * For Xe2+, when overrun mode is enabled, there are no partial reports at the end - * of buffer, making the OA buffer effectively a non-power-of-2 size circular -@@ -1743,6 +1751,20 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - 
return 0; - } - -+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, -+ struct xe_oa_open_param *param) -+{ -+ param->num_syncs = value; -+ return 0; -+} -+ -+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, -+ struct xe_oa_open_param *param) -+{ -+ param->syncs_user = u64_to_user_ptr(value); -+ return 0; -+} -+ - typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); - static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { -@@ -1755,6 +1777,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, -+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, - }; - - static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, -@@ -1814,6 +1838,49 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number - return 0; - } - -+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) -+{ -+ int ret, num_syncs, num_ufence = 0; -+ -+ if (param->num_syncs && !param->syncs_user) { -+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ if (param->num_syncs) { -+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); -+ if (!param->syncs) { -+ ret = -ENOMEM; -+ goto exit; -+ } -+ } -+ -+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { -+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], -+ &param->syncs_user[num_syncs], 0); -+ if (ret) -+ goto err_syncs; -+ -+ if (xe_sync_is_ufence(&param->syncs[num_syncs])) -+ num_ufence++; -+ } -+ -+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { -+ ret = -EINVAL; -+ goto err_syncs; -+ } -+ -+ return 0; -+ -+err_syncs: -+ 
while (num_syncs--) -+ xe_sync_entry_cleanup(&param->syncs[num_syncs]); -+ kfree(param->syncs); -+exit: -+ return ret; -+} -+ - /** - * xe_oa_stream_open_ioctl - Opens an OA stream - * @dev: @drm_device -@@ -1839,6 +1906,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f - return -ENODEV; - } - -+ param.xef = xef; - ret = xe_oa_user_extensions(oa, data, 0, &param); - if (ret) - return ret; -@@ -1907,11 +1975,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f - drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); - } - -+ ret = xe_oa_parse_syncs(oa, &param); -+ if (ret) -+ goto err_exec_q; -+ - mutex_lock(&param.hwe->gt->oa.gt_lock); - ret = xe_oa_stream_open_ioctl_locked(oa, &param); - mutex_unlock(&param.hwe->gt->oa.gt_lock); -+ if (ret < 0) -+ goto err_sync_cleanup; -+ -+ return ret; -+ -+err_sync_cleanup: -+ while (param.num_syncs--) -+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); -+ kfree(param.syncs); - err_exec_q: -- if (ret < 0 && param.exec_q) -+ if (param.exec_q) - xe_exec_queue_put(param.exec_q); - return ret; - } -diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h -index 8862eca73fbe..99f4b2d4bdcf 100644 ---- a/drivers/gpu/drm/xe/xe_oa_types.h -+++ b/drivers/gpu/drm/xe/xe_oa_types.h -@@ -238,5 +238,11 @@ struct xe_oa_stream { - - /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ - u32 no_preempt; -+ -+ /** @num_syncs: size of @syncs array */ -+ u32 num_syncs; -+ -+ /** @syncs: syncs to wait on and to signal */ -+ struct xe_sync_entry *syncs; - }; - #endif -diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c -index 1c96375bd7df..6fec5d1a1eb4 100644 ---- a/drivers/gpu/drm/xe/xe_query.c -+++ b/drivers/gpu/drm/xe/xe_query.c -@@ -679,7 +679,7 @@ static int query_oa_units(struct xe_device *xe, - du->oa_unit_id = u->oa_unit_id; - du->oa_unit_type = u->type; - du->oa_timestamp_freq = 
xe_oa_timestamp_frequency(gt); -- du->capabilities = DRM_XE_OA_CAPS_BASE; -+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; - - j = 0; - for_each_hw_engine(hwe, gt, hwe_id) { -diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h -index c4182e95a619..4a8a4a63e99c 100644 ---- a/include/uapi/drm/xe_drm.h -+++ b/include/uapi/drm/xe_drm.h -@@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit { - /** @capabilities: OA capabilities bit-mask */ - __u64 capabilities; - #define DRM_XE_OA_CAPS_BASE (1 << 0) -+#define DRM_XE_OA_CAPS_SYNCS (1 << 1) - - /** @oa_timestamp_freq: OA timestamp freq */ - __u64 oa_timestamp_freq; -@@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id { - * to be disabled for the stream exec queue. - */ - DRM_XE_OA_PROPERTY_NO_PREEMPT, -+ -+ /** -+ * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array -+ * specified in @DRM_XE_OA_PROPERTY_SYNCS -+ */ -+ DRM_XE_OA_PROPERTY_NUM_SYNCS, -+ -+ /** -+ * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array -+ * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA -+ * configuration will wait till input fences signal. Output fences -+ * will signal after the new OA configuration takes effect. For -+ * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar -+ * to the VM bind case. 
-+ */ -+ DRM_XE_OA_PROPERTY_SYNCS, - }; - - /** --- -2.39.5 - diff --git a/queue-6.12/series b/queue-6.12/series index bd82ee782c..d91201f4fe 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -179,10 +179,6 @@ hwmon-drivetemp-fix-driver-producing-garbage-data-wh.patch block-bfq-fix-waker_bfqq-uaf-after-bfq_split_bfqq.patch arm64-dts-rockchip-add-hevc-power-domain-clock-to-rk.patch firewall-remove-misplaced-semicolon-from-stm32_firew.patch -drm-xe-oa-separate-batch-submission-from-waiting-for.patch -drm-xe-oa-uapi-define-and-parse-oa-sync-properties.patch -drm-xe-oa-add-input-fence-dependencies.patch -xe-oa-fix-query-mode-of-operation-for-oar-oac.patch drm-mediatek-only-touch-disp_reg_ovl_pitch_msb-if-af.patch io_uring-don-t-touch-sqd-thread-off-tw-add.patch iio-imu-inv_icm42600-fix-spi-burst-write-not-supported.patch diff --git a/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch b/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch deleted file mode 100644 index f0104968fe..0000000000 --- a/queue-6.12/xe-oa-fix-query-mode-of-operation-for-oar-oac.patch +++ /dev/null @@ -1,363 +0,0 @@ -From a65d438e587efaac9af626908a555e536361984b Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 20 Dec 2024 09:19:18 -0800 -Subject: xe/oa: Fix query mode of operation for OAR/OAC -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From: Umesh Nerlige Ramappa - -[ Upstream commit f0ed39830e6064d62f9c5393505677a26569bb56 ] - -This is a set of squashed commits to facilitate smooth applying to -stable. Each commit message is retained for reference. - -1) Allow a GGTT mapped batch to be submitted to user exec queue - -For a OA use case, one of the HW registers needs to be modified by -submitting an MI_LOAD_REGISTER_IMM command to the users exec queue, so -that the register is modified in the user's hardware context. 
In order -to do this a batch that is mapped in GGTT, needs to be submitted to the -user exec queue. Since all user submissions use q->vm and hence PPGTT, -add some plumbing to enable submission of batches mapped in GGTT. - -v2: ggtt is zero-initialized, so no need to set it false (Matt Brost) - -2) xe/oa: Use MI_LOAD_REGISTER_IMMEDIATE to enable OAR/OAC - -To enable OAR/OAC, a bit in RING_CONTEXT_CONTROL needs to be set. -Setting this bit cause the context image size to change and if not done -correct, can cause undesired hangs. - -Current code uses a separate exec_queue to modify this bit and is -error-prone. As per HW recommendation, submit MI_LOAD_REGISTER_IMM to -the target hardware context to modify the relevant bit. - -In v2 version, an attempt to submit everything to the user-queue was -made, but it failed the unprivileged-single-ctx-counters test. It -appears that the OACTXCONTROL must be modified from a remote context. - -In v3 version, all context specific register configurations were moved -to use LOAD_REGISTER_IMMEDIATE and that seems to work well. This is a -cleaner way, since we can now submit all configuration to user -exec_queue and the fence handling is simplified. 
- -v2: -(Matt) -- set job->ggtt to true if create job is successful -- unlock vm on job error - -(Ashutosh) -- don't wait on job submission -- use kernel exec queue where possible - -v3: -(Ashutosh) -- Fix checkpatch issues -- Remove extra spaces/new-lines -- Add Fixes: and Cc: tags -- Reset context control bit when OA stream is closed -- Submit all config via MI_LOAD_REGISTER_IMMEDIATE - -(Umesh) -- Update commit message for v3 experiment -- Squash patches for easier port to stable - -v4: -(Ashutosh) -- No need to pass q to xe_oa_submit_bb -- Do not support exec queues with width > 1 -- Fix disabling of CTX_CTRL_OAC_CONTEXT_ENABLE - -v5: -(Ashutosh) -- Drop reg_lri related comments -- Use XE_OA_SUBMIT_NO_DEPS in xe_oa_load_with_lri - -Fixes: 8135f1c09dd2 ("drm/xe/oa: Don't reset OAC_CONTEXT_ENABLE on OA stream close") -Signed-off-by: Umesh Nerlige Ramappa -Reviewed-by: Matthew Brost # commit 1 -Reviewed-by: Ashutosh Dixit -Cc: stable@vger.kernel.org -Reviewed-by: Jonathan Cavitt -Signed-off-by: Ashutosh Dixit -Link: https://patchwork.freedesktop.org/patch/msgid/20241220171919.571528-2-umesh.nerlige.ramappa@intel.com -(cherry picked from commit 55039832f98c7e05f1cf9e0d8c12b2490abd0f16) -Signed-off-by: Thomas Hellström -Signed-off-by: Sasha Levin ---- - drivers/gpu/drm/xe/xe_oa.c | 134 ++++++++---------------- - drivers/gpu/drm/xe/xe_ring_ops.c | 5 +- - drivers/gpu/drm/xe/xe_sched_job_types.h | 2 + - 3 files changed, 51 insertions(+), 90 deletions(-) - -diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c -index fd14d62bfb54..d81f0b05b2df 100644 ---- a/drivers/gpu/drm/xe/xe_oa.c -+++ b/drivers/gpu/drm/xe/xe_oa.c -@@ -69,12 +69,6 @@ struct xe_oa_config { - struct rcu_head rcu; - }; - --struct flex { -- struct xe_reg reg; -- u32 offset; -- u32 value; --}; -- - struct xe_oa_open_param { - struct xe_file *xef; - u32 oa_unit_id; -@@ -577,19 +571,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) - return ret; - } - -+static void 
xe_oa_lock_vma(struct xe_exec_queue *q) -+{ -+ if (q->vm) { -+ down_read(&q->vm->lock); -+ xe_vm_lock(q->vm, false); -+ } -+} -+ -+static void xe_oa_unlock_vma(struct xe_exec_queue *q) -+{ -+ if (q->vm) { -+ xe_vm_unlock(q->vm); -+ up_read(&q->vm->lock); -+ } -+} -+ - static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, - struct xe_bb *bb) - { -+ struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; - struct xe_sched_job *job; - struct dma_fence *fence; - int err = 0; - -- /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ -- job = xe_bb_create_job(stream->k_exec_q, bb); -+ xe_oa_lock_vma(q); -+ -+ job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - err = PTR_ERR(job); - goto exit; - } -+ job->ggtt = true; - - if (deps == XE_OA_SUBMIT_ADD_DEPS) { - for (int i = 0; i < stream->num_syncs && !err; i++) -@@ -604,10 +617,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - -+ xe_oa_unlock_vma(q); -+ - return fence; - err_put_job: - xe_sched_job_put(job); - exit: -+ xe_oa_unlock_vma(q); - return ERR_PTR(err); - } - -@@ -655,63 +671,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) - free_oa_config_bo(oa_bo); - } - --static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, -- struct xe_bb *bb, const struct flex *flex, u32 count) --{ -- u32 offset = xe_bo_ggtt_addr(lrc->bo); -- -- do { -- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); -- bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); -- bb->cs[bb->len++] = 0; -- bb->cs[bb->len++] = flex->value; -- -- } while (flex++, --count); --} -- --static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, -- const struct flex *flex, u32 count) --{ -- struct dma_fence *fence; -- struct xe_bb *bb; -- int err; -- -- bb = xe_bb_new(stream->gt, 4 * 
count, false); -- if (IS_ERR(bb)) { -- err = PTR_ERR(bb); -- goto exit; -- } -- -- xe_oa_store_flex(stream, lrc, bb, flex, count); -- -- fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); -- if (IS_ERR(fence)) { -- err = PTR_ERR(fence); -- goto free_bb; -- } -- xe_bb_free(bb, fence); -- dma_fence_put(fence); -- -- return 0; --free_bb: -- xe_bb_free(bb, NULL); --exit: -- return err; --} -- --static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -+static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) - { - struct dma_fence *fence; - struct xe_bb *bb; - int err; - -- bb = xe_bb_new(stream->gt, 3, false); -+ bb = xe_bb_new(stream->gt, 2 * count + 1, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - -- write_cs_mi_lri(bb, reg_lri, 1); -+ write_cs_mi_lri(bb, reg_lri, count); - - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { -@@ -731,70 +703,54 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re - static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) - { - const struct xe_oa_format *format = stream->oa_buffer.format; -- struct xe_lrc *lrc = stream->exec_q->lrc[0]; -- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - -- struct flex regs_context[] = { -+ struct xe_oa_reg reg_lri[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), -- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? OA_COUNTER_RESUME : 0, - }, -+ { -+ OAR_OACONTROL, -+ oacontrol, -+ }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), -- regs_offset + CTX_CONTEXT_CONTROL, -- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), -+ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, -+ enable ? 
CTX_CTRL_OAC_CONTEXT_ENABLE : 0) - }, - }; -- struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; -- int err; -- -- /* Modify stream hwe context image with regs_context */ -- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], -- regs_context, ARRAY_SIZE(regs_context)); -- if (err) -- return err; - -- /* Apply reg_lri using LRI */ -- return xe_oa_load_with_lri(stream, &reg_lri); -+ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); - } - - static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) - { - const struct xe_oa_format *format = stream->oa_buffer.format; -- struct xe_lrc *lrc = stream->exec_q->lrc[0]; -- u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); -- struct flex regs_context[] = { -+ struct xe_oa_reg reg_lri[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), -- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? OA_COUNTER_RESUME : 0, - }, -+ { -+ OAC_OACONTROL, -+ oacontrol -+ }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), -- regs_offset + CTX_CONTEXT_CONTROL, -- _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | -+ _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, -+ enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? 
CTX_CTRL_RUN_ALONE : 0), - }, - }; -- struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; -- int err; - - /* Set ccs select to enable programming of OAC_OACONTROL */ - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); - -- /* Modify stream hwe context image with regs_context */ -- err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], -- regs_context, ARRAY_SIZE(regs_context)); -- if (err) -- return err; -- -- /* Apply reg_lri using LRI */ -- return xe_oa_load_with_lri(stream, &reg_lri); -+ return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); - } - - static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) -@@ -1933,8 +1889,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f - if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) - return -ENOENT; - -- if (param.exec_q->width > 1) -- drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); -+ if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) -+ return -EOPNOTSUPP; - } - - /* -diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c -index 0be4f489d3e1..9f327f27c072 100644 ---- a/drivers/gpu/drm/xe/xe_ring_ops.c -+++ b/drivers/gpu/drm/xe/xe_ring_ops.c -@@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, - - static u32 get_ppgtt_flag(struct xe_sched_job *job) - { -- return job->q->vm ? BIT(8) : 0; -+ if (job->q->vm && !job->ggtt) -+ return BIT(8); -+ -+ return 0; - } - - static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) -diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h -index 0d3f76fb05ce..c207361bf43e 100644 ---- a/drivers/gpu/drm/xe/xe_sched_job_types.h -+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h -@@ -57,6 +57,8 @@ struct xe_sched_job { - u32 migrate_flush_flags; - /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. 
*/ - bool ring_ops_flush_tlb; -+ /** @ggtt: mapped in ggtt. */ -+ bool ggtt; - /** @ptrs: per instance pointers. */ - struct xe_job_ptrs ptrs[]; - }; --- -2.39.5 -