From b7fa6016a2bb671306ec2cbd9b3134f903e5d83a Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio
Date: Thu, 18 Dec 2025 14:38:51 -0800
Subject: [PATCH] drm/xe/pf: Scheduler groups are incompatible with multi-lrc

Since engines in the same class can be divided across multiple groups,
the GuC does not allow scheduler groups to be active if there are
multi-lrc contexts. This means that:

1) if an MLRC context is registered when we enable scheduler groups,
   the GuC will silently ignore the configuration;
2) if an MLRC context is registered after scheduler groups are enabled,
   the GuC will disable the groups and generate an adverse event.

The expectation is that the admin will ensure that all apps that use
MLRC on the PF have been terminated before scheduler groups are
created. A check is added anyway to make sure we don't still have
contexts lying around waiting to be cleaned up.

A check is also added at queue creation time to block MLRC queue
creation if scheduler groups have been enabled.

Signed-off-by: Daniele Ceraolo Spurio
Cc: Michal Wajdeczko
Reviewed-by: Michal Wajdeczko
Link: https://patch.msgid.link/20251218223846.1146344-19-daniele.ceraolospurio@intel.com
---
 drivers/gpu/drm/xe/xe_exec_queue.c         | 16 +++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf.c        | 17 +++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf.h        |  8 +++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 28 ++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h |  1 +
 drivers/gpu/drm/xe/xe_guc_submit.c         | 21 ++++++++++++++++
 drivers/gpu/drm/xe/xe_guc_submit.h         |  2 ++
 7 files changed, 93 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 41023a4644807..c336dcd19020b 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -17,6 +17,7 @@
 #include "xe_dep_scheduler.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
 #include "xe_gt_sriov_vf.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_hw_engine_group.h"
@@ -1108,6 +1109,14 @@ static u32 calc_validate_logical_mask(struct xe_device *xe,
 	return return_mask;
 }
 
+static bool has_sched_groups(struct xe_gt *gt)
+{
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt))
+		return true;
+
+	return false;
+}
+
 int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
@@ -1200,6 +1209,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 
+	/* SRIOV sched groups are not compatible with multi-lrc */
+	if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) {
+		up_read(&vm->lock);
+		xe_vm_put(vm);
+		return -EINVAL;
+	}
+
 	q = xe_exec_queue_create(xe, vm, logical_mask,
 				 args->width, hwe, flags,
 				 args->extensions);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 0d97a823e7021..fb5c9101e2756 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -284,3 +284,20 @@ int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt)
 	pf_flush_restart(gt);
 	return 0;
 }
+
+/**
+ * xe_gt_sriov_pf_sched_groups_enabled - Check if multiple scheduler groups are
+ * enabled
+ * @gt: the &xe_gt
+ *
+ * This function is for PF use only.
+ *
+ * Return: true if sched groups are enabled, false otherwise.
+ */
+bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	return xe_gt_sriov_pf_policy_sched_groups_enabled(gt);
+}
+
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index e7fde3f9937af..1ccfc7137b988 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -6,6 +6,8 @@
 #ifndef _XE_GT_SRIOV_PF_H_
 #define _XE_GT_SRIOV_PF_H_
 
+#include <linux/types.h>
+
 struct xe_gt;
 
 #ifdef CONFIG_PCI_IOV
@@ -16,6 +18,7 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
 void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
 void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
 void xe_gt_sriov_pf_restart(struct xe_gt *gt);
+bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt);
 #else
 static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
 {
@@ -38,6 +41,11 @@ static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
 static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
 {
 }
+
+static inline bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt)
+{
+	return false;
+}
 #endif
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
index cf6ead37bd642..c28606ca6623c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -16,6 +16,7 @@
 #include "xe_guc_buf.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_klv_helpers.h"
+#include "xe_guc_submit.h"
 #include "xe_pm.h"
 
 /*
@@ -590,6 +591,19 @@ static int pf_provision_sched_groups(struct xe_gt *gt, enum xe_sriov_sched_group
 	if (xe_sriov_pf_num_vfs(gt_to_xe(gt)))
 		return -EBUSY;
 
+	/*
+	 * The GuC silently ignores the setting if any MLRC contexts are
+	 * registered. We expect the admin to make sure that all apps that use
+	 * MLRC are terminated before scheduler groups are enabled, so this
+	 * check is just to make sure that the exec_queue destruction has been
+	 * completed.
+	 */
+	if (mode != XE_SRIOV_SCHED_GROUPS_DISABLED &&
+	    xe_guc_has_registered_mlrc_queues(&gt->uc.guc)) {
+		xe_gt_sriov_notice(gt, "can't enable sched groups with active MLRC queues\n");
+		return -EPERM;
+	}
+
 	err = __pf_provision_sched_groups(gt, mode);
 	if (err)
 		return err;
@@ -638,6 +652,20 @@ int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt,
 	return pf_provision_sched_groups(gt, mode);
 }
 
+/**
+ * xe_gt_sriov_pf_policy_sched_groups_enabled() - check whether the GT has
+ * multiple scheduler groups enabled
+ * @gt: the &xe_gt to check
+ *
+ * This function can only be called on PF.
+ *
+ * Return: true if the GT has multiple groups enabled, false otherwise.
+ */
+bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt)
+{
+	return gt->sriov.pf.policy.guc.sched_groups.current_mode != XE_SRIOV_SCHED_GROUPS_DISABLED;
+}
+
 static void pf_sanitize_guc_policies(struct xe_gt *gt)
 {
 	pf_sanitize_sched_if_idle(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
index d6e96302ff68c..bd73aa58f9ca7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
@@ -25,6 +25,7 @@ bool xe_sriov_gt_pf_policy_has_sched_group_mode(struct xe_gt *gt,
 						enum xe_sriov_sched_group_modes mode);
 int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt,
 						enum xe_sriov_sched_group_modes mode);
+bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt);
 
 void xe_gt_sriov_pf_policy_init(struct xe_gt *gt);
 void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 1646535b86a34..7a4218f760241 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -3563,6 +3563,27 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 	mutex_unlock(&guc->submission_state.lock);
 }
 
+/**
+ * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
+ * registered with the GuC
+ * @guc: GuC.
+ *
+ * Return: true if any MLRC queue is registered with the GuC, false otherwise.
+ */
+bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	guard(mutex)(&guc->submission_state.lock);
+
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		if (q->width > 1)
+			return true;
+
+	return false;
+}
+
 /**
  * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
  * exec queues registered to given GuC.
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 4d89b2975fe93..b3839a90c142b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -52,6 +52,8 @@ xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapsh
 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
 
 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
+bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc);
+
 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
 
 #endif
-- 
2.47.3
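Illustrative only, not part of the patch: a minimal userspace sketch of the
case the new check in xe_exec_queue_create_ioctl() rejects, i.e. asking for a
width > 1 (multi-LRC / parallel) exec queue via DRM_IOCTL_XE_EXEC_QUEUE_CREATE
while PF scheduler groups are enabled. The device node, engine class/instance
values and the xe_drm.h include path are assumptions for the example; with the
check in place the ioctl is expected to fail with EINVAL.

/*
 * Hypothetical example: request a two-wide parallel (multi-LRC) exec
 * queue on the PF. Expected to fail with EINVAL while scheduler groups
 * are enabled.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/xe_drm.h>		/* include path depends on libdrm setup */

int main(void)
{
	/* assumed render node backed by the PF */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	struct drm_xe_vm_create vm = {};
	/* one placement made of two engines of the same class (width = 2) */
	struct drm_xe_engine_class_instance eci[2] = {
		{ .engine_class = DRM_XE_ENGINE_CLASS_COMPUTE, .engine_instance = 0 },
		{ .engine_class = DRM_XE_ENGINE_CLASS_COMPUTE, .engine_instance = 1 },
	};
	struct drm_xe_exec_queue_create create = {
		.width = 2,		/* width > 1 == multi-LRC */
		.num_placements = 1,
		.instances = (uintptr_t)eci,
	};

	if (fd < 0 || ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &vm)) {
		perror("setup");
		return 1;
	}

	create.vm_id = vm.vm_id;
	if (ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		/* expected while PF sched groups are active: EINVAL */
		fprintf(stderr, "exec queue create failed: %s\n", strerror(errno));
	else
		printf("created MLRC exec queue %u\n", create.exec_queue_id);

	return 0;
}

The admin-side flow is the mirror image: tear down any such queues first, then
flip the scheduler-group policy; otherwise pf_provision_sched_groups() now
refuses with -EPERM.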