From: Lucas De Marchi Date: Tue, 4 Nov 2025 22:20:51 +0000 (-0800) Subject: drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=83ccde67a3f73326e89dde66c0c775c932a30570;p=thirdparty%2Fkernel%2Flinux.git drm/xe/gt_throttle: Avoid TOCTOU when monitoring reasons It's currently not possible to safely monitor if there's throttling happening and what are the reasons. The approach of reading the status and then reading the reasons is not reliable as by the time sysadmin reads the reason, the throttling could not be happening anymore. Previous tentative to fix that[1] was breaking the ABI and potentially sysadmin's scripts. This takes a different approach of adding and documenting the additional attribute. It's still valuable, though redundant, to provide the simpler 0/1 interface. In order to avoid userspace knowledge on the bitmask meaning and to be able to maintain the kernel side in sync with possible changes in future, just walk the attribute group and check what are the masks that match the value read. [1] https://lore.kernel.org/intel-xe/20241025092238.167042-1-raag.jadav@intel.com/ Cc: Raag Jadav Cc: Rodrigo Vivi Reviewed-by: Raag Jadav Link: https://patch.msgid.link/20251104-gt-throttle-cri-v5-1-4948b060bbfd@intel.com Signed-off-by: Lucas De Marchi --- diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index fa7068aac3344..82c5fbcdfbe3e 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -22,9 +22,15 @@ * Their availability depend on the platform and some may not be visible if that * reason is not available. * + * The ``reasons`` attribute can be used by sysadmin to monitor all possible + * reasons for throttling and report them. It's preferred over monitoring + * ``status`` and then reading the reason from individual attributes since that + * is racy. If there's no throttling happening, "none" is returned. + * * The following attributes are available on Crescent Island platform: * - * - ``status``: Overall throttle status + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space * - ``reason_pl1``: package PL1 * - ``reason_pl2``: package PL2 * - ``reason_pl4``: package PL4 @@ -43,7 +49,8 @@ * * Other platforms support the following reasons: * - * - ``status``: Overall status + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) + * - ``reasons``: Array of reasons causing throttling separated by space * - ``reason_pl1``: package PL1 * - ``reason_pl2``: package PL2 * - ``reason_pl4``: package PL4, Iccmax etc. @@ -111,12 +118,57 @@ static ssize_t reason_show(struct kobject *kobj, return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask)); } +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe); + +static ssize_t reasons_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + struct xe_gt *gt = throttle_to_gt(kobj); + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group; + struct attribute **pother; + ssize_t ret = 0; + u32 reasons; + + reasons = xe_gt_throttle_get_limit_reasons(gt); + if (!reasons) + goto ret_none; + + group = get_platform_throttle_group(xe); + for (pother = group->attrs; *pother; pother++) { + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr); + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr); + + if (other_ta->mask != U32_MAX && reasons & other_ta->mask) + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name); + } + + if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons)) + goto ret_none; + + /* Drop extra space from last iteration above */ + ret--; + ret += sysfs_emit_at(buff, ret, "\n"); + + return ret; + +ret_none: + return sysfs_emit(buff, "none\n"); +} + #define THROTTLE_ATTR_RO(name, _mask) \ struct throttle_attribute attr_##name = { \ .attr = __ATTR(name, 0444, reason_show, NULL), \ .mask = _mask, \ } +#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, _show, NULL), \ + .mask = _mask, \ + } + +static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show); static THROTTLE_ATTR_RO(status, U32_MAX); static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK); static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK); @@ -128,6 +180,7 @@ static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK); static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK); static struct attribute *throttle_attrs[] = { + &attr_reasons.attr.attr, &attr_status.attr.attr, &attr_reason_pl1.attr.attr, &attr_reason_pl2.attr.attr, @@ -153,6 +206,7 @@ static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK); static struct attribute *cri_throttle_attrs[] = { /* Common */ + &attr_reasons.attr.attr, &attr_status.attr.attr, &attr_reason_pl1.attr.attr, &attr_reason_pl2.attr.attr,