git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe: Move survivability entirely to xe_pci
author: Lucas De Marchi <lucas.demarchi@intel.com>
Sat, 22 Feb 2025 00:10:48 +0000 (16:10 -0800)
committer: Lucas De Marchi <lucas.demarchi@intel.com>
Tue, 25 Feb 2025 22:32:03 +0000 (14:32 -0800)
There's an odd split between xe_pci.c and xe_device.c wrt
xe_survivability: it's initialized by xe_device, but then finalized by
xe_pci. Move it entirely to the outer layer, xe_pci, so it controls
the flow entirely.

This also allows us to stop ignoring some of the errors. E.g.: if there's
an -ENOMEM, it shouldn't continue as if survivability had been
enabled.

One change worth mentioning is that if "wait for lmem" fails, it will
now also check the pcode status to decide whether to enter
survivability mode, which it was not doing before. The pcode bit used
for that decision should remain unchanged after lmem initialization
fails, so it should be fine.

Cc: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250222001051.3012936-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
drivers/gpu/drm/xe/xe_device.c
drivers/gpu/drm/xe/xe_heci_gsc.c
drivers/gpu/drm/xe/xe_pci.c
drivers/gpu/drm/xe/xe_survivability_mode.c
drivers/gpu/drm/xe/xe_survivability_mode.h

index d50ac3d43511f4621670b634742b88f0a6da1466..ef269227b64b16526ee4c9171f31174c7b13507e 100644 (file)
@@ -53,7 +53,6 @@
 #include "xe_pxp.h"
 #include "xe_query.h"
 #include "xe_sriov.h"
-#include "xe_survivability_mode.h"
 #include "xe_tile.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_sys_mgr.h"
@@ -695,12 +694,8 @@ int xe_device_probe_early(struct xe_device *xe)
        update_device_info(xe);
 
        err = xe_pcode_probe_early(xe);
-       if (err) {
-               if (xe_survivability_mode_required(xe))
-                       xe_survivability_mode_init(xe);
-
+       if (err)
                return err;
-       }
 
        err = wait_for_lmem_ready(xe);
        if (err)
index 06dc78d3a8123b78867493262ce1892c3f7dc574..992ee47abcdb7963e685f93aa11f544c52df35b0 100644 (file)
@@ -201,7 +201,7 @@ void xe_heci_gsc_init(struct xe_device *xe)
                return;
        }
 
-       if (!def->use_polling && !xe_survivability_mode_enabled(xe)) {
+       if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
                ret = heci_gsc_irq_setup(xe);
                if (ret)
                        goto fail;
index a0f4bd45b61ba836faf39795b1af493c26789009..8b6658b214be04ef2d1cf0a5a43349a65e4023c0 100644 (file)
@@ -770,8 +770,8 @@ static void xe_pci_remove(struct pci_dev *pdev)
        if (IS_SRIOV_PF(xe))
                xe_pci_sriov_configure(pdev, 0);
 
-       if (xe_survivability_mode_enabled(xe))
-               return xe_survivability_mode_remove(xe);
+       if (xe_survivability_mode_is_enabled(xe))
+               return;
 
        xe_device_remove(xe);
        xe_pm_runtime_fini(xe);
@@ -846,13 +846,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        err = xe_device_probe_early(xe);
 
        /*
-        * In Boot Survivability mode, no drm card is exposed
-        * and driver is loaded with bare minimum to allow
-        * for firmware to be flashed through mei. Return
-        * success if survivability mode is enabled.
+        * In Boot Survivability mode, no drm card is exposed and driver is
+        * loaded with bare minimum to allow for firmware to be flashed through
+        * mei. If early probe fails, check if survivability mode is flagged by
+        * HW to be enabled. In that case enable it and return success.
         */
        if (err) {
-               if (xe_survivability_mode_enabled(xe))
+               if (xe_survivability_mode_required(xe) &&
+                   xe_survivability_mode_enable(xe))
                        return 0;
 
                return err;
@@ -946,7 +947,7 @@ static int xe_pci_suspend(struct device *dev)
        struct xe_device *xe = pdev_to_xe_device(pdev);
        int err;
 
-       if (xe_survivability_mode_enabled(xe))
+       if (xe_survivability_mode_is_enabled(xe))
                return -EBUSY;
 
        err = xe_pm_suspend(xe);
index 04a341606a7c503f6924bd13dd7186b2b768c61c..7ba02e085b5b1b53bf127bc3805b675d392e97eb 100644 (file)
@@ -127,40 +127,54 @@ static ssize_t survivability_mode_show(struct device *dev,
 
 static DEVICE_ATTR_ADMIN_RO(survivability_mode);
 
-static void enable_survivability_mode(struct pci_dev *pdev)
+static void xe_survivability_mode_fini(void *arg)
+{
+       struct xe_device *xe = arg;
+       struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+       struct device *dev = &pdev->dev;
+
+       sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+       xe_heci_gsc_fini(xe);
+}
+
+static int enable_survivability_mode(struct pci_dev *pdev)
 {
        struct device *dev = &pdev->dev;
        struct xe_device *xe = pdev_to_xe_device(pdev);
        struct xe_survivability *survivability = &xe->survivability;
        int ret = 0;
 
-       /* set survivability mode */
-       survivability->mode = true;
-       dev_info(dev, "In Survivability Mode\n");
-
        /* create survivability mode sysfs */
        ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
        if (ret) {
                dev_warn(dev, "Failed to create survivability sysfs files\n");
-               return;
+               return ret;
        }
 
+       ret = devm_add_action_or_reset(xe->drm.dev,
+                                      xe_survivability_mode_fini, xe);
+       if (ret)
+               return ret;
+
        xe_heci_gsc_init(xe);
 
        xe_vsec_init(xe);
+
+       survivability->mode = true;
+       dev_err(dev, "In Survivability Mode\n");
+
+       return 0;
 }
 
 /**
- * xe_survivability_mode_enabled - check if survivability mode is enabled
+ * xe_survivability_mode_is_enabled - check if survivability mode is enabled
  * @xe: xe device instance
  *
  * Returns true if in survivability mode, false otherwise
  */
-bool xe_survivability_mode_enabled(struct xe_device *xe)
+bool xe_survivability_mode_is_enabled(struct xe_device *xe)
 {
-       struct xe_survivability *survivability = &xe->survivability;
-
-       return survivability->mode;
+       return xe->survivability.mode;
 }
 
 /**
@@ -183,34 +197,19 @@ bool xe_survivability_mode_required(struct xe_device *xe)
        data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
        survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
 
-       return (survivability->boot_status == NON_CRITICAL_FAILURE ||
-               survivability->boot_status == CRITICAL_FAILURE);
+       return survivability->boot_status == NON_CRITICAL_FAILURE ||
+               survivability->boot_status == CRITICAL_FAILURE;
 }
 
 /**
- * xe_survivability_mode_remove - remove survivability mode
+ * xe_survivability_mode_enable - Initialize and enable the survivability mode
  * @xe: xe device instance
  *
- * clean up sysfs entries of survivability mode
- */
-void xe_survivability_mode_remove(struct xe_device *xe)
-{
-       struct xe_survivability *survivability = &xe->survivability;
-       struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-       struct device *dev = &pdev->dev;
-
-       sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
-       xe_heci_gsc_fini(xe);
-       kfree(survivability->info);
-}
-
-/**
- * xe_survivability_mode_init - Initialize the survivability mode
- * @xe: xe device instance
+ * Initialize survivability information and enable survivability mode
  *
- * Initializes survivability information and enables survivability mode
+ * Return: 0 for success, negative error code otherwise.
  */
-void xe_survivability_mode_init(struct xe_device *xe)
+int xe_survivability_mode_enable(struct xe_device *xe)
 {
        struct xe_survivability *survivability = &xe->survivability;
        struct xe_survivability_info *info;
@@ -218,9 +217,10 @@ void xe_survivability_mode_init(struct xe_device *xe)
 
        survivability->size = MAX_SCRATCH_MMIO;
 
-       info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL);
+       info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
+                           GFP_KERNEL);
        if (!info)
-               return;
+               return -ENOMEM;
 
        survivability->info = info;
 
@@ -229,9 +229,8 @@ void xe_survivability_mode_init(struct xe_device *xe)
        /* Only log debug information and exit if it is a critical failure */
        if (survivability->boot_status == CRITICAL_FAILURE) {
                log_survivability_info(pdev);
-               kfree(survivability->info);
-               return;
+               return -ENXIO;
        }
 
-       enable_survivability_mode(pdev);
+       return enable_survivability_mode(pdev);
 }
index f530507a22c6297ed71af861b7ef3f149216a5af..f4df5f9025ce81202185fd3dc4307e0508e4133f 100644 (file)
@@ -10,9 +10,8 @@
 
 struct xe_device;
 
-void xe_survivability_mode_init(struct xe_device *xe);
-void xe_survivability_mode_remove(struct xe_device *xe);
-bool xe_survivability_mode_enabled(struct xe_device *xe);
+int xe_survivability_mode_enable(struct xe_device *xe);
+bool xe_survivability_mode_is_enabled(struct xe_device *xe);
 bool xe_survivability_mode_required(struct xe_device *xe);
 
 #endif /* _XE_SURVIVABILITY_MODE_H_ */