#include "xe_pcode_api.h"
#include "xe_vsec.h"
-#define MAX_SCRATCH_MMIO 8
-
/**
* DOC: Survivability Mode
*
*
* Refer :ref:`xe_configfs` for more details on how to use configfs
*
- * Survivability mode is indicated by the below admin-only readable sysfs which provides additional
- * debug information::
+ * Survivability mode is indicated by the below admin-only readable sysfs entry. It
+ * provides information about the type of survivability mode (Boot/Runtime).
+ *
+ * .. code-block:: shell
+ *
+ * # cat /sys/bus/pci/devices/<device>/survivability_mode
+ * Boot
+ *
+ *
+ * Any additional debug information if present will be visible under the directory
+ * ``survivability_info``::
+ *
+ * /sys/bus/pci/devices/<device>/survivability_info/
+ * ├── aux_info0
+ * ├── aux_info1
+ * ├── aux_info2
+ * ├── aux_info3
+ * ├── aux_info4
+ * ├── capability_info
+ * ├── fdo_mode
+ * ├── postcode_trace
+ * └── postcode_trace_overflow
+ *
+ * This directory has the following attributes
*
- * /sys/bus/pci/devices/<device>/survivability_mode
+ * - ``capability_info`` : Indicates Boot status and support for additional information
*
- * Capability Information:
- * Provides boot status
- * Postcode Information:
- * Provides information about the failure
- * Overflow Information
- * Provides history of previous failures
- * Auxiliary Information
- * Certain failures may have information in addition to postcode information
+ * - ``postcode_trace``, ``postcode_trace_overflow`` : Each postcode is a 8bit value and
+ * represents a boot failure event. When a new failure event is logged by PCODE the
+ * existing postcodes are shifted left. These entries provide a history of 8 postcodes.
+ *
+ * - ``aux_info<n>`` : Some failures have additional debug information
*
* Runtime Survivability
* =====================
* Certain runtime firmware errors can cause the device to enter a wedged state
* (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation.
* Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and
- * is indicated by the presence of survivability mode sysfs::
+ * is indicated by the presence of survivability mode sysfs.
+ * Survivability mode sysfs provides information about the type of survivability mode.
*
- * /sys/bus/pci/devices/<device>/survivability_mode
+ * .. code-block:: shell
*
- * Survivability mode sysfs provides information about the type of survivability mode.
+ * # cat /sys/bus/pci/devices/<device>/survivability_mode
+ * Runtime
*
* When such errors occur, userspace is notified with the drm device wedged uevent and runtime
* survivability mode. User can then initiate a firmware flash using userspace tools like fwupd
* to restore device to normal operation.
*/
+static const char * const reg_map[] = {
+ [CAPABILITY_INFO] = "Capability Info",
+ [POSTCODE_TRACE] = "Postcode trace",
+ [POSTCODE_TRACE_OVERFLOW] = "Postcode trace overflow",
+ [AUX_INFO0] = "Auxiliary Info 0",
+ [AUX_INFO1] = "Auxiliary Info 1",
+ [AUX_INFO2] = "Auxiliary Info 2",
+ [AUX_INFO3] = "Auxiliary Info 3",
+ [AUX_INFO4] = "Auxiliary Info 4",
+};
+
+struct xe_survivability_attribute {
+ struct device_attribute attr;
+ u8 index;
+};
+
+static struct
+xe_survivability_attribute *dev_attr_to_survivability_attr(struct device_attribute *attr)
+{
+ return container_of(attr, struct xe_survivability_attribute, attr);
+}
+
static u32 aux_history_offset(u32 reg_value)
{
return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value);
}
-static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info,
- int id, char *name)
+static void set_survivability_info(struct xe_mmio *mmio, u32 *info, int id)
{
- strscpy(info[id].name, name, sizeof(info[id].name));
- info[id].reg = PCODE_SCRATCH(id).raw;
- info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
+ info[id] = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
}
static void populate_survivability_info(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
+ u32 *info = survivability->info;
struct xe_mmio *mmio;
u32 id = 0, reg_value;
- char name[NAME_MAX];
int index;
mmio = xe_root_tile_mmio(xe);
- set_survivability_info(mmio, info, id, "Capability Info");
- reg_value = info[id].value;
+ set_survivability_info(mmio, info, CAPABILITY_INFO);
+ reg_value = info[CAPABILITY_INFO];
if (reg_value & HISTORY_TRACKING) {
- id++;
- set_survivability_info(mmio, info, id, "Postcode Info");
+ set_survivability_info(mmio, info, POSTCODE_TRACE);
- if (reg_value & OVERFLOW_SUPPORT) {
- id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value);
- set_survivability_info(mmio, info, id, "Overflow Info");
- }
+ if (reg_value & OVERFLOW_SUPPORT)
+ set_survivability_info(mmio, info, POSTCODE_TRACE_OVERFLOW);
}
if (reg_value & AUXINFO_SUPPORT) {
id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value);
- for (index = 0; id && reg_value; index++, reg_value = info[id].value,
- id = aux_history_offset(reg_value)) {
- snprintf(name, NAME_MAX, "Auxiliary Info %d", index);
- set_survivability_info(mmio, info, id, name);
+ for (index = 0; id >= AUX_INFO0 && id < MAX_SCRATCH_REG; index++) {
+ set_survivability_info(mmio, info, id);
+ id = aux_history_offset(info[id]);
}
}
}
{
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
+ u32 *info = survivability->info;
int id;
dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n",
survivability->boot_status);
- for (id = 0; id < MAX_SCRATCH_MMIO; id++) {
- if (info[id].reg)
- dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name,
- info[id].reg, info[id].value);
+ for (id = 0; id < MAX_SCRATCH_REG; id++) {
+ if (info[id])
+ dev_info(&pdev->dev, "%s: 0x%x\n", reg_map[id], info[id]);
}
}
struct pci_dev *pdev = to_pci_dev(dev);
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
- int index = 0, count = 0;
- count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n",
- survivability->type ? "Runtime" : "Boot");
+ return sysfs_emit(buff, "%s\n", survivability->type ? "Runtime" : "Boot");
+}
- if (!check_boot_failure(xe))
- return count;
+static DEVICE_ATTR_ADMIN_RO(survivability_mode);
- for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
- if (info[index].reg)
- count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
- info[index].reg, info[index].value);
- }
+static ssize_t survivability_info_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_survivability_attribute *sa = dev_attr_to_survivability_attr(attr);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ struct xe_survivability *survivability = &xe->survivability;
+ u32 *info = survivability->info;
- return count;
+ return sysfs_emit(buff, "0x%x\n", info[sa->index]);
}
-static DEVICE_ATTR_ADMIN_RO(survivability_mode);
+#define SURVIVABILITY_ATTR_RO(name, _index) \
+ struct xe_survivability_attribute attr_##name = { \
+ .attr = __ATTR(name, 0400, survivability_info_show, NULL), \
+ .index = _index, \
+ }
+
+SURVIVABILITY_ATTR_RO(capability_info, CAPABILITY_INFO);
+SURVIVABILITY_ATTR_RO(postcode_trace, POSTCODE_TRACE);
+SURVIVABILITY_ATTR_RO(postcode_trace_overflow, POSTCODE_TRACE_OVERFLOW);
+SURVIVABILITY_ATTR_RO(aux_info0, AUX_INFO0);
+SURVIVABILITY_ATTR_RO(aux_info1, AUX_INFO1);
+SURVIVABILITY_ATTR_RO(aux_info2, AUX_INFO2);
+SURVIVABILITY_ATTR_RO(aux_info3, AUX_INFO3);
+SURVIVABILITY_ATTR_RO(aux_info4, AUX_INFO4);
static void xe_survivability_mode_fini(void *arg)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
- sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+ device_remove_file(dev, &dev_attr_survivability_mode);
}
+static umode_t survivability_info_attrs_visible(struct kobject *kobj, struct attribute *attr,
+ int idx)
+{
+ struct xe_device *xe = kdev_to_xe_device(kobj_to_dev(kobj));
+ struct xe_survivability *survivability = &xe->survivability;
+ u32 *info = survivability->info;
+
+ if (info[idx])
+ return 0400;
+
+ return 0;
+}
+
+/* Attributes are ordered according to enum scratch_reg */
+static struct attribute *survivability_info_attrs[] = {
+ &attr_capability_info.attr.attr,
+ &attr_postcode_trace.attr.attr,
+ &attr_postcode_trace_overflow.attr.attr,
+ &attr_aux_info0.attr.attr,
+ &attr_aux_info1.attr.attr,
+ &attr_aux_info2.attr.attr,
+ &attr_aux_info3.attr.attr,
+ &attr_aux_info4.attr.attr,
+ NULL,
+};
+
+static const struct attribute_group survivability_info_group = {
+ .name = "survivability_info",
+ .attrs = survivability_info_attrs,
+ .is_visible = survivability_info_attrs_visible,
+};
+
static int create_survivability_sysfs(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
int ret;
- /* create survivability mode sysfs */
- ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+ ret = device_create_file(dev, &dev_attr_survivability_mode);
if (ret) {
dev_warn(dev, "Failed to create survivability sysfs files\n");
return ret;
if (ret)
return ret;
+ if (check_boot_failure(xe)) {
+ ret = devm_device_add_group(dev, &survivability_info_group);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
return ret;
}
-static int init_survivability_mode(struct xe_device *xe)
-{
- struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info;
-
- survivability->size = MAX_SCRATCH_MMIO;
-
- info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
- GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- survivability->info = info;
-
- populate_survivability_info(xe);
-
- return 0;
-}
-
/**
* xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled
* @xe: xe device instance
return -EINVAL;
}
- ret = init_survivability_mode(xe);
- if (ret)
- return ret;
+ populate_survivability_info(xe);
ret = create_survivability_sysfs(pdev);
if (ret)
{
struct xe_survivability *survivability = &xe->survivability;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- int ret;
if (!xe_survivability_mode_is_requested(xe))
return 0;
- ret = init_survivability_mode(xe);
- if (ret)
- return ret;
+ populate_survivability_info(xe);
/* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
if (survivability->boot_status == CRITICAL_FAILURE) {