/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
bool has_mig_hw_bug;
+ /* GPU has just been reset */
+ bool reset_done;
};
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
MODULE_DEVICE_TABLE(pci, nvgrace_gpu_vfio_pci_table);
+/*
+ * A GPU reset must be serialized against the *first* mapping fault and the
+ * first read/write access that follow it, to prevent potential RAS events
+ * from being logged.
+ *
+ * That first fault or access after a reset has to poll for device readiness,
+ * so flag here that a reset has occurred.
+ */
+static void nvgrace_gpu_vfio_pci_reset_done(struct pci_dev *pdev)
+{
+	struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
+	struct nvgrace_gpu_pci_core_device *gpu;
+
+	/* Step from the embedded vfio-pci core device to its container. */
+	gpu = container_of(vdev, struct nvgrace_gpu_pci_core_device,
+			   core_device);
+	gpu->reset_done = true;
+}
+
+/*
+ * PCI error handlers for this driver. error_detected is routed to
+ * vfio_pci_core_aer_err_detected; reset_done is routed to
+ * nvgrace_gpu_vfio_pci_reset_done, which sets the device's reset_done flag.
+ */
+static const struct pci_error_handlers nvgrace_gpu_vfio_pci_err_handlers = {
+	.error_detected	= vfio_pci_core_aer_err_detected,
+	.reset_done	= nvgrace_gpu_vfio_pci_reset_done,
+};
+
static struct pci_driver nvgrace_gpu_vfio_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = nvgrace_gpu_vfio_pci_table,
.probe = nvgrace_gpu_probe,
.remove = nvgrace_gpu_remove,
- .err_handler = &vfio_pci_core_err_handlers,
+ .err_handler = &nvgrace_gpu_vfio_pci_err_handlers, /* driver-local table; also sets .reset_done */
.driver_managed_dma = true,
};