After we detect that mmio is returning all 0xff, we believe that the GPU
has dropped off the pci bus and is dead. Mark the device as wedged such
that we can propagate the failure back to userspace and wait for
recovery.
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240807091014.469992-1-andi.shyti@linux.intel.com
void intel_gt_bind_context_set_ready(struct intel_gt *gt);
void intel_gt_bind_context_set_unready(struct intel_gt *gt);
bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
+
+static inline void intel_gt_set_wedged_async(struct intel_gt *gt)
+{
+ queue_work(system_highpri_wq, >->wedge);
+}
+
#endif /* __INTEL_GT_H__ */
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
+ struct work_struct wedge;
+
struct i915_perf_gt perf;
/** link: &ggtt.gt_list */
GT_TRACE(gt, "end\n");
}
+static void set_wedged_work(struct work_struct *w)
+{
+ struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
+ intel_wakeref_t wf;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wf)
+ __intel_gt_set_wedged(gt);
+}
+
void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
init_waitqueue_head(>->reset.queue);
mutex_init(>->reset.mutex);
init_srcu_struct(>->reset.backoff_srcu);
+ INIT_WORK(>->wedge, set_wedged_work);
/*
* While undesirable to wait inside the shrinker, complain anyway.
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
- intel_gt_set_wedged(w->gt);
+ set_wedged_work(&w->gt->wedge);
}
void __intel_init_wedge(struct intel_wedge_me *w,
#include <drm/drm_managed.h>
#include <linux/pm_runtime.h>
+#include "gt/intel_gt.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt_regs.h"
if (!wait_ack_clear(d, FORCEWAKE_KERNEL))
return;
- if (fw_ack(d) == ~0)
+ if (fw_ack(d) == ~0) {
drm_err(&d->uncore->i915->drm,
"%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n",
intel_uncore_forcewake_domain_to_str(d->id));
- else
+ intel_gt_set_wedged_async(d->uncore->gt);
+ } else {
drm_err(&d->uncore->i915->drm,
"%s: timed out waiting for forcewake ack to clear.\n",
intel_uncore_forcewake_domain_to_str(d->id));
+ }
add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
}