--- /dev/null
+From bafb2f7d4755bf1571bd5e9a03b97f3fc4fe69ae Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed, 21 Sep 2016 14:51:08 +0100
+Subject: drm/i915/execlists: Reset RING registers upon resume
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bafb2f7d4755bf1571bd5e9a03b97f3fc4fe69ae upstream.
+
+There is a disparity in the context image saved to disk and our own
+bookkeeping - that is we presume the RING_HEAD and RING_TAIL match our
+stored ce->ring->tail value. However, as we emit WA_TAIL_DWORDS into the
+ring but may not tell the GPU about them, the GPU may be lagging behind
+our bookkeeping. Upon hibernation we do not save stolen pages, presuming
+that their contents are volatile. This means that although we start
+writing into the ring at tail, the GPU starts executing from its HEAD
+and there may be some garbage in between and so the GPU promptly hangs
+upon resume.
+
+Testcase: igt/gem_exec_suspend/basic-S4
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20160921135108.29574-3-chris@chris-wilson.co.uk
+Cc: Eric Blau <eblau1@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/intel_lrc.c | 58 +++++++++++++++++++++++----------------
+ 1 file changed, 35 insertions(+), 23 deletions(-)
+
+--- a/drivers/gpu/drm/i915/intel_lrc.c
++++ b/drivers/gpu/drm/i915/intel_lrc.c
+@@ -2152,30 +2152,42 @@ error_deref_obj:
+
+ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
+ {
+- struct i915_gem_context *ctx = dev_priv->kernel_context;
+ struct intel_engine_cs *engine;
++ struct i915_gem_context *ctx;
+
+- for_each_engine(engine, dev_priv) {
+- struct intel_context *ce = &ctx->engine[engine->id];
+- void *vaddr;
+- uint32_t *reg_state;
+-
+- if (!ce->state)
+- continue;
+-
+- vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+- if (WARN_ON(IS_ERR(vaddr)))
+- continue;
+-
+- reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+-
+- reg_state[CTX_RING_HEAD+1] = 0;
+- reg_state[CTX_RING_TAIL+1] = 0;
+-
+- ce->state->obj->dirty = true;
+- i915_gem_object_unpin_map(ce->state->obj);
+-
+- ce->ring->head = 0;
+- ce->ring->tail = 0;
++ /* Because we emit WA_TAIL_DWORDS there may be a disparity
++ * between our bookkeeping in ce->ring->head and ce->ring->tail and
++ * that stored in context. As we only write new commands from
++ * ce->ring->tail onwards, everything before that is junk. If the GPU
++ * starts reading from its RING_HEAD from the context, it may try to
++ * execute that junk and die.
++ *
++ * So to avoid that we reset the context images upon resume. For
++ * simplicity, we just zero everything out.
++ */
++ list_for_each_entry(ctx, &dev_priv->context_list, link) {
++ for_each_engine(engine, dev_priv) {
++ struct intel_context *ce = &ctx->engine[engine->id];
++ u32 *reg;
++
++ if (!ce->state)
++ continue;
++
++ reg = i915_gem_object_pin_map(ce->state->obj,
++ I915_MAP_WB);
++ if (WARN_ON(IS_ERR(reg)))
++ continue;
++
++ reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
++ reg[CTX_RING_HEAD+1] = 0;
++ reg[CTX_RING_TAIL+1] = 0;
++
++ ce->state->obj->dirty = true;
++ i915_gem_object_unpin_map(ce->state->obj);
++
++ ce->ring->head = ce->ring->tail = 0;
++ ce->ring->last_retired_head = -1;
++ intel_ring_update_space(ce->ring);
++ }
+ }
+ }