git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 15:22:32 +0000 (16:22 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 15:22:32 +0000 (16:22 +0100)
added patches:
drm-i915-flush-tlbs-before-releasing-backing-store.patch

queue-4.9/drm-i915-flush-tlbs-before-releasing-backing-store.patch [new file with mode: 0644]
queue-4.9/series [new file with mode: 0644]

diff --git a/queue-4.9/drm-i915-flush-tlbs-before-releasing-backing-store.patch b/queue-4.9/drm-i915-flush-tlbs-before-releasing-backing-store.patch
new file mode 100644 (file)
index 0000000..387a387
--- /dev/null
@@ -0,0 +1,192 @@
+From 7938d61591d33394a21bdd7797a245b65428f44c Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Date: Tue, 19 Oct 2021 13:27:10 +0100
+Subject: drm/i915: Flush TLBs before releasing backing store
+
+From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+
+commit 7938d61591d33394a21bdd7797a245b65428f44c upstream.
+
+We need to flush TLBs before releasing backing store, otherwise userspace
+is able to encounter stale entries if a) it is not declaring access to
+certain buffers and b) it races with the backing store release from such
+an undeclared execution already executing on the GPU in parallel.
+
+The approach taken is to mark any buffer objects which were ever bound
+to the GPU and to trigger a serialized TLB flush when their backing
+store is released.
+
+Alternatively the flushing could be done on VMA unbind, at which point
+we would be able to ascertain whether there is potentially a parallel GPU
+execution (which could race), but essentially it boils down to paying
+the cost of TLB flushes potentially needlessly at VMA unbind time (when
+the backing store is not known to be going away, so not needed for
+safety), versus potentially needlessly at backing store release time
+(since at that point we cannot tell whether there is anything executing
+on the GPU which uses that object).
+
+Therefore simplicity of implementation has been chosen for now, with
+scope to benchmark and refine later as required.
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Acked-by: Dave Airlie <airlied@redhat.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Jani Nikula <jani.nikula@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_drv.h     |    5 ++
+ drivers/gpu/drm/i915/i915_gem.c     |   72 ++++++++++++++++++++++++++++++++++++
+ drivers/gpu/drm/i915/i915_gem_gtt.c |    4 ++
+ drivers/gpu/drm/i915/i915_reg.h     |    6 +++
+ 4 files changed, 86 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -1763,6 +1763,8 @@ struct drm_i915_private {
+       struct intel_uncore uncore;
++      struct mutex tlb_invalidate_lock;
++
+       struct i915_virtual_gpu vgpu;
+       struct intel_gvt gvt;
+@@ -2211,7 +2213,8 @@ struct drm_i915_gem_object {
+        * rendering and so a non-zero seqno), and is not set if it is on
+        * inactive (ready to be unbound) list.
+        */
+-#define I915_BO_ACTIVE_SHIFT 0
++#define I915_BO_WAS_BOUND_BIT    0
++#define I915_BO_ACTIVE_SHIFT 1
+ #define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
+ #define __I915_BO_ACTIVE(bo) \
+       ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -2185,6 +2185,67 @@ i915_gem_object_put_pages_gtt(struct drm
+       kfree(obj->pages);
+ }
++static int
++__intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
++                           i915_reg_t reg,
++                           const u32 mask,
++                           const u32 value,
++                           const unsigned int timeout_us,
++                           const unsigned int timeout_ms)
++{
++#define done ((I915_READ_FW(reg) & mask) == value)
++      int ret = wait_for_us(done, timeout_us);
++      if (ret)
++              ret = wait_for(done, timeout_ms);
++      return ret;
++#undef done
++}
++
++static void invalidate_tlbs(struct drm_i915_private *dev_priv)
++{
++      static const i915_reg_t gen8_regs[] = {
++              [RCS]  = GEN8_RTCR,
++              [VCS]  = GEN8_M1TCR,
++              [VCS2] = GEN8_M2TCR,
++              [VECS] = GEN8_VTCR,
++              [BCS]  = GEN8_BTCR,
++      };
++      struct intel_engine_cs *engine;
++
++      if (INTEL_GEN(dev_priv) < 8)
++              return;
++
++      assert_rpm_wakelock_held(dev_priv);
++
++      mutex_lock(&dev_priv->tlb_invalidate_lock);
++      intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
++
++      for_each_engine(engine, dev_priv) {
++              /*
++               * HW architecture suggest typical invalidation time at 40us,
++               * with pessimistic cases up to 100us and a recommendation to
++               * cap at 1ms. We go a bit higher just in case.
++               */
++              const unsigned int timeout_us = 100;
++              const unsigned int timeout_ms = 4;
++              const enum intel_engine_id id = engine->id;
++
++              if (WARN_ON_ONCE(id >= ARRAY_SIZE(gen8_regs) ||
++                               !i915_mmio_reg_offset(gen8_regs[id])))
++                      continue;
++
++              I915_WRITE_FW(gen8_regs[id], 1);
++              if (__intel_wait_for_register_fw(dev_priv,
++                                               gen8_regs[id], 1, 0,
++                                               timeout_us, timeout_ms))
++                      DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
++                                            engine->name, timeout_ms);
++      }
++
++      intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
++      mutex_unlock(&dev_priv->tlb_invalidate_lock);
++}
++
+ int
+ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+ {
+@@ -2215,6 +2276,15 @@ i915_gem_object_put_pages(struct drm_i91
+               obj->mapping = NULL;
+       }
++      if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
++              struct drm_i915_private *i915 = to_i915(obj->base.dev);
++
++              if (intel_runtime_pm_get_if_in_use(i915)) {
++                      invalidate_tlbs(i915);
++                      intel_runtime_pm_put(i915);
++              }
++      }
++
+       ops->put_pages(obj);
+       obj->pages = NULL;
+@@ -4627,6 +4697,8 @@ i915_gem_load_init(struct drm_device *de
+       atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
++      mutex_init(&dev_priv->tlb_invalidate_lock);
++
+       spin_lock_init(&dev_priv->fb_tracking.lock);
+ }
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -3685,6 +3685,10 @@ int i915_vma_bind(struct i915_vma *vma,
+               return ret;
+       vma->flags |= bind_flags;
++
++      if (vma->obj)
++              set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
++
+       return 0;
+ }
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -1698,6 +1698,12 @@ enum skl_disp_power_wells {
+ #define GAMT_CHKN_BIT_REG     _MMIO(0x4ab8)
+ #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING    (1<<28)
++#define GEN8_RTCR     _MMIO(0x4260)
++#define GEN8_M1TCR    _MMIO(0x4264)
++#define GEN8_M2TCR    _MMIO(0x4268)
++#define GEN8_BTCR     _MMIO(0x426c)
++#define GEN8_VTCR     _MMIO(0x4270)
++
+ #if 0
+ #define PRB0_TAIL     _MMIO(0x2030)
+ #define PRB0_HEAD     _MMIO(0x2034)
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644 (file)
index 0000000..4f89d40
--- /dev/null
@@ -0,0 +1 @@
+drm-i915-flush-tlbs-before-releasing-backing-store.patch
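
A note for readers skimming the queue: the core of the fix above is a small
flag-and-flush pattern. An object is marked the first time it is bound into
the GPU address space (I915_BO_WAS_BOUND_BIT), and when its backing store is
released the driver performs a serialized TLB invalidation only if that flag
was ever set. The following stand-alone C sketch illustrates just that
pattern; fake_device, fake_object and flush_tlbs_hw() are hypothetical names
used for illustration and are not part of the i915 driver or this patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical device: only the lock that serializes TLB invalidations. */
struct fake_device {
	pthread_mutex_t tlb_invalidate_lock;
};

/* Hypothetical buffer object: remembers whether it was ever GPU-bound. */
struct fake_object {
	struct fake_device *dev;
	atomic_bool was_bound;
};

/* Stand-in for the per-engine register writes and waits done by the patch. */
static void flush_tlbs_hw(struct fake_device *dev)
{
	(void)dev;
	printf("TLBs invalidated\n");
}

/* Bind path: remember that the GPU may now hold translations for us. */
static void object_bind(struct fake_object *obj)
{
	atomic_store(&obj->was_bound, true);
}

/* Release path: flush once, and only if the object was ever bound. */
static void object_put_pages(struct fake_object *obj)
{
	if (atomic_exchange(&obj->was_bound, false)) {
		pthread_mutex_lock(&obj->dev->tlb_invalidate_lock);
		flush_tlbs_hw(obj->dev);
		pthread_mutex_unlock(&obj->dev->tlb_invalidate_lock);
	}
	/* ... free the backing pages only after the flush ... */
}

int main(void)
{
	struct fake_device dev = { .tlb_invalidate_lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_object obj = { .dev = &dev };

	object_bind(&obj);      /* GPU could now cache translations */
	object_put_pages(&obj); /* triggers exactly one serialized flush */
	object_put_pages(&obj); /* flag already cleared: no second flush */
	return 0;
}

As in the real patch, the ordering matters: translations are invalidated
before the pages are handed back, so parallel GPU work cannot use stale TLB
entries to reach memory that has already been freed and reused.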