5.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)
diff --git a/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch b/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch

new file mode 100644 (file)

index 0000000..c253d6e
--- /dev/null
+++ b/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch
@@ -0,0 +1,37 @@
+From 4a0a2cf4c03ba49a4c2596c49c7daa719917d509 Mon Sep 17 00:00:00 2001
+From: Likun Gao <Likun.Gao@amd.com>
+Date: Wed, 3 Aug 2022 12:16:35 +0800
+Subject: drm/amdgpu: change vram width algorithm for vram_info v3_0
+
+From: Likun Gao <Likun.Gao@amd.com>
+
+commit 4a0a2cf4c03ba49a4c2596c49c7daa719917d509 upstream.
+
+Update the vram width algorithm for vram_info v3_0 to align with the
+changes of latest IFWI.
+
+Signed-off-by: Likun Gao <Likun.Gao@amd.com>
+Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 5.19.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+index fd8f3731758e..b81b77a9efa6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+                                       mem_channel_number = vram_info->v30.channel_num;
+                                       mem_channel_width = vram_info->v30.channel_width;
+                                       if (vram_width)
+-                                              *vram_width = mem_channel_number * mem_channel_width;
++                                              *vram_width = mem_channel_number * (1 << mem_channel_width);
+                                       break;
+                               default:
+                                       return -EINVAL;
+-- 
+2.37.2
+
diff --git a/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch b/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch

new file mode 100644 (file)

index 0000000..6346915
--- /dev/null
+++ b/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch
@@ -0,0 +1,410 @@
+From 59eda6ce824e95b98c45628fe6c0adb9130c6df2 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris.p.wilson@intel.com>
+Date: Wed, 27 Jul 2022 14:29:55 +0200
+Subject: drm/i915/gt: Batch TLB invalidations
+
+From: Chris Wilson <chris.p.wilson@intel.com>
+
+commit 59eda6ce824e95b98c45628fe6c0adb9130c6df2 upstream.
+
+Invalidate TLB in batches, in order to reduce performance regressions.
+
+Currently, every caller performs a full barrier around a TLB
+invalidation, ignoring all other invalidations that may have already
+removed their PTEs from the cache. As this is a synchronous operation
+and can be quite slow, we cause multiple threads to contend on the TLB
+invalidate mutex blocking userspace.
+
+We only need to invalidate the TLB once after replacing our PTE to
+ensure that there is no possible continued access to the physical
+address before releasing our pages. By tracking a seqno for each full
+TLB invalidate we can quickly determine if one has been performed since
+rewriting the PTE, and only if necessary trigger one for ourselves.
+
+That helps to reduce the performance regression introduced by TLB
+invalidate logic.
+
+[mchehab: rebased to not require moving the code to a separate file]
+
+Cc: stable@vger.kernel.org
+Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
+Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
+Cc: Fei Yang <fei.yang@intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/4e97ef5deb6739cadaaf40aa45620547e9c4ec06.1658924372.git.mchehab@kernel.org
+(cherry picked from commit 5d36acb7198b0e5eb88e6b701f9ad7b9448f8df9)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_object_types.h |    3 -
+ drivers/gpu/drm/i915/gem/i915_gem_pages.c        |   21 +++++----
+ drivers/gpu/drm/i915/gt/intel_gt.c               |   53 +++++++++++++++++------
+ drivers/gpu/drm/i915/gt/intel_gt.h               |   12 ++++-
+ drivers/gpu/drm/i915/gt/intel_gt_types.h         |   18 +++++++
+ drivers/gpu/drm/i915/gt/intel_ppgtt.c            |    8 ++-
+ drivers/gpu/drm/i915/i915_vma.c                  |   33 +++++++++++---
+ drivers/gpu/drm/i915/i915_vma.h                  |    1 
+ drivers/gpu/drm/i915/i915_vma_resource.c         |    5 +-
+ drivers/gpu/drm/i915/i915_vma_resource.h         |    6 ++
+ 10 files changed, 125 insertions(+), 35 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
+ #define I915_BO_READONLY          BIT(7)
+ #define I915_TILING_QUIRK_BIT     8 /* unknown swizzling; do not release! */
+ #define I915_BO_PROTECTED         BIT(9)
+-#define I915_BO_WAS_BOUND_BIT     10
+       /**
+        * @mem_flags - Mutable placement-related flags
+        *
+@@ -598,6 +597,8 @@ struct drm_i915_gem_object {
+                * pages were last acquired.
+                */
+               bool dirty:1;
++
++              u32 tlb;
+       } mm;
+ 
+       struct {
+--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+@@ -191,6 +191,18 @@ static void unmap_object(struct drm_i915
+               vunmap(ptr);
+ }
+ 
++static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
++{
++      struct drm_i915_private *i915 = to_i915(obj->base.dev);
++      struct intel_gt *gt = to_gt(i915);
++
++      if (!obj->mm.tlb)
++              return;
++
++      intel_gt_invalidate_tlb(gt, obj->mm.tlb);
++      obj->mm.tlb = 0;
++}
++
+ struct sg_table *
+ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+ {
+@@ -216,14 +228,7 @@ __i915_gem_object_unset_pages(struct drm
+       __i915_gem_object_reset_page_iter(obj);
+       obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ 
+-      if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+-              struct drm_i915_private *i915 = to_i915(obj->base.dev);
+-              struct intel_gt *gt = to_gt(i915);
+-              intel_wakeref_t wakeref;
+-
+-              with_intel_gt_pm_if_awake(gt, wakeref)
+-                      intel_gt_invalidate_tlbs(gt);
+-      }
++      flush_tlb_invalidate(obj);
+ 
+       return pages;
+ }
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -36,8 +36,6 @@ static void __intel_gt_init_early(struct
+ {
+       spin_lock_init(&gt->irq_lock);
+ 
+-      mutex_init(&gt->tlb_invalidate_lock);
+-
+       INIT_LIST_HEAD(&gt->closed_vma);
+       spin_lock_init(&gt->closed_lock);
+ 
+@@ -48,6 +46,8 @@ static void __intel_gt_init_early(struct
+       intel_gt_init_reset(gt);
+       intel_gt_init_requests(gt);
+       intel_gt_init_timelines(gt);
++      mutex_init(&gt->tlb.invalidate_lock);
++      seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+       intel_gt_pm_init_early(gt);
+ 
+       intel_uc_init_early(&gt->uc);
+@@ -833,6 +833,7 @@ void intel_gt_driver_late_release_all(st
+               intel_gt_fini_requests(gt);
+               intel_gt_fini_reset(gt);
+               intel_gt_fini_timelines(gt);
++              mutex_destroy(&gt->tlb.invalidate_lock);
+               intel_engines_free(gt);
+       }
+ }
+@@ -1165,7 +1166,7 @@ get_reg_and_bit(const struct intel_engin
+       return rb;
+ }
+ 
+-void intel_gt_invalidate_tlbs(struct intel_gt *gt)
++static void mmio_invalidate_full(struct intel_gt *gt)
+ {
+       static const i915_reg_t gen8_regs[] = {
+               [RENDER_CLASS]                  = GEN8_RTCR,
+@@ -1188,12 +1189,6 @@ void intel_gt_invalidate_tlbs(struct int
+       const i915_reg_t *regs;
+       unsigned int num = 0;
+ 
+-      if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+-              return;
+-
+-      if (intel_gt_is_wedged(gt))
+-              return;
+-
+       if (GRAPHICS_VER(i915) == 12) {
+               regs = gen12_regs;
+               num = ARRAY_SIZE(gen12_regs);
+@@ -1208,9 +1203,6 @@ void intel_gt_invalidate_tlbs(struct int
+                         "Platform does not implement TLB invalidation!"))
+               return;
+ 
+-      GEM_TRACE("\n");
+-
+-      mutex_lock(&gt->tlb_invalidate_lock);
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ 
+       spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+@@ -1230,6 +1222,8 @@ void intel_gt_invalidate_tlbs(struct int
+               awake |= engine->mask;
+       }
+ 
++      GT_TRACE(gt, "invalidated engines %08x\n", awake);
++
+       /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+       if (awake &&
+           (IS_TIGERLAKE(i915) ||
+@@ -1269,5 +1263,38 @@ void intel_gt_invalidate_tlbs(struct int
+        * transitions.
+        */
+       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+-      mutex_unlock(&gt->tlb_invalidate_lock);
++}
++
++static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
++{
++      u32 cur = intel_gt_tlb_seqno(gt);
++
++      /* Only skip if a *full* TLB invalidate barrier has passed */
++      return (s32)(cur - ALIGN(seqno, 2)) > 0;
++}
++
++void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
++{
++      intel_wakeref_t wakeref;
++
++      if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
++              return;
++
++      if (intel_gt_is_wedged(gt))
++              return;
++
++      if (tlb_seqno_passed(gt, seqno))
++              return;
++
++      with_intel_gt_pm_if_awake(gt, wakeref) {
++              mutex_lock(&gt->tlb.invalidate_lock);
++              if (tlb_seqno_passed(gt, seqno))
++                      goto unlock;
++
++              mmio_invalidate_full(gt);
++
++              write_seqcount_invalidate(&gt->tlb.seqno);
++unlock:
++              mutex_unlock(&gt->tlb.invalidate_lock);
++      }
+ }
+--- a/drivers/gpu/drm/i915/gt/intel_gt.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
+@@ -123,7 +123,17 @@ void intel_gt_info_print(const struct in
+ 
+ void intel_gt_watchdog_work(struct work_struct *work);
+ 
+-void intel_gt_invalidate_tlbs(struct intel_gt *gt);
++static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
++{
++      return seqprop_sequence(&gt->tlb.seqno);
++}
++
++static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
++{
++      return intel_gt_tlb_seqno(gt) | 1;
++}
++
++void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
+ 
+ struct resource intel_pci_resource(struct pci_dev *pdev, int bar);
+ 
+--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+@@ -11,6 +11,7 @@
+ #include <linux/llist.h>
+ #include <linux/mutex.h>
+ #include <linux/notifier.h>
++#include <linux/seqlock.h>
+ #include <linux/spinlock.h>
+ #include <linux/types.h>
+ #include <linux/workqueue.h>
+@@ -76,7 +77,22 @@ struct intel_gt {
+       struct intel_uc uc;
+       struct intel_gsc gsc;
+ 
+-      struct mutex tlb_invalidate_lock;
++      struct {
++              /* Serialize global tlb invalidations */
++              struct mutex invalidate_lock;
++
++              /*
++               * Batch TLB invalidations
++               *
++               * After unbinding the PTE, we need to ensure the TLB
++               * are invalidated prior to releasing the physical pages.
++               * But we only need one such invalidation for all unbinds,
++               * so we track how many TLB invalidations have been
++               * performed since unbind the PTE and only emit an extra
++               * invalidate if no full barrier has been passed.
++               */
++              seqcount_mutex_t seqno;
++      } tlb;
+ 
+       struct i915_wa_list wa_list;
+ 
+--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
++++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_
+ void ppgtt_unbind_vma(struct i915_address_space *vm,
+                     struct i915_vma_resource *vma_res)
+ {
+-      if (vma_res->allocated)
+-              vm->clear_range(vm, vma_res->start, vma_res->vma_size);
++      if (!vma_res->allocated)
++              return;
++
++      vm->clear_range(vm, vma_res->start, vma_res->vma_size);
++      if (vma_res->tlb)
++              vma_invalidate_tlb(vm, *vma_res->tlb);
+ }
+ 
+ static unsigned long pd_count(u64 size, int shift)
+--- a/drivers/gpu/drm/i915/i915_vma.c
++++ b/drivers/gpu/drm/i915/i915_vma.c
+@@ -537,8 +537,6 @@ int i915_vma_bind(struct i915_vma *vma,
+                                  bind_flags);
+       }
+ 
+-      set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+-
+       atomic_or(bind_flags, &vma->flags);
+       return 0;
+ }
+@@ -1301,6 +1299,19 @@ err_unpin:
+       return err;
+ }
+ 
++void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb)
++{
++      /*
++       * Before we release the pages that were bound by this vma, we
++       * must invalidate all the TLBs that may still have a reference
++       * back to our physical address. It only needs to be done once,
++       * so after updating the PTE to point away from the pages, record
++       * the most recent TLB invalidation seqno, and if we have not yet
++       * flushed the TLBs upon release, perform a full invalidation.
++       */
++      WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
++}
++
+ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
+ {
+       /* We allocate under vma_get_pages, so beware the shrinker */
+@@ -1927,7 +1938,12 @@ struct dma_fence *__i915_vma_evict(struc
+               vma->vm->skip_pte_rewrite;
+       trace_i915_vma_unbind(vma);
+ 
+-      unbind_fence = i915_vma_resource_unbind(vma_res);
++      if (async)
++              unbind_fence = i915_vma_resource_unbind(vma_res,
++                                                      &vma->obj->mm.tlb);
++      else
++              unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
++
+       vma->resource = NULL;
+ 
+       atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
+@@ -1935,10 +1951,13 @@ struct dma_fence *__i915_vma_evict(struc
+ 
+       i915_vma_detach(vma);
+ 
+-      if (!async && unbind_fence) {
+-              dma_fence_wait(unbind_fence, false);
+-              dma_fence_put(unbind_fence);
+-              unbind_fence = NULL;
++      if (!async) {
++              if (unbind_fence) {
++                      dma_fence_wait(unbind_fence, false);
++                      dma_fence_put(unbind_fence);
++                      unbind_fence = NULL;
++              }
++              vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
+       }
+ 
+       /*
+--- a/drivers/gpu/drm/i915/i915_vma.h
++++ b/drivers/gpu/drm/i915/i915_vma.h
+@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i91
+                       u64 size, u64 alignment, u64 flags);
+ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+ void i915_vma_revoke_mmap(struct i915_vma *vma);
++void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb);
+ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
+ int __i915_vma_unbind(struct i915_vma *vma);
+ int __must_check i915_vma_unbind(struct i915_vma *vma);
+--- a/drivers/gpu/drm/i915/i915_vma_resource.c
++++ b/drivers/gpu/drm/i915/i915_vma_resource.c
+@@ -223,10 +223,13 @@ i915_vma_resource_fence_notify(struct i9
+  * Return: A refcounted pointer to a dma-fence that signals when unbinding is
+  * complete.
+  */
+-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
++struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
++                                         u32 *tlb)
+ {
+       struct i915_address_space *vm = vma_res->vm;
+ 
++      vma_res->tlb = tlb;
++
+       /* Reference for the sw fence */
+       i915_vma_resource_get(vma_res);
+ 
+--- a/drivers/gpu/drm/i915/i915_vma_resource.h
++++ b/drivers/gpu/drm/i915/i915_vma_resource.h
+@@ -67,6 +67,7 @@ struct i915_page_sizes {
+  * taken when the unbind is scheduled.
+  * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
+  * needs to be skipped for unbind.
++ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL
+  *
+  * The lifetime of a struct i915_vma_resource is from a binding request to
+  * the actual possible asynchronous unbind has completed.
+@@ -119,6 +120,8 @@ struct i915_vma_resource {
+       bool immediate_unbind:1;
+       bool needs_wakeref:1;
+       bool skip_pte_rewrite:1;
++
++      u32 *tlb;
+ };
+ 
+ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
+@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resou
+ 
+ void i915_vma_resource_free(struct i915_vma_resource *vma_res);
+ 
+-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
++struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
++                                         u32 *tlb);
+ 
+ void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
+ 
diff --git a/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch b/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch

new file mode 100644 (file)

index 0000000..6c40610
--- /dev/null
+++ b/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch
@@ -0,0 +1,151 @@
+From db100e28fdf026a1fc10657c5170bb1e65663805 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris.p.wilson@intel.com>
+Date: Wed, 27 Jul 2022 14:29:51 +0200
+Subject: drm/i915/gt: Ignore TLB invalidations on idle engines
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chris Wilson <chris.p.wilson@intel.com>
+
+commit db100e28fdf026a1fc10657c5170bb1e65663805 upstream.
+
+Check if the device is powered down prior to any engine activity,
+as, in such cases, all the TLBs were already invalidated, so an
+explicit TLB invalidation is not needed. Skipping it reduces the
+performance regression caused by the TLB invalidation logic.
+
+This becomes more significant with GuC, as it can only do so when
+the connection to the GuC is awake.
+
+Cc: stable@vger.kernel.org
+Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
+Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
+Cc: Fei Yang <fei.yang@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e95da583a5fe.1658924372.git.mchehab@kernel.org
+(cherry picked from commit 4bedceaed1ae1172cfe72d3ff752b3a1d32fe4d9)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_pages.c |   10 ++++++----
+ drivers/gpu/drm/i915/gt/intel_gt.c        |   17 ++++++++++-------
+ drivers/gpu/drm/i915/gt/intel_gt_pm.h     |    3 +++
+ 3 files changed, 19 insertions(+), 11 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+@@ -6,14 +6,15 @@
+ 
+ #include <drm/drm_cache.h>
+ 
++#include "gt/intel_gt.h"
++#include "gt/intel_gt_pm.h"
++
+ #include "i915_drv.h"
+ #include "i915_gem_object.h"
+ #include "i915_scatterlist.h"
+ #include "i915_gem_lmem.h"
+ #include "i915_gem_mman.h"
+ 
+-#include "gt/intel_gt.h"
+-
+ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+                                struct sg_table *pages,
+                                unsigned int sg_page_sizes)
+@@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm
+ 
+       if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
++              struct intel_gt *gt = to_gt(i915);
+               intel_wakeref_t wakeref;
+ 
+-              with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+-                      intel_gt_invalidate_tlbs(to_gt(i915));
++              with_intel_gt_pm_if_awake(gt, wakeref)
++                      intel_gt_invalidate_tlbs(gt);
+       }
+ 
+       return pages;
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -11,6 +11,7 @@
+ 
+ #include "i915_drv.h"
+ #include "intel_context.h"
++#include "intel_engine_pm.h"
+ #include "intel_engine_regs.h"
+ #include "intel_gt.h"
+ #include "intel_gt_buffer_pool.h"
+@@ -1181,6 +1182,7 @@ void intel_gt_invalidate_tlbs(struct int
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_engine_cs *engine;
++      intel_engine_mask_t awake, tmp;
+       enum intel_engine_id id;
+       const i915_reg_t *regs;
+       unsigned int num = 0;
+@@ -1204,26 +1206,31 @@ void intel_gt_invalidate_tlbs(struct int
+ 
+       GEM_TRACE("\n");
+ 
+-      assert_rpm_wakelock_held(&i915->runtime_pm);
+-
+       mutex_lock(&gt->tlb_invalidate_lock);
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ 
+       spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+ 
++      awake = 0;
+       for_each_engine(engine, gt, id) {
+               struct reg_and_bit rb;
+ 
++              if (!intel_engine_pm_is_awake(engine))
++                      continue;
++
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               if (!i915_mmio_reg_offset(rb.reg))
+                       continue;
+ 
+               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
++              awake |= engine->mask;
+       }
+ 
+       spin_unlock_irq(&uncore->lock);
+ 
+-      for_each_engine(engine, gt, id) {
++      for_each_engine_masked(engine, gt, awake, tmp) {
++              struct reg_and_bit rb;
++
+               /*
+                * HW architecture suggest typical invalidation time at 40us,
+                * with pessimistic cases up to 100us and a recommendation to
+@@ -1231,12 +1238,8 @@ void intel_gt_invalidate_tlbs(struct int
+                */
+               const unsigned int timeout_us = 100;
+               const unsigned int timeout_ms = 4;
+-              struct reg_and_bit rb;
+ 
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+-              if (!i915_mmio_reg_offset(rb.reg))
+-                      continue;
+-
+               if (__intel_wait_for_register_fw(uncore,
+                                                rb.reg, rb.bit, 0,
+                                                timeout_us, timeout_ms,
+--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put
+       for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+            intel_gt_pm_put(gt), tmp = 0)
+ 
++#define with_intel_gt_pm_if_awake(gt, wf) \
++      for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
++
+ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
+ {
+       return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch b/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch

new file mode 100644 (file)

index 0000000..c88332a
--- /dev/null
+++ b/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch
@@ -0,0 +1,59 @@
+From 180abeb2c5032704787151135b6a38c6b71295a6 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris.p.wilson@intel.com>
+Date: Wed, 27 Jul 2022 14:29:53 +0200
+Subject: drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chris Wilson <chris.p.wilson@intel.com>
+
+commit 180abeb2c5032704787151135b6a38c6b71295a6 upstream.
+
+Ensure that the TLB of the OA unit is also invalidated
+on gen12 HW, as just invalidating the TLB of an engine is not
+enough.
+
+Cc: stable@vger.kernel.org
+Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
+Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
+Cc: Fei Yang <fei.yang@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/59724d9f5cf1e93b1620d01b8332ac991555283d.1658924372.git.mchehab@kernel.org
+(cherry picked from commit dfc83de118ff7930acc9a4c8dfdba7c153aa44d6)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_gt.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -10,6 +10,7 @@
+ #include "pxp/intel_pxp.h"
+ 
+ #include "i915_drv.h"
++#include "i915_perf_oa_regs.h"
+ #include "intel_context.h"
+ #include "intel_engine_pm.h"
+ #include "intel_engine_regs.h"
+@@ -1226,6 +1227,15 @@ void intel_gt_invalidate_tlbs(struct int
+               awake |= engine->mask;
+       }
+ 
++      /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
++      if (awake &&
++          (IS_TIGERLAKE(i915) ||
++           IS_DG1(i915) ||
++           IS_ROCKETLAKE(i915) ||
++           IS_ALDERLAKE_S(i915) ||
++           IS_ALDERLAKE_P(i915)))
++              intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
++
+       spin_unlock_irq(&uncore->lock);
+ 
+       for_each_engine_masked(engine, gt, awake, tmp) {
diff --git a/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch b/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch

new file mode 100644 (file)

index 0000000..18669c3
--- /dev/null
+++ b/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch
@@ -0,0 +1,50 @@
+From e5a95c83ed1492c0f442b448b20c90c8faaf702b Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris.p.wilson@intel.com>
+Date: Wed, 27 Jul 2022 14:29:54 +0200
+Subject: drm/i915/gt: Skip TLB invalidations once wedged
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chris Wilson <chris.p.wilson@intel.com>
+
+commit e5a95c83ed1492c0f442b448b20c90c8faaf702b upstream.
+
+Skip all further TLB invalidations once the device is wedged and
+has been reset, as, in such cases, it can no longer process instructions
+on the GPU and the user no longer has access to the TLBs in each engine.
+
+So, an attempt to do a TLB cache invalidation will produce a timeout.
+
+That helps to reduce the performance regression introduced by TLB
+invalidate logic.
+
+Cc: stable@vger.kernel.org
+Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
+Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
+Cc: Fei Yang <fei.yang@intel.com>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/5aa86564b9ec5fe7fe605c1dd7de76855401ed73.1658924372.git.mchehab@kernel.org
+(cherry picked from commit be0366f168033374a93e4c43fdaa1a90ab905184)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_gt.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -1191,6 +1191,9 @@ void intel_gt_invalidate_tlbs(struct int
+       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+               return;
+ 
++      if (intel_gt_is_wedged(gt))
++              return;
++
+       if (GRAPHICS_VER(i915) == 12) {
+               regs = gen12_regs;
+               num = ARRAY_SIZE(gen12_regs);
diff --git a/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch b/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch

new file mode 100644 (file)

index 0000000..0b8cc0b
--- /dev/null
+++ b/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch
@@ -0,0 +1,79 @@
+From 9d50bff40e3e366886ec37299fc317edf84be0c9 Mon Sep 17 00:00:00 2001
+From: Mauro Carvalho Chehab <mchehab@kernel.org>
+Date: Thu, 4 Aug 2022 09:37:22 +0200
+Subject: drm/i915: pass a pointer for tlb seqno at vma_invalidate_tlb()
+
+From: Mauro Carvalho Chehab <mchehab@kernel.org>
+
+commit 9d50bff40e3e366886ec37299fc317edf84be0c9 upstream.
+
+WRITE_ONCE() must write to the original variable, not to a local
+copy of it.
+
+Cc: stable@vger.kernel.org
+Fixes: 59eda6ce824e ("drm/i915/gt: Batch TLB invalidations")
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+[added cc-stable while merging it]
+Link: https://patchwork.freedesktop.org/patch/msgid/f9550e6bacea10131ff40dd8981b69eb9251cdcd.1659598090.git.mchehab@kernel.org
+(cherry picked from commit 3d037d99e61a1e7a3ae3d214146d88db349dd19f)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/intel_ppgtt.c |    2 +-
+ drivers/gpu/drm/i915/i915_vma.c       |    6 +++---
+ drivers/gpu/drm/i915/i915_vma.h       |    2 +-
+ 3 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
++++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+@@ -211,7 +211,7 @@ void ppgtt_unbind_vma(struct i915_addres
+ 
+       vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+       if (vma_res->tlb)
+-              vma_invalidate_tlb(vm, *vma_res->tlb);
++              vma_invalidate_tlb(vm, vma_res->tlb);
+ }
+ 
+ static unsigned long pd_count(u64 size, int shift)
+--- a/drivers/gpu/drm/i915/i915_vma.c
++++ b/drivers/gpu/drm/i915/i915_vma.c
+@@ -1299,7 +1299,7 @@ err_unpin:
+       return err;
+ }
+ 
+-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb)
++void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
+ {
+       /*
+        * Before we release the pages that were bound by this vma, we
+@@ -1309,7 +1309,7 @@ void vma_invalidate_tlb(struct i915_addr
+        * the most recent TLB invalidation seqno, and if we have not yet
+        * flushed the TLBs upon release, perform a full invalidation.
+        */
+-      WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
++      WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+ }
+ 
+ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
+@@ -1957,7 +1957,7 @@ struct dma_fence *__i915_vma_evict(struc
+                       dma_fence_put(unbind_fence);
+                       unbind_fence = NULL;
+               }
+-              vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
++              vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb);
+       }
+ 
+       /*
+--- a/drivers/gpu/drm/i915/i915_vma.h
++++ b/drivers/gpu/drm/i915/i915_vma.h
+@@ -213,7 +213,7 @@ bool i915_vma_misplaced(const struct i91
+                       u64 size, u64 alignment, u64 flags);
+ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+ void i915_vma_revoke_mmap(struct i915_vma *vma);
+-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb);
++void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb);
+ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
+ int __i915_vma_unbind(struct i915_vma *vma);
+ int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/queue-5.19/series b/queue-5.19/series

index 3956d33fbe48712f5bd41353e84daf5a2ef098ea..165183755a3080b8544795808da4ea072c32eee6 100644 (file)
--- a/queue-5.19/series
+++ b/queue-5.19/series
@@ -19,3 +19,9 @@ btrfs-unset-reloc-control-if-transaction-commit-fails-in-prepare_to_relocate.pat
  btrfs-reset-ro-counter-on-block-group-if-we-fail-to-relocate.patch
  btrfs-fix-lost-error-handling-when-looking-up-extended-ref-on-log-replay.patch
  btrfs-fix-warning-during-log-replay-when-bumping-inode-link-count.patch
+drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch
+drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch
+drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch
+drm-i915-gt-skip-tlb-invalidations-once-wedged.patch
+drm-i915-gt-batch-tlb-invalidations.patch
+drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 21 Aug 2022 14:09:02 +0000 (16:09 +0200)
queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch	[new file with mode: 0644]	patch \| blob
queue-5.19/series		patch \| blob \| blame \| history