From: Greg Kroah-Hartman Date: Sun, 21 Aug 2022 14:09:02 +0000 (+0200) Subject: 5.19-stable patches X-Git-Tag: v4.9.326~78 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=67a231e07a9d4e3915deb4e9fea2c70eea8c70ae;p=thirdparty%2Fkernel%2Fstable-queue.git 5.19-stable patches added patches: drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch drm-i915-gt-batch-tlb-invalidations.patch drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch drm-i915-gt-skip-tlb-invalidations-once-wedged.patch drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch --- diff --git a/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch b/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch new file mode 100644 index 00000000000..c253d6e8eaa --- /dev/null +++ b/queue-5.19/drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch @@ -0,0 +1,37 @@ +From 4a0a2cf4c03ba49a4c2596c49c7daa719917d509 Mon Sep 17 00:00:00 2001 +From: Likun Gao +Date: Wed, 3 Aug 2022 12:16:35 +0800 +Subject: drm/amdgpu: change vram width algorithm for vram_info v3_0 + +From: Likun Gao + +commit 4a0a2cf4c03ba49a4c2596c49c7daa719917d509 upstream. + +Update the vram width algorithm for vram_info v3_0 to align with the +changes of latest IFWI. + +Signed-off-by: Likun Gao +Reviewed-by: Hawking Zhang +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org # 5.19.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +index fd8f3731758e..b81b77a9efa6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + mem_channel_number = vram_info->v30.channel_num; + mem_channel_width = vram_info->v30.channel_width; + if (vram_width) +- *vram_width = mem_channel_number * mem_channel_width; ++ *vram_width = mem_channel_number * (1 << mem_channel_width); + break; + default: + return -EINVAL; +-- +2.37.2 + diff --git a/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch b/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch new file mode 100644 index 00000000000..63469151f82 --- /dev/null +++ b/queue-5.19/drm-i915-gt-batch-tlb-invalidations.patch @@ -0,0 +1,410 @@ +From 59eda6ce824e95b98c45628fe6c0adb9130c6df2 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 27 Jul 2022 14:29:55 +0200 +Subject: drm/i915/gt: Batch TLB invalidations + +From: Chris Wilson + +commit 59eda6ce824e95b98c45628fe6c0adb9130c6df2 upstream. + +Invalidate TLB in batches, in order to reduce performance regressions. + +Currently, every caller performs a full barrier around a TLB +invalidation, ignoring all other invalidations that may have already +removed their PTEs from the cache. As this is a synchronous operation +and can be quite slow, we cause multiple threads to contend on the TLB +invalidate mutex blocking userspace. + +We only need to invalidate the TLB once after replacing our PTE to +ensure that there is no possible continued access to the physical +address before releasing our pages. By tracking a seqno for each full +TLB invalidate we can quickly determine if one has been performed since +rewriting the PTE, and only if necessary trigger one for ourselves. 
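+
+As a sketch of the mechanism, using the helpers this patch adds below:
+every full invalidation bumps gt->tlb.seqno, so a pending per-object
+invalidation can be skipped once a full barrier has passed since the
+seqno was recorded:
+
+	static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+	{
+		u32 cur = intel_gt_tlb_seqno(gt);
+
+		/* Only skip if a *full* TLB invalidate barrier has passed */
+		return (s32)(cur - ALIGN(seqno, 2)) > 0;
+	}
+
+Unbind records intel_gt_next_invalidate_tlb_full(gt) in obj->mm.tlb;
+intel_gt_invalidate_tlb(gt, seqno) then returns early whenever that
+seqno has already been passed by a full invalidation.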
+ +That helps to reduce the performance regression introduced by TLB +invalidate logic. + +[mchehab: rebased to not require moving the code to a separate file] + +Cc: stable@vger.kernel.org +Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") +Suggested-by: Tvrtko Ursulin +Signed-off-by: Chris Wilson +Cc: Fei Yang +Signed-off-by: Mauro Carvalho Chehab +Acked-by: Tvrtko Ursulin +Reviewed-by: Andi Shyti +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/4e97ef5deb6739cadaaf40aa45620547e9c4ec06.1658924372.git.mchehab@kernel.org +(cherry picked from commit 5d36acb7198b0e5eb88e6b701f9ad7b9448f8df9) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 - + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 +++++---- + drivers/gpu/drm/i915/gt/intel_gt.c | 53 +++++++++++++++++------ + drivers/gpu/drm/i915/gt/intel_gt.h | 12 ++++- + drivers/gpu/drm/i915/gt/intel_gt_types.h | 18 +++++++ + drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 ++- + drivers/gpu/drm/i915/i915_vma.c | 33 +++++++++++--- + drivers/gpu/drm/i915/i915_vma.h | 1 + drivers/gpu/drm/i915/i915_vma_resource.c | 5 +- + drivers/gpu/drm/i915/i915_vma_resource.h | 6 ++ + 10 files changed, 125 insertions(+), 35 deletions(-) + +--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h ++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +@@ -335,7 +335,6 @@ struct drm_i915_gem_object { + #define I915_BO_READONLY BIT(7) + #define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ + #define I915_BO_PROTECTED BIT(9) +-#define I915_BO_WAS_BOUND_BIT 10 + /** + * @mem_flags - Mutable placement-related flags + * +@@ -598,6 +597,8 @@ struct drm_i915_gem_object { + * pages were last acquired. 
+ */ + bool dirty:1; ++ ++ u32 tlb; + } mm; + + struct { +--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c +@@ -191,6 +191,18 @@ static void unmap_object(struct drm_i915 + vunmap(ptr); + } + ++static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) ++{ ++ struct drm_i915_private *i915 = to_i915(obj->base.dev); ++ struct intel_gt *gt = to_gt(i915); ++ ++ if (!obj->mm.tlb) ++ return; ++ ++ intel_gt_invalidate_tlb(gt, obj->mm.tlb); ++ obj->mm.tlb = 0; ++} ++ + struct sg_table * + __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) + { +@@ -216,14 +228,7 @@ __i915_gem_object_unset_pages(struct drm + __i915_gem_object_reset_page_iter(obj); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + +- if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { +- struct drm_i915_private *i915 = to_i915(obj->base.dev); +- struct intel_gt *gt = to_gt(i915); +- intel_wakeref_t wakeref; +- +- with_intel_gt_pm_if_awake(gt, wakeref) +- intel_gt_invalidate_tlbs(gt); +- } ++ flush_tlb_invalidate(obj); + + return pages; + } +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -36,8 +36,6 @@ static void __intel_gt_init_early(struct + { + spin_lock_init(>->irq_lock); + +- mutex_init(>->tlb_invalidate_lock); +- + INIT_LIST_HEAD(>->closed_vma); + spin_lock_init(>->closed_lock); + +@@ -48,6 +46,8 @@ static void __intel_gt_init_early(struct + intel_gt_init_reset(gt); + intel_gt_init_requests(gt); + intel_gt_init_timelines(gt); ++ mutex_init(>->tlb.invalidate_lock); ++ seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); + intel_gt_pm_init_early(gt); + + intel_uc_init_early(>->uc); +@@ -833,6 +833,7 @@ void intel_gt_driver_late_release_all(st + intel_gt_fini_requests(gt); + intel_gt_fini_reset(gt); + intel_gt_fini_timelines(gt); ++ mutex_destroy(>->tlb.invalidate_lock); + intel_engines_free(gt); + } + } +@@ -1165,7 +1166,7 @@ get_reg_and_bit(const struct intel_engin + return rb; + } + +-void intel_gt_invalidate_tlbs(struct intel_gt *gt) ++static void mmio_invalidate_full(struct intel_gt *gt) + { + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, +@@ -1188,12 +1189,6 @@ void intel_gt_invalidate_tlbs(struct int + const i915_reg_t *regs; + unsigned int num = 0; + +- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) +- return; +- +- if (intel_gt_is_wedged(gt)) +- return; +- + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); +@@ -1208,9 +1203,6 @@ void intel_gt_invalidate_tlbs(struct int + "Platform does not implement TLB invalidation!")) + return; + +- GEM_TRACE("\n"); +- +- mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ +@@ -1230,6 +1222,8 @@ void intel_gt_invalidate_tlbs(struct int + awake |= engine->mask; + } + ++ GT_TRACE(gt, "invalidated engines %08x\n", awake); ++ + /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ + if (awake && + (IS_TIGERLAKE(i915) || +@@ -1269,5 +1263,38 @@ void intel_gt_invalidate_tlbs(struct int + * transitions. 
+ */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); +- mutex_unlock(>->tlb_invalidate_lock); ++} ++ ++static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) ++{ ++ u32 cur = intel_gt_tlb_seqno(gt); ++ ++ /* Only skip if a *full* TLB invalidate barrier has passed */ ++ return (s32)(cur - ALIGN(seqno, 2)) > 0; ++} ++ ++void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) ++{ ++ intel_wakeref_t wakeref; ++ ++ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) ++ return; ++ ++ if (intel_gt_is_wedged(gt)) ++ return; ++ ++ if (tlb_seqno_passed(gt, seqno)) ++ return; ++ ++ with_intel_gt_pm_if_awake(gt, wakeref) { ++ mutex_lock(>->tlb.invalidate_lock); ++ if (tlb_seqno_passed(gt, seqno)) ++ goto unlock; ++ ++ mmio_invalidate_full(gt); ++ ++ write_seqcount_invalidate(>->tlb.seqno); ++unlock: ++ mutex_unlock(>->tlb.invalidate_lock); ++ } + } +--- a/drivers/gpu/drm/i915/gt/intel_gt.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt.h +@@ -123,7 +123,17 @@ void intel_gt_info_print(const struct in + + void intel_gt_watchdog_work(struct work_struct *work); + +-void intel_gt_invalidate_tlbs(struct intel_gt *gt); ++static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) ++{ ++ return seqprop_sequence(>->tlb.seqno); ++} ++ ++static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) ++{ ++ return intel_gt_tlb_seqno(gt) | 1; ++} ++ ++void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno); + + struct resource intel_pci_resource(struct pci_dev *pdev, int bar); + +--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -76,7 +77,22 @@ struct intel_gt { + struct intel_uc uc; + struct intel_gsc gsc; + +- struct mutex tlb_invalidate_lock; ++ struct { ++ /* Serialize global tlb invalidations */ ++ struct mutex invalidate_lock; ++ ++ /* ++ * Batch TLB invalidations ++ * ++ * After unbinding the PTE, we need to ensure the TLB ++ * are invalidated prior to releasing the physical pages. ++ * But we only need one such invalidation for all unbinds, ++ * so we track how many TLB invalidations have been ++ * performed since unbind the PTE and only emit an extra ++ * invalidate if no full barrier has been passed. ++ */ ++ seqcount_mutex_t seqno; ++ } tlb; + + struct i915_wa_list wa_list; + +--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c ++++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c +@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_ + void ppgtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res) + { +- if (vma_res->allocated) +- vm->clear_range(vm, vma_res->start, vma_res->vma_size); ++ if (!vma_res->allocated) ++ return; ++ ++ vm->clear_range(vm, vma_res->start, vma_res->vma_size); ++ if (vma_res->tlb) ++ vma_invalidate_tlb(vm, *vma_res->tlb); + } + + static unsigned long pd_count(u64 size, int shift) +--- a/drivers/gpu/drm/i915/i915_vma.c ++++ b/drivers/gpu/drm/i915/i915_vma.c +@@ -537,8 +537,6 @@ int i915_vma_bind(struct i915_vma *vma, + bind_flags); + } + +- set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); +- + atomic_or(bind_flags, &vma->flags); + return 0; + } +@@ -1301,6 +1299,19 @@ err_unpin: + return err; + } + ++void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb) ++{ ++ /* ++ * Before we release the pages that were bound by this vma, we ++ * must invalidate all the TLBs that may still have a reference ++ * back to our physical address. 
It only needs to be done once, ++ * so after updating the PTE to point away from the pages, record ++ * the most recent TLB invalidation seqno, and if we have not yet ++ * flushed the TLBs upon release, perform a full invalidation. ++ */ ++ WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt)); ++} ++ + static void __vma_put_pages(struct i915_vma *vma, unsigned int count) + { + /* We allocate under vma_get_pages, so beware the shrinker */ +@@ -1927,7 +1938,12 @@ struct dma_fence *__i915_vma_evict(struc + vma->vm->skip_pte_rewrite; + trace_i915_vma_unbind(vma); + +- unbind_fence = i915_vma_resource_unbind(vma_res); ++ if (async) ++ unbind_fence = i915_vma_resource_unbind(vma_res, ++ &vma->obj->mm.tlb); ++ else ++ unbind_fence = i915_vma_resource_unbind(vma_res, NULL); ++ + vma->resource = NULL; + + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), +@@ -1935,10 +1951,13 @@ struct dma_fence *__i915_vma_evict(struc + + i915_vma_detach(vma); + +- if (!async && unbind_fence) { +- dma_fence_wait(unbind_fence, false); +- dma_fence_put(unbind_fence); +- unbind_fence = NULL; ++ if (!async) { ++ if (unbind_fence) { ++ dma_fence_wait(unbind_fence, false); ++ dma_fence_put(unbind_fence); ++ unbind_fence = NULL; ++ } ++ vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb); + } + + /* +--- a/drivers/gpu/drm/i915/i915_vma.h ++++ b/drivers/gpu/drm/i915/i915_vma.h +@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i91 + u64 size, u64 alignment, u64 flags); + void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); + void i915_vma_revoke_mmap(struct i915_vma *vma); ++void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb); + struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); + int __i915_vma_unbind(struct i915_vma *vma); + int __must_check i915_vma_unbind(struct i915_vma *vma); +--- a/drivers/gpu/drm/i915/i915_vma_resource.c ++++ b/drivers/gpu/drm/i915/i915_vma_resource.c +@@ -223,10 +223,13 @@ i915_vma_resource_fence_notify(struct i9 + * Return: A refcounted pointer to a dma-fence that signals when unbinding is + * complete. + */ +-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) ++struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, ++ u32 *tlb) + { + struct i915_address_space *vm = vma_res->vm; + ++ vma_res->tlb = tlb; ++ + /* Reference for the sw fence */ + i915_vma_resource_get(vma_res); + +--- a/drivers/gpu/drm/i915/i915_vma_resource.h ++++ b/drivers/gpu/drm/i915/i915_vma_resource.h +@@ -67,6 +67,7 @@ struct i915_page_sizes { + * taken when the unbind is scheduled. + * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting + * needs to be skipped for unbind. ++ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL + * + * The lifetime of a struct i915_vma_resource is from a binding request to + * the actual possible asynchronous unbind has completed. 
+@@ -119,6 +120,8 @@ struct i915_vma_resource { + bool immediate_unbind:1; + bool needs_wakeref:1; + bool skip_pte_rewrite:1; ++ ++ u32 *tlb; + }; + + bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, +@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resou + + void i915_vma_resource_free(struct i915_vma_resource *vma_res); + +-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res); ++struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, ++ u32 *tlb); + + void __i915_vma_resource_init(struct i915_vma_resource *vma_res); + diff --git a/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch b/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch new file mode 100644 index 00000000000..6c40610063b --- /dev/null +++ b/queue-5.19/drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch @@ -0,0 +1,151 @@ +From db100e28fdf026a1fc10657c5170bb1e65663805 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 27 Jul 2022 14:29:51 +0200 +Subject: drm/i915/gt: Ignore TLB invalidations on idle engines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit db100e28fdf026a1fc10657c5170bb1e65663805 upstream. + +Check if the device is powered down prior to any engine activity, +as, on such cases, all the TLBs were already invalidated, so an +explicit TLB invalidation is not needed, thus reducing the +performance regression impact due to it. + +This becomes more significant with GuC, as it can only do so when +the connection to the GuC is awake. + +Cc: stable@vger.kernel.org +Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") +Signed-off-by: Chris Wilson +Cc: Fei Yang +Reviewed-by: Andi Shyti +Acked-by: Thomas Hellström +Acked-by: Tvrtko Ursulin +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e95da583a5fe.1658924372.git.mchehab@kernel.org +(cherry picked from commit 4bedceaed1ae1172cfe72d3ff752b3a1d32fe4d9) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++++++---- + drivers/gpu/drm/i915/gt/intel_gt.c | 17 ++++++++++------- + drivers/gpu/drm/i915/gt/intel_gt_pm.h | 3 +++ + 3 files changed, 19 insertions(+), 11 deletions(-) + +--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c +@@ -6,14 +6,15 @@ + + #include + ++#include "gt/intel_gt.h" ++#include "gt/intel_gt_pm.h" ++ + #include "i915_drv.h" + #include "i915_gem_object.h" + #include "i915_scatterlist.h" + #include "i915_gem_lmem.h" + #include "i915_gem_mman.h" + +-#include "gt/intel_gt.h" +- + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages, + unsigned int sg_page_sizes) +@@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm + + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); ++ struct intel_gt *gt = to_gt(i915); + intel_wakeref_t wakeref; + +- with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) +- intel_gt_invalidate_tlbs(to_gt(i915)); ++ with_intel_gt_pm_if_awake(gt, wakeref) ++ intel_gt_invalidate_tlbs(gt); + } + + return pages; +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -11,6 +11,7 @@ + + #include "i915_drv.h" + #include "intel_context.h" ++#include "intel_engine_pm.h" + #include 
"intel_engine_regs.h" + #include "intel_gt.h" + #include "intel_gt_buffer_pool.h" +@@ -1181,6 +1182,7 @@ void intel_gt_invalidate_tlbs(struct int + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; ++ intel_engine_mask_t awake, tmp; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; +@@ -1204,26 +1206,31 @@ void intel_gt_invalidate_tlbs(struct int + + GEM_TRACE("\n"); + +- assert_rpm_wakelock_held(&i915->runtime_pm); +- + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + ++ awake = 0; + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + ++ if (!intel_engine_pm_is_awake(engine)) ++ continue; ++ + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); ++ awake |= engine->mask; + } + + spin_unlock_irq(&uncore->lock); + +- for_each_engine(engine, gt, id) { ++ for_each_engine_masked(engine, gt, awake, tmp) { ++ struct reg_and_bit rb; ++ + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to +@@ -1231,12 +1238,8 @@ void intel_gt_invalidate_tlbs(struct int + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; +- struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); +- if (!i915_mmio_reg_offset(rb.reg)) +- continue; +- + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, +--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h +@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put + for (tmp = 1, intel_gt_pm_get(gt); tmp; \ + intel_gt_pm_put(gt), tmp = 0) + ++#define with_intel_gt_pm_if_awake(gt, wf) \ ++ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) ++ + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) + { + return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch b/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch new file mode 100644 index 00000000000..c88332a5120 --- /dev/null +++ b/queue-5.19/drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch @@ -0,0 +1,59 @@ +From 180abeb2c5032704787151135b6a38c6b71295a6 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 27 Jul 2022 14:29:53 +0200 +Subject: drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit 180abeb2c5032704787151135b6a38c6b71295a6 upstream. + +Ensure that the TLB of the OA unit is also invalidated +on gen12 HW, as just invalidating the TLB of an engine is not +enough. 
+ +Cc: stable@vger.kernel.org +Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") +Signed-off-by: Chris Wilson +Cc: Fei Yang +Reviewed-by: Andi Shyti +Acked-by: Tvrtko Ursulin +Acked-by: Thomas Hellström +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/59724d9f5cf1e93b1620d01b8332ac991555283d.1658924372.git.mchehab@kernel.org +(cherry picked from commit dfc83de118ff7930acc9a4c8dfdba7c153aa44d6) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_gt.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -10,6 +10,7 @@ + #include "pxp/intel_pxp.h" + + #include "i915_drv.h" ++#include "i915_perf_oa_regs.h" + #include "intel_context.h" + #include "intel_engine_pm.h" + #include "intel_engine_regs.h" +@@ -1226,6 +1227,15 @@ void intel_gt_invalidate_tlbs(struct int + awake |= engine->mask; + } + ++ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ ++ if (awake && ++ (IS_TIGERLAKE(i915) || ++ IS_DG1(i915) || ++ IS_ROCKETLAKE(i915) || ++ IS_ALDERLAKE_S(i915) || ++ IS_ALDERLAKE_P(i915))) ++ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); ++ + spin_unlock_irq(&uncore->lock); + + for_each_engine_masked(engine, gt, awake, tmp) { diff --git a/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch b/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch new file mode 100644 index 00000000000..18669c3f65b --- /dev/null +++ b/queue-5.19/drm-i915-gt-skip-tlb-invalidations-once-wedged.patch @@ -0,0 +1,50 @@ +From e5a95c83ed1492c0f442b448b20c90c8faaf702b Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 27 Jul 2022 14:29:54 +0200 +Subject: drm/i915/gt: Skip TLB invalidations once wedged +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit e5a95c83ed1492c0f442b448b20c90c8faaf702b upstream. + +Skip all further TLB invalidations once the device is wedged and +had been reset, as, on such cases, it can no longer process instructions +on the GPU and the user no longer has access to the TLB's in each engine. + +So, an attempt to do a TLB cache invalidation will produce a timeout. + +That helps to reduce the performance regression introduced by TLB +invalidate logic. 
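+
+The fix is a simple early exit at the top of intel_gt_invalidate_tlbs()
+(see the hunk below); once wedged, the device has been reset and can no
+longer process instructions, so waiting on the invalidation registers
+could only time out:
+
+	if (intel_gt_is_wedged(gt))
+		return;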
+ +Cc: stable@vger.kernel.org +Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") +Signed-off-by: Chris Wilson +Cc: Fei Yang +Cc: Tvrtko Ursulin +Reviewed-by: Andi Shyti +Acked-by: Thomas Hellström +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/5aa86564b9ec5fe7fe605c1dd7de76855401ed73.1658924372.git.mchehab@kernel.org +(cherry picked from commit be0366f168033374a93e4c43fdaa1a90ab905184) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_gt.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -1191,6 +1191,9 @@ void intel_gt_invalidate_tlbs(struct int + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + ++ if (intel_gt_is_wedged(gt)) ++ return; ++ + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); diff --git a/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch b/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch new file mode 100644 index 00000000000..0b8cc0b3ce0 --- /dev/null +++ b/queue-5.19/drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch @@ -0,0 +1,79 @@ +From 9d50bff40e3e366886ec37299fc317edf84be0c9 Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Thu, 4 Aug 2022 09:37:22 +0200 +Subject: drm/i915: pass a pointer for tlb seqno at vma_invalidate_tlb() + +From: Mauro Carvalho Chehab + +commit 9d50bff40e3e366886ec37299fc317edf84be0c9 upstream. + +WRITE_ONCE() should happen at the original var, not on a local +copy of it. + +Cc: stable@vger.kernel.org +Fixes: 59eda6ce824e ("drm/i915/gt: Batch TLB invalidations") +Signed-off-by: Mauro Carvalho Chehab +Reviewed-by: Andi Shyti +Signed-off-by: Rodrigo Vivi +[added cc-stable while merging it] +Link: https://patchwork.freedesktop.org/patch/msgid/f9550e6bacea10131ff40dd8981b69eb9251cdcd.1659598090.git.mchehab@kernel.org +(cherry picked from commit 3d037d99e61a1e7a3ae3d214146d88db349dd19f) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2 +- + drivers/gpu/drm/i915/i915_vma.c | 6 +++--- + drivers/gpu/drm/i915/i915_vma.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c ++++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c +@@ -211,7 +211,7 @@ void ppgtt_unbind_vma(struct i915_addres + + vm->clear_range(vm, vma_res->start, vma_res->vma_size); + if (vma_res->tlb) +- vma_invalidate_tlb(vm, *vma_res->tlb); ++ vma_invalidate_tlb(vm, vma_res->tlb); + } + + static unsigned long pd_count(u64 size, int shift) +--- a/drivers/gpu/drm/i915/i915_vma.c ++++ b/drivers/gpu/drm/i915/i915_vma.c +@@ -1299,7 +1299,7 @@ err_unpin: + return err; + } + +-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb) ++void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) + { + /* + * Before we release the pages that were bound by this vma, we +@@ -1309,7 +1309,7 @@ void vma_invalidate_tlb(struct i915_addr + * the most recent TLB invalidation seqno, and if we have not yet + * flushed the TLBs upon release, perform a full invalidation. 
+ */ +- WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt)); ++ WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt)); + } + + static void __vma_put_pages(struct i915_vma *vma, unsigned int count) +@@ -1957,7 +1957,7 @@ struct dma_fence *__i915_vma_evict(struc + dma_fence_put(unbind_fence); + unbind_fence = NULL; + } +- vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb); ++ vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb); + } + + /* +--- a/drivers/gpu/drm/i915/i915_vma.h ++++ b/drivers/gpu/drm/i915/i915_vma.h +@@ -213,7 +213,7 @@ bool i915_vma_misplaced(const struct i91 + u64 size, u64 alignment, u64 flags); + void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); + void i915_vma_revoke_mmap(struct i915_vma *vma); +-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb); ++void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb); + struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); + int __i915_vma_unbind(struct i915_vma *vma); + int __must_check i915_vma_unbind(struct i915_vma *vma); diff --git a/queue-5.19/series b/queue-5.19/series index 3956d33fbe4..165183755a3 100644 --- a/queue-5.19/series +++ b/queue-5.19/series @@ -19,3 +19,9 @@ btrfs-unset-reloc-control-if-transaction-commit-fails-in-prepare_to_relocate.pat btrfs-reset-ro-counter-on-block-group-if-we-fail-to-relocate.patch btrfs-fix-lost-error-handling-when-looking-up-extended-ref-on-log-replay.patch btrfs-fix-warning-during-log-replay-when-bumping-inode-link-count.patch +drm-amdgpu-change-vram-width-algorithm-for-vram_info-v3_0.patch +drm-i915-gt-ignore-tlb-invalidations-on-idle-engines.patch +drm-i915-gt-invalidate-tlb-of-the-oa-unit-at-tlb-invalidations.patch +drm-i915-gt-skip-tlb-invalidations-once-wedged.patch +drm-i915-gt-batch-tlb-invalidations.patch +drm-i915-pass-a-pointer-for-tlb-seqno-at-vma_invalidate_tlb.patch