From e9702191c460932637ace0541f760facef12e91c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Thu, 27 Jan 2022 16:00:43 +0100
Subject: [PATCH] 5.15-stable patches

added patches:
	drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch
	drm-i915-flush-tlbs-before-releasing-backing-store.patch

---
 ...-reset-dcn31-smu-mailbox-on-failures.patch |  36 ++
 ...-tlbs-before-releasing-backing-store.patch | 330 ++++++++++++++++++
 queue-5.15/series                             |   2 +
 3 files changed, 368 insertions(+)
 create mode 100644 queue-5.15/drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch
 create mode 100644 queue-5.15/drm-i915-flush-tlbs-before-releasing-backing-store.patch
 create mode 100644 queue-5.15/series

diff --git a/queue-5.15/drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch b/queue-5.15/drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch
new file mode 100644
index 00000000000..9b2a961a08e
--- /dev/null
+++ b/queue-5.15/drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch
@@ -0,0 +1,36 @@
+From 83293f7f3d15fc56e86bd5067a2c88b6b233ac3a Mon Sep 17 00:00:00 2001
+From: Mario Limonciello
+Date: Fri, 7 Jan 2022 15:40:10 -0600
+Subject: drm/amd/display: reset dcn31 SMU mailbox on failures
+
+From: Mario Limonciello
+
+commit 83293f7f3d15fc56e86bd5067a2c88b6b233ac3a upstream.
+
+Otherwise future commands may fail as well, leading to downstream
+problems that look like they stemmed from a timeout the first time
+but really didn't.
+
+Signed-off-by: Mario Limonciello
+Reviewed-by: Nicholas Kazlauskas
+Signed-off-by: Alex Deucher
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+@@ -119,6 +119,12 @@ int dcn31_smu_send_msg_with_param(
+ 
+ 	result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
+ 
++	if (result == VBIOSSMC_Result_Failed) {
++		ASSERT(0);
++		REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
++		return -1;
++	}
++
+ 	if (IS_SMU_TIMEOUT(result)) {
+ 		ASSERT(0);
+ 		dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
diff --git a/queue-5.15/drm-i915-flush-tlbs-before-releasing-backing-store.patch b/queue-5.15/drm-i915-flush-tlbs-before-releasing-backing-store.patch
new file mode 100644
index 00000000000..f13f028bac4
--- /dev/null
+++ b/queue-5.15/drm-i915-flush-tlbs-before-releasing-backing-store.patch
@@ -0,0 +1,330 @@
+From 7938d61591d33394a21bdd7797a245b65428f44c Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin
+Date: Tue, 19 Oct 2021 13:27:10 +0100
+Subject: drm/i915: Flush TLBs before releasing backing store
+
+From: Tvrtko Ursulin
+
+commit 7938d61591d33394a21bdd7797a245b65428f44c upstream.
+
+We need to flush TLBs before releasing backing store, otherwise
+userspace is able to encounter stale entries if a) it is not declaring
+access to certain buffers and b) it races with the backing store
+release from such an undeclared execution already executing on the GPU
+in parallel.
+
+The approach taken is to mark any buffer objects which were ever bound
+to the GPU and to trigger a serialized TLB flush when their backing
+store is released.
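+
+In code terms the pattern condenses to the two hooks below (shortened
+from the i915_vma.c and i915_gem_pages.c hunks further down; the
+release side additionally takes a runtime-PM reference, omitted here):
+
+	/* On VMA bind: note that the object has been on the GPU. */
+	if (vma->obj)
+		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
+	/* On backing store release: flush once, serialized on the GT. */
+	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags))
+		intel_gt_invalidate_tlbs(&i915->gt);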
+
+Alternatively the flushing could be done on VMA unbind, at which point
+we would be able to ascertain whether there is a potential parallel GPU
+execution (which could race), but essentially it boils down to paying
+the cost of TLB flushes potentially needlessly at VMA unbind time (when
+the backing store is not known to be going away so not needed for
+safety), versus potentially needlessly at backing store release time
+(since we at that point cannot tell whether there is anything executing
+on the GPU which uses that object).
+
+Therefore simplicity of implementation has been chosen for now with
+scope to benchmark and refine later as required.
+
+Signed-off-by: Tvrtko Ursulin
+Reported-by: Sushma Venkatesh Reddy
+Reviewed-by: Daniel Vetter
+Acked-by: Dave Airlie
+Cc: Daniel Vetter
+Cc: Jon Bloomfield
+Cc: Joonas Lahtinen
+Cc: Jani Nikula
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/i915/gem/i915_gem_object_types.h |    1 
+ drivers/gpu/drm/i915/gem/i915_gem_pages.c        |   10 ++
+ drivers/gpu/drm/i915/gt/intel_gt.c               |  102 +++++++++++++++++++++++
+ drivers/gpu/drm/i915/gt/intel_gt.h               |    2 
+ drivers/gpu/drm/i915/gt/intel_gt_types.h         |    2 
+ drivers/gpu/drm/i915/i915_reg.h                  |   11 ++
+ drivers/gpu/drm/i915/i915_vma.c                  |    3 
+ drivers/gpu/drm/i915/intel_uncore.c              |   26 ++++-
+ drivers/gpu/drm/i915/intel_uncore.h              |    2 
+ 9 files changed, 155 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+@@ -298,6 +298,7 @@ struct drm_i915_gem_object {
+ 			 I915_BO_ALLOC_USER)
+ #define I915_BO_READONLY	BIT(4)
+ #define I915_TILING_QUIRK_BIT	5 /* unknown swizzling; do not release! */
++#define I915_BO_WAS_BOUND_BIT	6
+ 
+ 	/**
+ 	 * @mem_flags - Mutable placement-related flags
+--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+@@ -10,6 +10,8 @@
+ #include "i915_gem_lmem.h"
+ #include "i915_gem_mman.h"
+ 
++#include "gt/intel_gt.h"
++
+ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ 				 struct sg_table *pages,
+ 				 unsigned int sg_page_sizes)
+@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm
+ 	__i915_gem_object_reset_page_iter(obj);
+ 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ 
++	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
++		struct drm_i915_private *i915 = to_i915(obj->base.dev);
++		intel_wakeref_t wakeref;
++
++		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
++			intel_gt_invalidate_tlbs(&i915->gt);
++	}
++
+ 	return pages;
+ }
+ 
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt
+ 
+ 	spin_lock_init(&gt->irq_lock);
+ 
++	mutex_init(&gt->tlb_invalidate_lock);
++
+ 	INIT_LIST_HEAD(&gt->closed_vma);
+ 	spin_lock_init(&gt->closed_lock);
+ 
+@@ -895,3 +897,103 @@ void intel_gt_info_print(const struct in
+ 
+ 	intel_sseu_dump(&info->sseu, p);
+ }
++
++struct reg_and_bit {
++	i915_reg_t reg;
++	u32 bit;
++};
++
++static struct reg_and_bit
++get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
++		const i915_reg_t *regs, const unsigned int num)
++{
++	const unsigned int class = engine->class;
++	struct reg_and_bit rb = { };
++
++	if (drm_WARN_ON_ONCE(&engine->i915->drm,
++			     class >= num || !regs[class].reg))
++		return rb;
++
++	rb.reg = regs[class];
++	if (gen8 && class == VIDEO_DECODE_CLASS)
++		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
++	else
++		rb.bit = engine->instance;
++
++	rb.bit = BIT(rb.bit);
++
++	return rb;
++}
++
++void intel_gt_invalidate_tlbs(struct intel_gt *gt)
++{
++	static const i915_reg_t gen8_regs[] = {
++		[RENDER_CLASS]			= GEN8_RTCR,
++		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
++		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
++		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
++	};
++	static const i915_reg_t gen12_regs[] = {
++		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
++		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
++		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
++		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
++	};
++	struct drm_i915_private *i915 = gt->i915;
++	struct intel_uncore *uncore = gt->uncore;
++	struct intel_engine_cs *engine;
++	enum intel_engine_id id;
++	const i915_reg_t *regs;
++	unsigned int num = 0;
++
++	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
++		return;
++
++	if (GRAPHICS_VER(i915) == 12) {
++		regs = gen12_regs;
++		num = ARRAY_SIZE(gen12_regs);
++	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
++		regs = gen8_regs;
++		num = ARRAY_SIZE(gen8_regs);
++	} else if (GRAPHICS_VER(i915) < 8) {
++		return;
++	}
++
++	if (drm_WARN_ONCE(&i915->drm, !num,
++			  "Platform does not implement TLB invalidation!"))
++		return;
++
++	GEM_TRACE("\n");
++
++	assert_rpm_wakelock_held(&i915->runtime_pm);
++
++	mutex_lock(&gt->tlb_invalidate_lock);
++	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
++
++	for_each_engine(engine, gt, id) {
++		/*
++		 * HW architecture suggests a typical invalidation time of 40us,
++		 * with pessimistic cases up to 100us and a recommendation to
++		 * cap at 1ms. We go a bit higher just in case.
++		 */
++		const unsigned int timeout_us = 100;
++		const unsigned int timeout_ms = 4;
++		struct reg_and_bit rb;
++
++		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
++		if (!i915_mmio_reg_offset(rb.reg))
++			continue;
++
++		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
++		if (__intel_wait_for_register_fw(uncore,
++						 rb.reg, rb.bit, 0,
++						 timeout_us, timeout_ms,
++						 NULL))
++			drm_err_ratelimited(&gt->i915->drm,
++					    "%s TLB invalidation did not complete in %ums!\n",
++					    engine->name, timeout_ms);
++	}
++
++	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
++	mutex_unlock(&gt->tlb_invalidate_lock);
++}
+--- a/drivers/gpu/drm/i915/gt/intel_gt.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
+@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct in
+ 
+ void intel_gt_watchdog_work(struct work_struct *work);
+ 
++void intel_gt_invalidate_tlbs(struct intel_gt *gt);
++
+ #endif /* __INTEL_GT_H__ */
+--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+@@ -72,6 +72,8 @@ struct intel_gt {
+ 
+ 	struct intel_uc uc;
+ 
++	struct mutex tlb_invalidate_lock;
++
+ 	struct intel_gt_timelines {
+ 		spinlock_t lock; /* protects active_list */
+ 		struct list_head active_list;
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -2669,6 +2669,12 @@ static inline bool i915_mmio_reg_valid(i
+ #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1 << 28)
+ #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1 << 24)
+ 
++#define GEN8_RTCR	_MMIO(0x4260)
++#define GEN8_M1TCR	_MMIO(0x4264)
++#define GEN8_M2TCR	_MMIO(0x4268)
++#define GEN8_BTCR	_MMIO(0x426c)
++#define GEN8_VTCR	_MMIO(0x4270)
++
+ #if 0
+ #define PRB0_TAIL	_MMIO(0x2030)
+ #define PRB0_HEAD	_MMIO(0x2034)
+@@ -2763,6 +2769,11 @@ static inline bool i915_mmio_reg_valid(i
+ #define   FAULT_VA_HIGH_BITS		(0xf << 0)
+ #define   FAULT_GTT_SEL			(1 << 4)
+ 
++#define GEN12_GFX_TLB_INV_CR	_MMIO(0xced8)
++#define GEN12_VD_TLB_INV_CR	_MMIO(0xcedc)
++#define GEN12_VE_TLB_INV_CR	_MMIO(0xcee0)
++#define GEN12_BLT_TLB_INV_CR	_MMIO(0xcee4)
++
+ #define GEN12_AUX_ERR_DBG		_MMIO(0x43f4)
+ 
+ #define FPGA_DBG		_MMIO(0x42300)
+--- a/drivers/gpu/drm/i915/i915_vma.c
++++ b/drivers/gpu/drm/i915/i915_vma.c
+@@ -434,6 +434,9 @@ int i915_vma_bind(struct i915_vma *vma,
+ 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
+ 	}
+ 
++	if (vma->obj)
++		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
++
+ 	atomic_or(bind_flags, &vma->flags);
+ 	return 0;
+ }
+--- a/drivers/gpu/drm/i915/intel_uncore.c
++++ b/drivers/gpu/drm/i915/intel_uncore.c
+@@ -718,7 +718,8 @@ void intel_uncore_forcewake_get__locked(
+ }
+ 
+ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
+-					 enum forcewake_domains fw_domains)
++					 enum forcewake_domains fw_domains,
++					 bool delayed)
+ {
+ 	struct intel_uncore_forcewake_domain *domain;
+ 	unsigned int tmp;
+@@ -733,7 +734,11 @@ static void __intel_uncore_forcewake_put
+ 			continue;
+ 		}
+ 
+-		uncore->funcs.force_wake_put(uncore, domain->mask);
++		if (delayed &&
++		    !(domain->uncore->fw_domains_timer & domain->mask))
++			fw_domain_arm_timer(domain);
++		else
++			uncore->funcs.force_wake_put(uncore, domain->mask);
+ 	}
+ }
+ 
+@@ -754,7 +759,20 @@ void intel_uncore_forcewake_put(struct i
+ 		return;
+ 
+ 	spin_lock_irqsave(&uncore->lock, irqflags);
+-	__intel_uncore_forcewake_put(uncore, fw_domains);
++	__intel_uncore_forcewake_put(uncore, fw_domains, false);
++	spin_unlock_irqrestore(&uncore->lock, irqflags);
++}
++
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++					enum forcewake_domains fw_domains)
++{
++	unsigned long irqflags;
++
++	if (!uncore->funcs.force_wake_put)
++		return;
++
++	spin_lock_irqsave(&uncore->lock, irqflags);
++	__intel_uncore_forcewake_put(uncore, fw_domains, true);
+ 	spin_unlock_irqrestore(&uncore->lock, irqflags);
+ }
+ 
+@@ -796,7 +814,7 @@ void intel_uncore_forcewake_put__locked(
+ 	if (!uncore->funcs.force_wake_put)
+ 		return;
+ 
+-	__intel_uncore_forcewake_put(uncore, fw_domains);
++	__intel_uncore_forcewake_put(uncore, fw_domains, false);
+ }
+ 
+ void assert_forcewakes_inactive(struct intel_uncore *uncore)
+--- a/drivers/gpu/drm/i915/intel_uncore.h
++++ b/drivers/gpu/drm/i915/intel_uncore.h
+@@ -229,6 +229,8 @@ void intel_uncore_forcewake_get(struct i
+ 				enum forcewake_domains domains);
+ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ 				enum forcewake_domains domains);
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++					enum forcewake_domains domains);
+ void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
+ 				  enum forcewake_domains fw_domains);
+ 
diff --git a/queue-5.15/series b/queue-5.15/series
new file mode 100644
index 00000000000..638a593af76
--- /dev/null
+++ b/queue-5.15/series
@@ -0,0 +1,2 @@
+drm-i915-flush-tlbs-before-releasing-backing-store.patch
+drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch
-- 
2.47.2
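
For quick reference, the core of the new intel_gt_invalidate_tlbs()
added by the i915 patch reduces to the loop below (condensed from the
intel_gt.c hunk above; declarations and error handling as in the full
hunk):

	mutex_lock(&gt->tlb_invalidate_lock);
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	for_each_engine(engine, gt, id) {
		/* Per-class invalidation register, per-instance bit. */
		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		if (!i915_mmio_reg_offset(rb.reg))
			continue;

		/* Kick the invalidation, then poll for the bit to clear. */
		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
		__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0,
					     timeout_us, timeout_ms, NULL);
	}

	/* Delayed put holds forcewake briefly for back-to-back flushes. */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
	mutex_unlock(&gt->tlb_invalidate_lock);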