From a33a18a5fbf9d8fab42771a06abd630ede09dbdf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 18 Nov 2024 09:35:45 -0500 Subject: [PATCH] Fixes for 6.11 Signed-off-by: Sasha Levin --- .../drm-xe-improve-hibernation-on-igpu.patch | 162 +++++++++++++ ...-restore-system-memory-ggtt-mappings.patch | 97 ++++++++ ..._calc_vm_flag_bits-and-arm64-mte-han.patch | 218 ++++++++++++++++++ queue-6.11/series | 3 + 4 files changed, 480 insertions(+) create mode 100644 queue-6.11/drm-xe-improve-hibernation-on-igpu.patch create mode 100644 queue-6.11/drm-xe-restore-system-memory-ggtt-mappings.patch create mode 100644 queue-6.11/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-han.patch diff --git a/queue-6.11/drm-xe-improve-hibernation-on-igpu.patch b/queue-6.11/drm-xe-improve-hibernation-on-igpu.patch new file mode 100644 index 00000000000..0b29f088057 --- /dev/null +++ b/queue-6.11/drm-xe-improve-hibernation-on-igpu.patch @@ -0,0 +1,162 @@ +From 60b68883bbe2e15d4d65fd991a0ddf73ba358797 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Nov 2024 17:01:57 +0000 +Subject: drm/xe: improve hibernation on igpu + +From: Matthew Auld + +[ Upstream commit 46f1f4b0f3c2a2dff9887de7c66ccc7ef482bd83 ] + +The GGTT looks to be stored inside stolen memory on igpu which is not +treated as normal RAM. The core kernel skips this memory range when +creating the hibernation image, therefore when coming back from +hibernation the GGTT programming is lost. This seems to cause issues +with broken resume where GuC FW fails to load: + +[drm] *ERROR* GT0: load failed: status = 0x400000A0, time = 10ms, freq = 1250MHz (req 1300MHz), done = -1 +[drm] *ERROR* GT0: load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01 +[drm] *ERROR* GT0: firmware signature verification failed +[drm] *ERROR* CRITICAL: Xe has declared device 0000:00:02.0 as wedged. + +Current GGTT users are kernel internal and tracked as pinned, so it +should be possible to hook into the existing save/restore logic that we +use for dgpu, where the actual evict is skipped but on restore we +importantly restore the GGTT programming. This has been confirmed to +fix hibernation on at least ADL and MTL, though likely all igpu +platforms are affected. + +This also means we have a hole in our testing, where the existing s4 +tests only really test the driver hooks, and don't go as far as actually +rebooting and restoring from the hibernation image and in turn powering +down RAM (and therefore losing the contents of stolen). + +v2 (Brost) + - Remove extra newline and drop unnecessary parentheses. + +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3275 +Signed-off-by: Matthew Auld +Cc: Matthew Brost +Cc: # v6.8+ +Reviewed-by: Matthew Brost +Reviewed-by: Lucas De Marchi +Signed-off-by: Matthew Brost +Link: https://patchwork.freedesktop.org/patch/msgid/20241101170156.213490-2-matthew.auld@intel.com +(cherry picked from commit f2a6b8e396666d97ada8e8759dfb6a69d8df6380) +Signed-off-by: Lucas De Marchi +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_bo.c | 37 ++++++++++++++------------------ + drivers/gpu/drm/xe/xe_bo_evict.c | 6 ------ + 2 files changed, 16 insertions(+), 27 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c +index c096e5c06f726..9a01babe679c9 100644 +--- a/drivers/gpu/drm/xe/xe_bo.c ++++ b/drivers/gpu/drm/xe/xe_bo.c +@@ -931,7 +931,10 @@ int xe_bo_restore_pinned(struct xe_bo *bo) + if (WARN_ON(!xe_bo_is_pinned(bo))) + return -EINVAL; + +- if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) ++ if (WARN_ON(xe_bo_is_vram(bo))) ++ return -EINVAL; ++ ++ if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo))) + return -EINVAL; + + if (!mem_type_is_vram(place->mem_type)) +@@ -1706,6 +1709,7 @@ int xe_bo_pin_external(struct xe_bo *bo) + + int xe_bo_pin(struct xe_bo *bo) + { ++ struct ttm_place *place = &bo->placements[0]; + struct xe_device *xe = xe_bo_device(bo); + int err; + +@@ -1736,8 +1740,6 @@ int xe_bo_pin(struct xe_bo *bo) + */ + if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { +- struct ttm_place *place = &(bo->placements[0]); +- + if (mem_type_is_vram(place->mem_type)) { + xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS); + +@@ -1745,13 +1747,12 @@ int xe_bo_pin(struct xe_bo *bo) + vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; + place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); + } ++ } + +- if (mem_type_is_vram(place->mem_type) || +- bo->flags & XE_BO_FLAG_GGTT) { +- spin_lock(&xe->pinned.lock); +- list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); +- spin_unlock(&xe->pinned.lock); +- } ++ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { ++ spin_lock(&xe->pinned.lock); ++ list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); ++ spin_unlock(&xe->pinned.lock); + } + + ttm_bo_pin(&bo->ttm); +@@ -1799,24 +1800,18 @@ void xe_bo_unpin_external(struct xe_bo *bo) + + void xe_bo_unpin(struct xe_bo *bo) + { ++ struct ttm_place *place = &bo->placements[0]; + struct xe_device *xe = xe_bo_device(bo); + + xe_assert(xe, !bo->ttm.base.import_attach); + xe_assert(xe, xe_bo_is_pinned(bo)); + +- if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && +- bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { +- struct ttm_place *place = &(bo->placements[0]); +- +- if (mem_type_is_vram(place->mem_type) || +- bo->flags & XE_BO_FLAG_GGTT) { +- spin_lock(&xe->pinned.lock); +- xe_assert(xe, !list_empty(&bo->pinned_link)); +- list_del_init(&bo->pinned_link); +- spin_unlock(&xe->pinned.lock); +- } ++ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) { ++ spin_lock(&xe->pinned.lock); ++ xe_assert(xe, !list_empty(&bo->pinned_link)); ++ list_del_init(&bo->pinned_link); ++ spin_unlock(&xe->pinned.lock); + } +- + ttm_bo_unpin(&bo->ttm); + } + +diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c +index ef1950ab2c1d8..8fb2be0610035 100644 +--- a/drivers/gpu/drm/xe/xe_bo_evict.c ++++ b/drivers/gpu/drm/xe/xe_bo_evict.c +@@ -34,9 +34,6 @@ int xe_bo_evict_all(struct xe_device *xe) + u8 id; + int ret; + +- if (!IS_DGFX(xe)) +- return 0; +- + /* User memory */ + for (mem_type = XE_PL_TT; mem_type <= XE_PL_VRAM1; ++mem_type) { + struct ttm_resource_manager *man = +@@ -136,9 +133,6 @@ int xe_bo_restore_kernel(struct xe_device *xe) + struct xe_bo *bo; + int ret; + +- if (!IS_DGFX(xe)) +- return 0; +- + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.evicted, +-- +2.43.0 + diff --git a/queue-6.11/drm-xe-restore-system-memory-ggtt-mappings.patch b/queue-6.11/drm-xe-restore-system-memory-ggtt-mappings.patch new file mode 100644 index 00000000000..95ad1bea663 --- /dev/null +++ b/queue-6.11/drm-xe-restore-system-memory-ggtt-mappings.patch @@ -0,0 +1,97 @@ +From d56ecf5a9890635c69549beb23d0a4a02d52356b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 31 Oct 2024 11:22:57 -0700 +Subject: drm/xe: Restore system memory GGTT mappings + +From: Matthew Brost + +[ Upstream commit dd886a63d6e2ce5c16e662c07547c067ad7d91f5 ] + +GGTT mappings reside on the device and this state is lost during suspend +/ d3cold thus this state must be restored resume regardless if the BO is +in system memory or VRAM. + +v2: + - Unnecessary parentheses around bo->placements[0] (Checkpatch) + +Signed-off-by: Matthew Brost +Reviewed-by: Matthew Auld +Link: https://patchwork.freedesktop.org/patch/msgid/20241031182257.2949579-1-matthew.brost@intel.com +(cherry picked from commit a19d1db9a3fa89fabd7c83544b84f393ee9b851f) +Signed-off-by: Lucas De Marchi +Stable-dep-of: 46f1f4b0f3c2 ("drm/xe: improve hibernation on igpu") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_bo.c | 14 +++++++++++--- + drivers/gpu/drm/xe/xe_bo_evict.c | 1 - + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c +index e147ef1d0578f..c096e5c06f726 100644 +--- a/drivers/gpu/drm/xe/xe_bo.c ++++ b/drivers/gpu/drm/xe/xe_bo.c +@@ -869,8 +869,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo) + if (WARN_ON(!xe_bo_is_pinned(bo))) + return -EINVAL; + +- if (WARN_ON(!xe_bo_is_vram(bo))) +- return -EINVAL; ++ if (!xe_bo_is_vram(bo)) ++ return 0; + + ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx); + if (ret) +@@ -920,6 +920,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) + .interruptible = false, + }; + struct ttm_resource *new_mem; ++ struct ttm_place *place = &bo->placements[0]; + int ret; + + xe_bo_assert_held(bo); +@@ -933,6 +934,9 @@ int xe_bo_restore_pinned(struct xe_bo *bo) + if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) + return -EINVAL; + ++ if (!mem_type_is_vram(place->mem_type)) ++ return 0; ++ + ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); + if (ret) + return ret; +@@ -1740,7 +1744,10 @@ int xe_bo_pin(struct xe_bo *bo) + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - + vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; + place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); ++ } + ++ if (mem_type_is_vram(place->mem_type) || ++ bo->flags & XE_BO_FLAG_GGTT) { + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); +@@ -1801,7 +1808,8 @@ void xe_bo_unpin(struct xe_bo *bo) + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { + struct ttm_place *place = &(bo->placements[0]); + +- if (mem_type_is_vram(place->mem_type)) { ++ if (mem_type_is_vram(place->mem_type) || ++ bo->flags & XE_BO_FLAG_GGTT) { + spin_lock(&xe->pinned.lock); + xe_assert(xe, !list_empty(&bo->pinned_link)); + list_del_init(&bo->pinned_link); +diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c +index c202197efbe05..ef1950ab2c1d8 100644 +--- a/drivers/gpu/drm/xe/xe_bo_evict.c ++++ b/drivers/gpu/drm/xe/xe_bo_evict.c +@@ -170,7 +170,6 @@ int xe_bo_restore_kernel(struct xe_device *xe) + * should setup the iosys map. + */ + xe_assert(xe, !iosys_map_is_null(&bo->vmap)); +- xe_assert(xe, xe_bo_is_vram(bo)); + + xe_bo_put(bo); + +-- +2.43.0 + diff --git a/queue-6.11/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-han.patch b/queue-6.11/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-han.patch new file mode 100644 index 00000000000..dc75234f3e8 --- /dev/null +++ b/queue-6.11/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-han.patch @@ -0,0 +1,218 @@ +From 9f5efc1137ba5e6292b53fb718292b173018889d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Oct 2024 18:11:47 +0000 +Subject: mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling + +From: Lorenzo Stoakes + +[ Upstream commit 5baf8b037debf4ec60108ccfeccb8636d1dbad81 ] + +Currently MTE is permitted in two circumstances (desiring to use MTE +having been specified by the VM_MTE flag) - where MAP_ANONYMOUS is +specified, as checked by arch_calc_vm_flag_bits() and actualised by +setting the VM_MTE_ALLOWED flag, or if the file backing the mapping is +shmem, in which case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap +hook is activated in mmap_region(). + +The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also +set is the arm64 implementation of arch_validate_flags(). + +Unfortunately, we intend to refactor mmap_region() to perform this check +earlier, meaning that in the case of a shmem backing we will not have +invoked shmem_mmap() yet, causing the mapping to fail spuriously. + +It is inappropriate to set this architecture-specific flag in general mm +code anyway, so a sensible resolution of this issue is to instead move the +check somewhere else. + +We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via +the arch_calc_vm_flag_bits() call. + +This is an appropriate place to do this as we already check for the +MAP_ANONYMOUS case here, and the shmem file case is simply a variant of +the same idea - we permit RAM-backed memory. + +This requires a modification to the arch_calc_vm_flag_bits() signature to +pass in a pointer to the struct file associated with the mapping, however +this is not too egregious as this is only used by two architectures anyway +- arm64 and parisc. + +So this patch performs this adjustment and removes the unnecessary +assignment of VM_MTE_ALLOWED in shmem_mmap(). + +[akpm@linux-foundation.org: fix whitespace, per Catalin] +Link: https://lkml.kernel.org/r/ec251b20ba1964fb64cf1607d2ad80c47f3873df.1730224667.git.lorenzo.stoakes@oracle.com +Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") +Signed-off-by: Lorenzo Stoakes +Suggested-by: Catalin Marinas +Reported-by: Jann Horn +Reviewed-by: Catalin Marinas +Reviewed-by: Vlastimil Babka +Cc: Andreas Larsson +Cc: David S. Miller +Cc: Helge Deller +Cc: James E.J. Bottomley +Cc: Liam R. Howlett +Cc: Linus Torvalds +Cc: Mark Brown +Cc: Peter Xu +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/mman.h | 10 +++++++--- + arch/parisc/include/asm/mman.h | 5 +++-- + include/linux/mman.h | 7 ++++--- + mm/mmap.c | 2 +- + mm/nommu.c | 2 +- + mm/shmem.c | 3 --- + 6 files changed, 16 insertions(+), 13 deletions(-) + +diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h +index 5966ee4a61542..ef35c52aabd66 100644 +--- a/arch/arm64/include/asm/mman.h ++++ b/arch/arm64/include/asm/mman.h +@@ -3,6 +3,8 @@ + #define __ASM_MMAN_H__ + + #include ++#include ++#include + #include + #include + +@@ -21,19 +23,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + } + #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) ++static inline unsigned long arch_calc_vm_flag_bits(struct file *file, ++ unsigned long flags) + { + /* + * Only allow MTE on anonymous mappings as these are guaranteed to be + * backed by tags-capable memory. The vm_flags may be overridden by a + * filesystem supporting MTE (RAM-based). + */ +- if (system_supports_mte() && (flags & MAP_ANONYMOUS)) ++ if (system_supports_mte() && ++ ((flags & MAP_ANONYMOUS) || shmem_file(file))) + return VM_MTE_ALLOWED; + + return 0; + } +-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) ++#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) + + static inline bool arch_validate_prot(unsigned long prot, + unsigned long addr __always_unused) +diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h +index 89b6beeda0b86..663f587dc7896 100644 +--- a/arch/parisc/include/asm/mman.h ++++ b/arch/parisc/include/asm/mman.h +@@ -2,6 +2,7 @@ + #ifndef __ASM_MMAN_H__ + #define __ASM_MMAN_H__ + ++#include + #include + + /* PARISC cannot allow mdwe as it needs writable stacks */ +@@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) + } + #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) ++static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags) + { + /* + * The stack on parisc grows upwards, so if userspace requests memory +@@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) + + return 0; + } +-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) ++#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) + + #endif /* __ASM_MMAN_H__ */ +diff --git a/include/linux/mman.h b/include/linux/mman.h +index bcb201ab7a412..c274870343155 100644 +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -2,6 +2,7 @@ + #ifndef _LINUX_MMAN_H + #define _LINUX_MMAN_H + ++#include + #include + #include + +@@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages) + #endif + + #ifndef arch_calc_vm_flag_bits +-#define arch_calc_vm_flag_bits(flags) 0 ++#define arch_calc_vm_flag_bits(file, flags) 0 + #endif + + #ifndef arch_validate_prot +@@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) + * Combine the mmap "flags" argument into "vm_flags" used internally. + */ + static inline unsigned long +-calc_vm_flag_bits(unsigned long flags) ++calc_vm_flag_bits(struct file *file, unsigned long flags) + { + return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +- arch_calc_vm_flag_bits(flags); ++ arch_calc_vm_flag_bits(file, flags); + } + + unsigned long vm_commit_limit(void); +diff --git a/mm/mmap.c b/mm/mmap.c +index 8a04f29aa4230..ccebd17fb48f6 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1316,7 +1316,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, + * to. we assume access permissions have been handled by the open + * of the memory object, so we don't do any here. + */ +- vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | ++ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + + /* Obtain the address to map to. we verify (or select) it and ensure +diff --git a/mm/nommu.c b/mm/nommu.c +index 7e018da8574ed..50100b909187a 100644 +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -838,7 +838,7 @@ static unsigned long determine_vm_flags(struct file *file, + { + unsigned long vm_flags; + +- vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); ++ vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags); + + if (!file) { + /* +diff --git a/mm/shmem.c b/mm/shmem.c +index 27f496d6e43eb..67f2ae6a8f0f3 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2597,9 +2597,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) + if (ret) + return ret; + +- /* arm64 - allow memory tagging on RAM-based files */ +- vm_flags_set(vma, VM_MTE_ALLOWED); +- + file_accessed(file); + /* This is anonymous shared memory if it is unlinked at the time of mmap */ + if (inode->i_nlink) +-- +2.43.0 + diff --git a/queue-6.11/series b/queue-6.11/series index 4267cdb7196..b3e5fe0f215 100644 --- a/queue-6.11/series +++ b/queue-6.11/series @@ -100,3 +100,6 @@ drm-amd-display-require-minimum-vblank-size-for-stutter-optimization.patch drm-amd-display-handle-dml-allocation-failure-to-avoid-crash.patch drm-amd-display-fix-failure-to-read-vram-info-due-to-static-bp_result.patch mm-gup-avoid-an-unnecessary-allocation-call-for-foll_longterm-cases.patch +mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-han.patch +drm-xe-restore-system-memory-ggtt-mappings.patch +drm-xe-improve-hibernation-on-igpu.patch -- 2.47.2