From 7ce2167347475bde9adccf2ed830332622b4d61c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 19 Nov 2024 15:25:45 +0100
Subject: [PATCH] 6.6-stable patches

added patches:
	mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch
	mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch
	mm-refactor-map_deny_write_exec.patch
	mm-resolve-faulty-mmap_region-error-path-behaviour.patch
	mm-unconditionally-close-vmas-on-error.patch
---
 ...ation-when-error-arises-on-mmap-hook.patch | 169 +++++++++++
 ..._vm_flag_bits-and-arm64-mte-handling.patch | 206 +++++++++++++
 .../mm-refactor-map_deny_write_exec.patch | 104 +++++++
 ...lty-mmap_region-error-path-behaviour.patch | 280 ++++++++++++++++++
 ...-unconditionally-close-vmas-on-error.patch | 126 ++++++++
 queue-6.6/series | 5 +
 6 files changed, 890 insertions(+)
 create mode 100644 queue-6.6/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch
 create mode 100644 queue-6.6/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch
 create mode 100644 queue-6.6/mm-refactor-map_deny_write_exec.patch
 create mode 100644 queue-6.6/mm-resolve-faulty-mmap_region-error-path-behaviour.patch
 create mode 100644 queue-6.6/mm-unconditionally-close-vmas-on-error.patch

diff --git a/queue-6.6/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch b/queue-6.6/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch
new file mode 100644
index 00000000000..b76b05264f9
--- /dev/null
+++ b/queue-6.6/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch
@@ -0,0 +1,169 @@
+From stable+bounces-93535-greg=kroah.com@vger.kernel.org Fri Nov 15 13:42:51 2024
+From: Lorenzo Stoakes
+Date: Fri, 15 Nov 2024 12:41:54 +0000
+Subject: mm: avoid unsafe VMA hook invocation when error arises on mmap hook
+To: stable@vger.kernel.org
+Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller
+Message-ID: <33d70849ec62ba738ca2f8db58fe24076d5282bf.1731672733.git.lorenzo.stoakes@oracle.com>
+
+From: Lorenzo Stoakes
+
+[ Upstream commit 3dd6ed34ce1f2356a77fb88edafb5ec96784e3cf ]
+
+Patch series "fix error handling in mmap_region() and refactor
+(hotfixes)", v4.
+
+mmap_region() is somewhat terrifying, with spaghetti-like control flow and
+numerous means by which issues can arise and incomplete state, memory
+leaks and other unpleasantness can occur.
+
+A large amount of the complexity arises from trying to handle errors late
+in the process of mapping a VMA, which forms the basis of recently
+observed issues with resource leaks and observable inconsistent state.
+
+This series goes to great lengths to simplify how mmap_region() works and
+to avoid unwinding errors late on in the process of setting up the VMA for
+the new mapping, and equally avoids such operations occurring while the
+VMA is in an inconsistent state.
+
+The patches in this series comprise the minimal changes required to
+resolve existing issues in mmap_region() error handling, in order that
+they can be hotfixed and backported. There is additionally a follow up
+series which goes further, separated out from the v1 series and sent and
+updated separately.
+
+This patch (of 5):
+
+After an attempted mmap() fails, we are no longer in a situation where we
+can safely interact with VMA hooks. This is currently not enforced,
+meaning that we need complicated handling to ensure we do not incorrectly
+call these hooks.
+
+We can avoid the whole issue by treating the VMA as suspect the moment
+that the file->f_ops->mmap() function reports an error by replacing
+whatever VMA operations were installed with a dummy empty set of VMA
+operations.
+
+We do so through a new helper function internal to mm - mmap_file() -
+which is both more logically named than the existing call_mmap() function
+and correctly isolates handling of the vm_op reassignment to mm.
+
+All the existing invocations of call_mmap() outside of mm are ultimately
+nested within the call_mmap() from mm, which we now replace.
+
+It is therefore safe to leave call_mmap() in place as a convenience
+ function (and to avoid churn). The invokers are:
+
+ ovl_file_operations -> mmap -> ovl_mmap() -> backing_file_mmap()
+ coda_file_operations -> mmap -> coda_file_mmap()
+ shm_file_operations -> shm_mmap()
+shm_file_operations_huge -> shm_mmap()
+ dma_buf_fops -> dma_buf_mmap_internal -> i915_dmabuf_ops
+ -> i915_gem_dmabuf_mmap()
+
+None of these callers interact with vm_ops or mappings in a problematic
+way on error, quickly exiting out.
+
+Link: https://lkml.kernel.org/r/cover.1730224667.git.lorenzo.stoakes@oracle.com
+Link: https://lkml.kernel.org/r/d41fd763496fd0048a962f3fd9407dc72dd4fd86.1730224667.git.lorenzo.stoakes@oracle.com
+Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
+Signed-off-by: Lorenzo Stoakes
+Reported-by: Jann Horn
+Reviewed-by: Liam R. Howlett
+Reviewed-by: Vlastimil Babka
+Reviewed-by: Jann Horn
+Cc: Andreas Larsson
+Cc: Catalin Marinas
+Cc: David S. Miller
+Cc: Helge Deller
+Cc: James E.J. Bottomley
+Cc: Linus Torvalds
+Cc: Mark Brown
+Cc: Peter Xu
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Lorenzo Stoakes
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/internal.h | 27 +++++++++++++++++++++++++++
+ mm/mmap.c | 4 ++--
+ mm/nommu.c | 4 ++--
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -83,6 +83,33 @@ static inline void *folio_raw_mapping(st
+ return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
+ }
+
++/*
++ * This is a file-backed mapping, and is about to be memory mapped - invoke its
++ * mmap hook and safely handle error conditions. On error, VMA hooks will be
++ * mutated.
++ *
++ * @file: File which backs the mapping.
++ * @vma: VMA which we are mapping.
++ *
++ * Returns: 0 if success, error otherwise.
++ */
++static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
++{
++ int err = call_mmap(file, vma);
++
++ if (likely(!err))
++ return 0;
++
++ /*
++ * OK, we tried to call the file hook for mmap(), but an error
++ * arose. The mapping is in an inconsistent state and we must not invoke
++ * any further hooks on it.
++ */
++ vma->vm_ops = &vma_dummy_vm_ops;
++
++ return err;
++}
++
+ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+ int nr_throttled);
+ static inline void acct_reclaim_writeback(struct folio *folio)
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2779,7 +2779,7 @@ cannot_expand:
+ }
+
+ vma->vm_file = get_file(file);
+- error = call_mmap(file, vma);
++ error = mmap_file(file, vma);
+ if (error)
+ goto unmap_and_free_vma;
+
+@@ -2793,7 +2793,7 @@ cannot_expand:
+
+ vma_iter_config(&vmi, addr, end);
+ /*
+- * If vm_flags changed after call_mmap(), we should try merge
++ * If vm_flags changed after mmap_file(), we should try merge
+ * vma again as we may succeed this time.
+ */
+ if (unlikely(vm_flags != vma->vm_flags && prev)) {
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -896,7 +896,7 @@ static int do_mmap_shared_file(struct vm
+ {
+ int ret;
+
+- ret = call_mmap(vma->vm_file, vma);
++ ret = mmap_file(vma->vm_file, vma);
+ if (ret == 0) {
+ vma->vm_region->vm_top = vma->vm_region->vm_end;
+ return 0;
+@@ -929,7 +929,7 @@ static int do_mmap_private(struct vm_are
+ * happy.
+ */
+ if (capabilities & NOMMU_MAP_DIRECT) {
+- ret = call_mmap(vma->vm_file, vma);
++ ret = mmap_file(vma->vm_file, vma);
+ /* shouldn't return success if we're not sharing */
+ if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
+ ret = -ENOSYS;
diff --git a/queue-6.6/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch b/queue-6.6/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch
new file mode 100644
index 00000000000..1d3827fad5d
--- /dev/null
+++ b/queue-6.6/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch
@@ -0,0 +1,206 @@
+From stable+bounces-93538-greg=kroah.com@vger.kernel.org Fri Nov 15 13:43:44 2024
+From: Lorenzo Stoakes
+Date: Fri, 15 Nov 2024 12:41:57 +0000
+Subject: mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling
+To: stable@vger.kernel.org
+Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller
+Message-ID: <7c0218d03fd2119025d8cbc1b814639cf09314e0.1731672733.git.lorenzo.stoakes@oracle.com>
+
+From: Lorenzo Stoakes
+
+[ Upstream commit 5baf8b037debf4ec60108ccfeccb8636d1dbad81 ]
+
+Currently MTE is permitted in two circumstances (desiring to use MTE
+having been specified by the VM_MTE flag) - where MAP_ANONYMOUS is
+specified, as checked by arch_calc_vm_flag_bits() and actualised by
+setting the VM_MTE_ALLOWED flag, or if the file backing the mapping is
+shmem, in which case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap
+hook is activated in mmap_region().
+
+The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also
+set is the arm64 implementation of arch_validate_flags().
+
+Unfortunately, we intend to refactor mmap_region() to perform this check
+earlier, meaning that in the case of a shmem backing we will not have
+invoked shmem_mmap() yet, causing the mapping to fail spuriously.
+
+It is inappropriate to set this architecture-specific flag in general mm
+code anyway, so a sensible resolution of this issue is to instead move the
+check somewhere else.
+
+We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via
+the arch_calc_vm_flag_bits() call.
+
+This is an appropriate place to do this as we already check for the
+MAP_ANONYMOUS case here, and the shmem file case is simply a variant of
+the same idea - we permit RAM-backed memory.
+
+This requires a modification to the arch_calc_vm_flag_bits() signature to
+pass in a pointer to the struct file associated with the mapping, however
+this is not too egregious as this is only used by two architectures anyway
+- arm64 and parisc.
+
+So this patch performs this adjustment and removes the unnecessary
+assignment of VM_MTE_ALLOWED in shmem_mmap().
+
+[akpm@linux-foundation.org: fix whitespace, per Catalin]
+Link: https://lkml.kernel.org/r/ec251b20ba1964fb64cf1607d2ad80c47f3873df.1730224667.git.lorenzo.stoakes@oracle.com
+Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
+Signed-off-by: Lorenzo Stoakes
+Suggested-by: Catalin Marinas
+Reported-by: Jann Horn
+Reviewed-by: Catalin Marinas
+Reviewed-by: Vlastimil Babka
+Cc: Andreas Larsson
+Cc: David S. Miller
+Cc: Helge Deller
+Cc: James E.J. Bottomley
+Cc: Liam R. Howlett
+Cc: Linus Torvalds
+Cc: Mark Brown
+Cc: Peter Xu
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Lorenzo Stoakes
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/include/asm/mman.h | 10 +++++++---
+ arch/parisc/include/asm/mman.h | 5 +++--
+ include/linux/mman.h | 7 ++++---
+ mm/mmap.c | 2 +-
+ mm/nommu.c | 2 +-
+ mm/shmem.c | 3 ---
+ 6 files changed, 16 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/include/asm/mman.h
++++ b/arch/arm64/include/asm/mman.h
+@@ -3,6 +3,8 @@
+ #define __ASM_MMAN_H__
+
+ #include
++#include
++#include
+ #include
+ #include
+
+@@ -21,19 +23,21 @@ static inline unsigned long arch_calc_vm
+ }
+ #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
++static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
++ unsigned long flags)
+ {
+ /*
+ * Only allow MTE on anonymous mappings as these are guaranteed to be
+ * backed by tags-capable memory. The vm_flags may be overridden by a
+ * filesystem supporting MTE (RAM-based).
+ */
+- if (system_supports_mte() && (flags & MAP_ANONYMOUS))
++ if (system_supports_mte() &&
++ ((flags & MAP_ANONYMOUS) || shmem_file(file)))
+ return VM_MTE_ALLOWED;
+
+ return 0;
+ }
+-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
++#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
+
+ static inline bool arch_validate_prot(unsigned long prot,
+ unsigned long addr __always_unused)
+--- a/arch/parisc/include/asm/mman.h
++++ b/arch/parisc/include/asm/mman.h
+@@ -2,6 +2,7 @@
+ #ifndef __ASM_MMAN_H__
+ #define __ASM_MMAN_H__
+
++#include
+ #include
+
+ /* PARISC cannot allow mdwe as it needs writable stacks */
+static inline bool arch_memory_deny_writ
+ {
+ return false;
+ }
+ #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+
+-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
++static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
+ {
+ /*
+ * The stack on parisc grows upwards, so if userspace requests memory
+@@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm
+
+ return 0;
+ }
+-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
++#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
+
+ #endif /* __ASM_MMAN_H__ */
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -2,6 +2,7 @@
+ #ifndef _LINUX_MMAN_H
+ #define _LINUX_MMAN_H
+
++#include
+ #include
+ #include
+
+@@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long
+ #endif
+
+ #ifndef arch_calc_vm_flag_bits
+-#define arch_calc_vm_flag_bits(flags) 0
++#define arch_calc_vm_flag_bits(file, flags) 0
+ #endif
+
+ #ifndef arch_validate_prot
+@@ -151,12 +152,12 @@ calc_vm_prot_bits(unsigned long prot, un
+ * Combine the mmap "flags" argument into "vm_flags" used internally.
+ */
+ static inline unsigned long
+-calc_vm_flag_bits(unsigned long flags)
++calc_vm_flag_bits(struct file *file, unsigned long flags)
+ {
+ return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
+ _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
+ _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
+- arch_calc_vm_flag_bits(flags);
++ arch_calc_vm_flag_bits(file, flags);
+ }
+
+ unsigned long vm_commit_limit(void);
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1273,7 +1273,7 @@ unsigned long do_mmap(struct file *file,
+ * to. we assume access permissions have been handled by the open
+ * of the memory object, so we don't do any here.
+ */
+- vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
++ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) |
+ mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+ if (flags & MAP_LOCKED)
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -853,7 +853,7 @@ static unsigned long determine_vm_flags(
+ {
+ unsigned long vm_flags;
+
+- vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
++ vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);
+
+ if (!file) {
+ /*
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2400,9 +2400,6 @@ static int shmem_mmap(struct file *file,
+ if (ret)
+ return ret;
+
+- /* arm64 - allow memory tagging on RAM-based files */
+- vm_flags_set(vma, VM_MTE_ALLOWED);
+-
+ file_accessed(file);
+ /* This is anonymous shared memory if it is unlinked at the time of mmap */
+ if (inode->i_nlink)
diff --git a/queue-6.6/mm-refactor-map_deny_write_exec.patch b/queue-6.6/mm-refactor-map_deny_write_exec.patch
new file mode 100644
index 00000000000..3e6ab9c0442
--- /dev/null
+++ b/queue-6.6/mm-refactor-map_deny_write_exec.patch
@@ -0,0 +1,104 @@
+From stable+bounces-93537-greg=kroah.com@vger.kernel.org Fri Nov 15 13:43:12 2024
+From: Lorenzo Stoakes
+Date: Fri, 15 Nov 2024 12:41:56 +0000
+Subject: mm: refactor map_deny_write_exec()
+To: stable@vger.kernel.org
+Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller
+Message-ID:
+
+From: Lorenzo Stoakes
+
+[ Upstream commit 0fb4a7ad270b3b209e510eb9dc5b07bf02b7edaf ]
+
+Refactor the map_deny_write_exec() to not unnecessarily require a VMA
+parameter but rather to accept VMA flags parameters, which allows us to
+use this function early in mmap_region() in a subsequent commit.
+
+While we're here, we refactor the function to be more readable and add
+some additional documentation.
+
+Link: https://lkml.kernel.org/r/6be8bb59cd7c68006ebb006eb9d8dc27104b1f70.1730224667.git.lorenzo.stoakes@oracle.com
+Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
+Signed-off-by: Lorenzo Stoakes
+Reported-by: Jann Horn
+Reviewed-by: Liam R. Howlett
+Reviewed-by: Vlastimil Babka
+Reviewed-by: Jann Horn
+Cc: Andreas Larsson
+Cc: Catalin Marinas
+Cc: David S. Miller
+Cc: Helge Deller
+Cc: James E.J. Bottomley
+Cc: Linus Torvalds
+Cc: Mark Brown
+Cc: Peter Xu
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Lorenzo Stoakes
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/mman.h | 21 ++++++++++++++++++---
+ mm/mmap.c | 2 +-
+ mm/mprotect.c | 2 +-
+ 3 files changed, 20 insertions(+), 5 deletions(-)
+
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -187,16 +187,31 @@ static inline bool arch_memory_deny_writ
+ *
+ * d) mmap(PROT_READ | PROT_EXEC)
+ * mmap(PROT_READ | PROT_EXEC | PROT_BTI)
++ *
++ * This is only applicable if the user has set the Memory-Deny-Write-Execute
++ * (MDWE) protection mask for the current process.
++ *
++ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
++ * we propose to set.
++ *
++ * Return: false if proposed change is OK, true if not ok and should be denied.
+ */
+-static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
++static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
+ {
++ /* If MDWE is disabled, we have nothing to deny. */
+ if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
+ return false;
+
+- if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
++ /* If the new VMA is not executable, we have nothing to deny. */
++ if (!(new & VM_EXEC))
++ return false;
++
++ /* Under MDWE we do not accept newly writably executable VMAs... */
++ if (new & VM_WRITE)
+ return true;
+
+- if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
++ /* ...nor previously non-executable VMAs becoming executable. */
++ if (!(old & VM_EXEC))
+ return true;
+
+ return false;
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2826,7 +2826,7 @@ cannot_expand:
+ vma_set_anonymous(vma);
+ }
+
+- if (map_deny_write_exec(vma, vma->vm_flags)) {
++ if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) {
+ error = -EACCES;
+ goto close_and_free_vma;
+ }
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -791,7 +791,7 @@ static int do_mprotect_pkey(unsigned lon
+ break;
+ }
+
+- if (map_deny_write_exec(vma, newflags)) {
++ if (map_deny_write_exec(vma->vm_flags, newflags)) {
+ error = -EACCES;
+ break;
+ }
diff --git a/queue-6.6/mm-resolve-faulty-mmap_region-error-path-behaviour.patch b/queue-6.6/mm-resolve-faulty-mmap_region-error-path-behaviour.patch
new file mode 100644
index 00000000000..03778a50743
--- /dev/null
+++ b/queue-6.6/mm-resolve-faulty-mmap_region-error-path-behaviour.patch
@@ -0,0 +1,280 @@
+From stable+bounces-93539-greg=kroah.com@vger.kernel.org Fri Nov 15 13:43:45 2024
+From: Lorenzo Stoakes
+Date: Fri, 15 Nov 2024 12:41:58 +0000
+Subject: mm: resolve faulty mmap_region() error path behaviour
+To: stable@vger.kernel.org
+Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller
+Message-ID:
+
+From: Lorenzo Stoakes
+
+[ Upstream commit 5de195060b2e251a835f622759550e6202167641 ]
+
+The mmap_region() function is somewhat terrifying, with spaghetti-like
+control flow and numerous means by which issues can arise and incomplete
+state, memory leaks and other unpleasantness can occur.
+
+A large amount of the complexity arises from trying to handle errors late
+in the process of mapping a VMA, which forms the basis of recently
+observed issues with resource leaks and observable inconsistent state.
+
+Taking advantage of previous patches in this series we move a number of
+checks earlier in the code, simplifying things by moving the core of the
+logic into a static internal function __mmap_region().
+
+Doing this allows us to perform a number of checks up front before we do
+any real work, and allows us to unwind the writable unmap check
+unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE
+validation unconditionally also.
+
+We move a number of things here:
+
+1. We preallocate memory for the iterator before we call the file-backed
+ memory hook, allowing us to exit early and avoid having to perform
+ complicated and error-prone close/free logic. We carefully free
+ iterator state on both success and error paths.
+
+2. The enclosing mmap_region() function handles the mapping_map_writable()
+ logic early. Previously the logic had the mapping_map_writable() at the
+ point of mapping a newly allocated file-backed VMA, and a matching
+ mapping_unmap_writable() on success and error paths.
+
+ We now do this unconditionally if this is a file-backed, shared writable
+ mapping. If a driver changes the flags to eliminate VM_MAYWRITE, however
+ doing so does not invalidate the seal check we just performed, and we in
+ any case always decrement the counter in the wrapper.
+
+ We perform a debug assert to ensure a driver does not attempt to do the
+ opposite.
+
+3. We also move arch_validate_flags() up into the mmap_region()
+ function. This is only relevant on arm64 and sparc64, and the check is
+ only meaningful for SPARC with ADI enabled. We explicitly add a warning
+ for this arch if a driver invalidates this check, though the code ought
+ eventually to be fixed to eliminate the need for this.
+
+With all of these measures in place, we no longer need to explicitly close
+the VMA on error paths, as we place all checks which might fail prior to a
+call to any driver mmap hook.
+
+This eliminates an entire class of errors, makes the code easier to reason
+about and more robust.
+
+Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com
+Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
+Signed-off-by: Lorenzo Stoakes
+Reported-by: Jann Horn
+Reviewed-by: Liam R. Howlett
+Reviewed-by: Vlastimil Babka
+Tested-by: Mark Brown
+Cc: Andreas Larsson
+Cc: Catalin Marinas
+Cc: David S. Miller
+Cc: Helge Deller
+Cc: James E.J. Bottomley
+Cc: Linus Torvalds
+Cc: Peter Xu
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Lorenzo Stoakes
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/mmap.c | 117 +++++++++++++++++++++++++++++++++++---------------------------
+ 1 file changed, 67 insertions(+), 50 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2666,14 +2666,14 @@ int do_munmap(struct mm_struct *mm, unsi
+ return do_vmi_munmap(&vmi, mm, start, len, uf, false);
+ }
+
+-unsigned long mmap_region(struct file *file, unsigned long addr,
++static unsigned long __mmap_region(struct file *file, unsigned long addr,
+ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+ struct list_head *uf)
+ {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma = NULL;
+ struct vm_area_struct *next, *prev, *merge;
+- pgoff_t pglen = len >> PAGE_SHIFT;
++ pgoff_t pglen = PHYS_PFN(len);
+ unsigned long charged = 0;
+ unsigned long end = addr + len;
+ unsigned long merge_start = addr, merge_end = end;
+@@ -2770,25 +2770,26 @@ cannot_expand:
+ vma->vm_page_prot = vm_get_page_prot(vm_flags);
+ vma->vm_pgoff = pgoff;
+
+- if (file) {
+- if (vm_flags & VM_SHARED) {
+- error = mapping_map_writable(file->f_mapping);
+- if (error)
+- goto free_vma;
+- }
++ if (vma_iter_prealloc(&vmi, vma)) {
++ error = -ENOMEM;
++ goto free_vma;
++ }
+
++ if (file) {
+ vma->vm_file = get_file(file);
+ error = mmap_file(file, vma);
+ if (error)
+- goto unmap_and_free_vma;
++ goto unmap_and_free_file_vma;
+
++ /* Drivers cannot alter the address of the VMA. */
++ WARN_ON_ONCE(addr != vma->vm_start);
+ /*
+- * Expansion is handled above, merging is handled below.
+- * Drivers should not alter the address of the VMA.
++ * Drivers should not permit writability when previously it was
++ * disallowed.
+ */
+- error = -EINVAL;
+- if (WARN_ON((addr != vma->vm_start)))
+- goto close_and_free_vma;
++ VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
++ !(vm_flags & VM_MAYWRITE) &&
++ (vma->vm_flags & VM_MAYWRITE));
+
+ vma_iter_config(&vmi, addr, end);
+ /*
+@@ -2800,6 +2801,7 @@ cannot_expand:
+ vma->vm_end, vma->vm_flags, NULL,
+ vma->vm_file, vma->vm_pgoff, NULL,
+ NULL_VM_UFFD_CTX, NULL);
++
+ if (merge) {
+ /*
+ * ->mmap() can change vma->vm_file and fput
+@@ -2813,7 +2815,7 @@ cannot_expand:
+ vma = merge;
+ /* Update vm_flags to pick up the change. */
+ vm_flags = vma->vm_flags;
+- goto unmap_writable;
++ goto file_expanded;
+ }
+ }
+
+@@ -2821,24 +2823,15 @@ cannot_expand:
+ } else if (vm_flags & VM_SHARED) {
+ error = shmem_zero_setup(vma);
+ if (error)
+- goto free_vma;
++ goto free_iter_vma;
+ } else {
+ vma_set_anonymous(vma);
+ }
+
+- if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) {
+- error = -EACCES;
+- goto close_and_free_vma;
+- }
+-
+- /* Allow architectures to sanity-check the vm_flags */
+- error = -EINVAL;
+- if (!arch_validate_flags(vma->vm_flags))
+- goto close_and_free_vma;
+-
+- error = -ENOMEM;
+- if (vma_iter_prealloc(&vmi, vma))
+- goto close_and_free_vma;
++#ifdef CONFIG_SPARC64
++ /* TODO: Fix SPARC ADI! */
++ WARN_ON_ONCE(!arch_validate_flags(vm_flags));
++#endif
+
+ /* Lock the VMA since it is modified after insertion into VMA tree */
+ vma_start_write(vma);
+@@ -2861,10 +2854,7 @@ cannot_expand:
+ */
+ khugepaged_enter_vma(vma, vma->vm_flags);
+
+- /* Once vma denies write, undo our temporary denial count */
+-unmap_writable:
+- if (file && vm_flags & VM_SHARED)
+- mapping_unmap_writable(file->f_mapping);
++file_expanded:
+ file = vma->vm_file;
+ ksm_add_vma(vma);
+ expanded:
+@@ -2894,33 +2884,60 @@ expanded:
+
+ vma_set_page_prot(vma);
+
+- validate_mm(mm);
+ return addr;
+
+-close_and_free_vma:
+- vma_close(vma);
+-
+- if (file || vma->vm_file) {
+-unmap_and_free_vma:
+- fput(vma->vm_file);
+- vma->vm_file = NULL;
+-
+- vma_iter_set(&vmi, vma->vm_end);
+- /* Undo any partial mapping done by a device driver. */
+- unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
+- vma->vm_end, vma->vm_end, true);
+- }
+- if (file && (vm_flags & VM_SHARED))
+- mapping_unmap_writable(file->f_mapping);
++unmap_and_free_file_vma:
++ fput(vma->vm_file);
++ vma->vm_file = NULL;
++
++ vma_iter_set(&vmi, vma->vm_end);
++ /* Undo any partial mapping done by a device driver. */
++ unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
++ vma->vm_end, vma->vm_end, true);
++free_iter_vma:
++ vma_iter_free(&vmi);
+ free_vma:
+ vm_area_free(vma);
+ unacct_error:
+ if (charged)
+ vm_unacct_memory(charged);
+- validate_mm(mm);
+ return error;
+ }
+
++unsigned long mmap_region(struct file *file, unsigned long addr,
++ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
++ struct list_head *uf)
++{
++ unsigned long ret;
++ bool writable_file_mapping = false;
++
++ /* Check to see if MDWE is applicable. */
++ if (map_deny_write_exec(vm_flags, vm_flags))
++ return -EACCES;
++
++ /* Allow architectures to sanity-check the vm_flags. */
++ if (!arch_validate_flags(vm_flags))
++ return -EINVAL;
++
++ /* Map writable and ensure this isn't a sealed memfd. */
++ if (file && (vm_flags & VM_SHARED)) {
++ int error = mapping_map_writable(file->f_mapping);
++
++ if (error)
++ return error;
++ writable_file_mapping = true;
++ }
++
++ ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
++
++ /* Clear our write mapping regardless of error. */
++ if (writable_file_mapping)
++ mapping_unmap_writable(file->f_mapping);
++
++ validate_mm(current->mm);
++ return ret;
++}
++
+ static int __vm_munmap(unsigned long start, size_t len, bool unlock)
+ {
+ int ret;
diff --git a/queue-6.6/mm-unconditionally-close-vmas-on-error.patch b/queue-6.6/mm-unconditionally-close-vmas-on-error.patch
new file mode 100644
index 00000000000..5e96fd260b7
--- /dev/null
+++ b/queue-6.6/mm-unconditionally-close-vmas-on-error.patch
@@ -0,0 +1,126 @@
+From stable+bounces-93536-greg=kroah.com@vger.kernel.org Fri Nov 15 13:42:55 2024
+From: Lorenzo Stoakes
+Date: Fri, 15 Nov 2024 12:41:55 +0000
+Subject: mm: unconditionally close VMAs on error
+To: stable@vger.kernel.org
+Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller
+Message-ID:
+
+From: Lorenzo Stoakes
+
+[ Upstream commit 4080ef1579b2413435413988d14ac8c68e4d42c8 ]
+
+Incorrect invocation of VMA callbacks when the VMA is no longer in a
+consistent state is bug prone and risky to perform.
+
+With regards to the important vm_ops->close() callback we have gone to
+great lengths to try to track whether or not we ought to close VMAs.
+
+Rather than doing so and risking making a mistake somewhere, instead
+unconditionally close and reset vma->vm_ops to an empty dummy operations
+set with a NULL .close operator.
+
+We introduce a new function to do so - vma_close() - and simplify existing
+vms logic which tracked whether we needed to close or not.
+
+This simplifies the logic, avoids incorrect double-calling of the .close()
+callback and allows us to update error paths to simply call vma_close()
+unconditionally - making VMA closure idempotent.
+
+Link: https://lkml.kernel.org/r/28e89dda96f68c505cb6f8e9fc9b57c3e9f74b42.1730224667.git.lorenzo.stoakes@oracle.com
+Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
+Signed-off-by: Lorenzo Stoakes
+Reported-by: Jann Horn
+Reviewed-by: Vlastimil Babka
+Reviewed-by: Liam R. Howlett
+Reviewed-by: Jann Horn
+Cc: Andreas Larsson
+Cc: Catalin Marinas
+Cc: David S. Miller
+Cc: Helge Deller
+Cc: James E.J. Bottomley
+Cc: Linus Torvalds
+Cc: Mark Brown
+Cc: Peter Xu
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Lorenzo Stoakes
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/internal.h | 18 ++++++++++++++++++
+ mm/mmap.c | 9 +++------
+ mm/nommu.c | 3 +--
+ 3 files changed, 22 insertions(+), 8 deletions(-)
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -110,6 +110,24 @@ static inline int mmap_file(struct file
+ return err;
+ }
+
++/*
++ * If the VMA has a close hook then close it, and since closing it might leave
++ * it in an inconsistent state which makes the use of any hooks suspect, clear
++ * them down by installing dummy empty hooks.
++ */
++static inline void vma_close(struct vm_area_struct *vma)
++{
++ if (vma->vm_ops && vma->vm_ops->close) {
++ vma->vm_ops->close(vma);
++
++ /*
++ * The mapping is in an inconsistent state, and no further hooks
++ * may be invoked upon it.
++ */
++ vma->vm_ops = &vma_dummy_vm_ops;
++ }
++}
++
+ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+ int nr_throttled);
+ static inline void acct_reclaim_writeback(struct folio *folio)
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -137,8 +137,7 @@ void unlink_file_vma(struct vm_area_stru
+ static void remove_vma(struct vm_area_struct *vma, bool unreachable)
+ {
+ might_sleep();
+- if (vma->vm_ops && vma->vm_ops->close)
+- vma->vm_ops->close(vma);
++ vma_close(vma);
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ mpol_put(vma_policy(vma));
+@@ -2899,8 +2898,7 @@ expanded:
+ return addr;
+
+ close_and_free_vma:
+- if (file && vma->vm_ops && vma->vm_ops->close)
+- vma->vm_ops->close(vma);
++ vma_close(vma);
+
+ if (file || vma->vm_file) {
+ unmap_and_free_vma:
+@@ -3392,8 +3390,7 @@ struct vm_area_struct *copy_vma(struct v
+ return new_vma;
+
+ out_vma_link:
+- if (new_vma->vm_ops && new_vma->vm_ops->close)
+- vma_close(new_vma);
++ vma_close(new_vma);
+
+ if (new_vma->vm_file)
+ fput(new_vma->vm_file);
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -600,8 +600,7 @@ static int delete_vma_from_mm(struct vm_
+ */
+ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+ {
+- if (vma->vm_ops && vma->vm_ops->close)
+- vma->vm_ops->close(vma);
++ vma_close(vma);
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ put_nommu_region(vma->vm_region);
diff --git a/queue-6.6/series b/queue-6.6/series
index d68b638edf8..aa2ddefd17a 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -72,3 +72,8 @@ mptcp-pm-use-_rcu-variant-under-rcu_read_lock.patch
 drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-write.patch
 fs-9p-fix-uninitialized-values-during-inode-evict.patch
 leds-mlxreg-use-devm_mutex_init-for-mutex-initialization.patch
+mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch
+mm-unconditionally-close-vmas-on-error.patch
+mm-refactor-map_deny_write_exec.patch
+mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch
+mm-resolve-faulty-mmap_region-error-path-behaviour.patch
-- 
2.47.2
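
The dummy-ops pattern that the first, second and fifth patches rely on - the moment a hook reports failure, replace the object's operations table with an empty one so no later teardown can call back into a half-initialised driver - can be demonstrated outside the kernel. The following is a minimal userspace C sketch of the idea, not kernel code: struct ops, struct object, map_object() and close_object() are invented stand-ins for vm_operations_struct, vm_area_struct, mmap_file() and vma_close().

#include <stdio.h>

/* Illustrative stand-in for vm_operations_struct. */
struct ops {
	void (*close)(void *obj);
};

/* All hooks NULL, playing the role of vma_dummy_vm_ops. */
static const struct ops dummy_ops;

struct object {
	const struct ops *ops;
};

/* Analogue of mmap_file(): on error, strip the hooks at once. */
static int map_object(struct object *obj, int (*hook)(struct object *))
{
	int err = hook(obj);

	if (err)
		obj->ops = &dummy_ops;	/* object is now suspect */
	return err;
}

/* Analogue of vma_close(): idempotent, safe on every error path. */
static void close_object(struct object *obj)
{
	if (obj->ops && obj->ops->close) {
		obj->ops->close(obj);
		obj->ops = &dummy_ops;	/* never run close() twice */
	}
}

static void real_close(void *obj)
{
	puts("close() ran exactly once");
}

static const struct ops real_ops = { .close = real_close };

static int failing_hook(struct object *obj)
{
	return -1;	/* simulate an f_op->mmap() failure */
}

int main(void)
{
	struct object o = { .ops = &real_ops };

	close_object(&o);	/* runs the real close hook */
	close_object(&o);	/* no-op: hooks were reset */

	o.ops = &real_ops;
	if (map_object(&o, failing_hook))
		close_object(&o);	/* no-op: map_object() stripped the hooks */
	return 0;
}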
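
After the map_deny_write_exec() refactor, the check is a pure predicate over the old and new VMA flag values, so its decision table can be exercised standalone. A minimal sketch, assuming only the semantics documented in the patch: the per-process MMF_HAS_MDWE test is dropped (a user program has no mm), and VM_WRITE/VM_EXEC are redefined locally with their usual kernel values.

#include <assert.h>
#include <stdbool.h>

#define VM_WRITE 0x00000002UL
#define VM_EXEC  0x00000004UL

/* Mirror of the refactored kernel check, minus the MDWE-enabled test. */
static bool deny_write_exec(unsigned long old, unsigned long new)
{
	/* If the new VMA is not executable, there is nothing to deny. */
	if (!(new & VM_EXEC))
		return false;

	/* No newly writably-executable VMAs... */
	if (new & VM_WRITE)
		return true;

	/* ...nor previously non-executable VMAs becoming executable. */
	if (!(old & VM_EXEC))
		return true;

	return false;
}

int main(void)
{
	/* mmap_region() passes old == new: a W|X mapping is refused outright. */
	assert(deny_write_exec(VM_WRITE | VM_EXEC, VM_WRITE | VM_EXEC));
	/* mprotect() adding PROT_EXEC to a non-exec VMA is refused. */
	assert(deny_write_exec(VM_WRITE, VM_EXEC));
	/* An already-executable VMA may remain executable. */
	assert(!deny_write_exec(VM_EXEC, VM_EXEC));
	/* Plain read/write mappings are never denied. */
	assert(!deny_write_exec(0, VM_WRITE));
	return 0;
}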
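
The check that the mmap_region() rework moves to the front is observable from userspace: once a process enables MDWE, a writable-executable mapping fails with EACCES before any driver or file hook has run, rather than being unwound afterwards. A quick test sketch - it assumes a Linux 6.3+ kernel for PR_SET_MDWE, and repeats the prctl constants defensively in case the installed headers predate them:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_MDWE
#define PR_SET_MDWE 65
#define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
#endif

int main(void)
{
	/* Opt in to Memory-Deny-Write-Execute for this process. */
	if (prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0)) {
		perror("prctl(PR_SET_MDWE) - kernel too old?");
		return 1;
	}

	/* Under MDWE this must now fail up front with EACCES. */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED && errno == EACCES)
		puts("W|X mapping denied with EACCES, as expected");
	else
		printf("unexpected: p=%p errno=%s\n", p, strerror(errno));
	return 0;
}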