From 57799fdd080c72dd9078f10aadc3d063f6a5bea8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Nov 2024 15:25:24 +0100 Subject: [PATCH] 6.1-stable patches added patches: mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch mm-resolve-faulty-mmap_region-error-path-behaviour.patch mm-unconditionally-close-vmas-on-error.patch --- ...ation-when-error-arises-on-mmap-hook.patch | 182 ++++++++++++ ..._vm_flag_bits-and-arm64-mte-handling.patch | 178 ++++++++++++ ...lty-mmap_region-error-path-behaviour.patch | 259 ++++++++++++++++++ ...-unconditionally-close-vmas-on-error.patch | 150 ++++++++++ queue-6.1/series | 4 + 5 files changed, 773 insertions(+) create mode 100644 queue-6.1/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch create mode 100644 queue-6.1/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch create mode 100644 queue-6.1/mm-resolve-faulty-mmap_region-error-path-behaviour.patch create mode 100644 queue-6.1/mm-unconditionally-close-vmas-on-error.patch diff --git a/queue-6.1/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch b/queue-6.1/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch new file mode 100644 index 00000000000..0a357f2faac --- /dev/null +++ b/queue-6.1/mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch @@ -0,0 +1,182 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +From: Lorenzo Stoakes +Date: Mon, 18 Nov 2024 16:17:25 +0000 +Subject: mm: avoid unsafe VMA hook invocation when error arises on mmap hook +To: stable@vger.kernel.org +Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller +Message-ID: + +From: Lorenzo Stoakes + +[ Upstream commit 3dd6ed34ce1f2356a77fb88edafb5ec96784e3cf ] + +Patch series "fix error handling in mmap_region() and refactor +(hotfixes)", v4. + +mmap_region() is somewhat terrifying, with spaghetti-like control flow and +numerous means by which issues can arise and incomplete state, memory +leaks and other unpleasantness can occur. + +A large amount of the complexity arises from trying to handle errors late +in the process of mapping a VMA, which forms the basis of recently +observed issues with resource leaks and observable inconsistent state. + +This series goes to great lengths to simplify how mmap_region() works and +to avoid unwinding errors late on in the process of setting up the VMA for +the new mapping, and equally avoids such operations occurring while the +VMA is in an inconsistent state. + +The patches in this series comprise the minimal changes required to +resolve existing issues in mmap_region() error handling, in order that +they can be hotfixed and backported. There is additionally a follow up +series which goes further, separated out from the v1 series and sent and +updated separately. + +This patch (of 5): + +After an attempted mmap() fails, we are no longer in a situation where we +can safely interact with VMA hooks. This is currently not enforced, +meaning that we need complicated handling to ensure we do not incorrectly +call these hooks. 
+ +We can avoid the whole issue by treating the VMA as suspect the moment +that the file->f_ops->mmap() function reports an error by replacing +whatever VMA operations were installed with a dummy empty set of VMA +operations. + +We do so through a new helper function internal to mm - mmap_file() - +which is both more logically named than the existing call_mmap() function +and correctly isolates handling of the vm_op reassignment to mm. + +All the existing invocations of call_mmap() outside of mm are ultimately +nested within the call_mmap() from mm, which we now replace. + +It is therefore safe to leave call_mmap() in place as a convenience + function (and to avoid churn). The invokers are: + + ovl_file_operations -> mmap -> ovl_mmap() -> backing_file_mmap() + coda_file_operations -> mmap -> coda_file_mmap() + shm_file_operations -> shm_mmap() +shm_file_operations_huge -> shm_mmap() + dma_buf_fops -> dma_buf_mmap_internal -> i915_dmabuf_ops + -> i915_gem_dmabuf_mmap() + +None of these callers interact with vm_ops or mappings in a problematic +way on error, quickly exiting out. + +Link: https://lkml.kernel.org/r/cover.1730224667.git.lorenzo.stoakes@oracle.com +Link: https://lkml.kernel.org/r/d41fd763496fd0048a962f3fd9407dc72dd4fd86.1730224667.git.lorenzo.stoakes@oracle.com +Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") +Signed-off-by: Lorenzo Stoakes +Reported-by: Jann Horn +Reviewed-by: Liam R. Howlett +Reviewed-by: Vlastimil Babka +Reviewed-by: Jann Horn +Cc: Andreas Larsson +Cc: Catalin Marinas +Cc: David S. Miller +Cc: Helge Deller +Cc: James E.J. Bottomley +Cc: Linus Torvalds +Cc: Mark Brown +Cc: Peter Xu +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Lorenzo Stoakes +Signed-off-by: Greg Kroah-Hartman +--- + mm/internal.h | 12 ++++++++++++ + mm/mmap.c | 4 ++-- + mm/nommu.c | 4 ++-- + mm/util.c | 18 ++++++++++++++++++ + 4 files changed, 34 insertions(+), 4 deletions(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -52,6 +52,18 @@ struct folio_batch; + + void page_writeback_init(void); + ++/* ++ * This is a file-backed mapping, and is about to be memory mapped - invoke its ++ * mmap hook and safely handle error conditions. On error, VMA hooks will be ++ * mutated. ++ * ++ * @file: File which backs the mapping. ++ * @vma: VMA which we are mapping. ++ * ++ * Returns: 0 if success, error otherwise. ++ */ ++int mmap_file(struct file *file, struct vm_area_struct *vma); ++ + static inline void *folio_raw_mapping(struct folio *folio) + { + unsigned long mapping = (unsigned long)folio->mapping; +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2760,7 +2760,7 @@ cannot_expand: + } + + vma->vm_file = get_file(file); +- error = call_mmap(file, vma); ++ error = mmap_file(file, vma); + if (error) + goto unmap_and_free_vma; + +@@ -2775,7 +2775,7 @@ cannot_expand: + mas_reset(&mas); + + /* +- * If vm_flags changed after call_mmap(), we should try merge ++ * If vm_flags changed after mmap_file(), we should try merge + * vma again as we may succeed this time. 
+ */ + if (unlikely(vm_flags != vma->vm_flags && prev)) { +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -939,7 +939,7 @@ static int do_mmap_shared_file(struct vm + { + int ret; + +- ret = call_mmap(vma->vm_file, vma); ++ ret = mmap_file(vma->vm_file, vma); + if (ret == 0) { + vma->vm_region->vm_top = vma->vm_region->vm_end; + return 0; +@@ -970,7 +970,7 @@ static int do_mmap_private(struct vm_are + * - VM_MAYSHARE will be set if it may attempt to share + */ + if (capabilities & NOMMU_MAP_DIRECT) { +- ret = call_mmap(vma->vm_file, vma); ++ ret = mmap_file(vma->vm_file, vma); + if (ret == 0) { + /* shouldn't return success if we're not sharing */ + BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); +--- a/mm/util.c ++++ b/mm/util.c +@@ -1103,6 +1103,24 @@ int __weak memcmp_pages(struct page *pag + return ret; + } + ++int mmap_file(struct file *file, struct vm_area_struct *vma) ++{ ++ static const struct vm_operations_struct dummy_vm_ops = {}; ++ int err = call_mmap(file, vma); ++ ++ if (likely(!err)) ++ return 0; ++ ++ /* ++ * OK, we tried to call the file hook for mmap(), but an error ++ * arose. The mapping is in an inconsistent state and we most not invoke ++ * any further hooks on it. ++ */ ++ vma->vm_ops = &dummy_vm_ops; ++ ++ return err; ++} ++ + #ifdef CONFIG_PRINTK + /** + * mem_dump_obj - Print available provenance information diff --git a/queue-6.1/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch b/queue-6.1/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch new file mode 100644 index 00000000000..33b92b11ffe --- /dev/null +++ b/queue-6.1/mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch @@ -0,0 +1,178 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +From: Lorenzo Stoakes +Date: Mon, 18 Nov 2024 16:17:27 +0000 +Subject: mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling +To: stable@vger.kernel.org +Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller +Message-ID: + +From: Lorenzo Stoakes + +[ Upstream commit 5baf8b037debf4ec60108ccfeccb8636d1dbad81 ] + +Currently MTE is permitted in two circumstances (desiring to use MTE +having been specified by the VM_MTE flag) - where MAP_ANONYMOUS is +specified, as checked by arch_calc_vm_flag_bits() and actualised by +setting the VM_MTE_ALLOWED flag, or if the file backing the mapping is +shmem, in which case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap +hook is activated in mmap_region(). + +The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also +set is the arm64 implementation of arch_validate_flags(). + +Unfortunately, we intend to refactor mmap_region() to perform this check +earlier, meaning that in the case of a shmem backing we will not have +invoked shmem_mmap() yet, causing the mapping to fail spuriously. + +It is inappropriate to set this architecture-specific flag in general mm +code anyway, so a sensible resolution of this issue is to instead move the +check somewhere else. + +We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via +the arch_calc_vm_flag_bits() call. + +This is an appropriate place to do this as we already check for the +MAP_ANONYMOUS case here, and the shmem file case is simply a variant of +the same idea - we permit RAM-backed memory. 
+ +This requires a modification to the arch_calc_vm_flag_bits() signature to +pass in a pointer to the struct file associated with the mapping, however +this is not too egregious as this is only used by two architectures anyway +- arm64 and parisc. + +So this patch performs this adjustment and removes the unnecessary +assignment of VM_MTE_ALLOWED in shmem_mmap(). + +[akpm@linux-foundation.org: fix whitespace, per Catalin] +Link: https://lkml.kernel.org/r/ec251b20ba1964fb64cf1607d2ad80c47f3873df.1730224667.git.lorenzo.stoakes@oracle.com +Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") +Signed-off-by: Lorenzo Stoakes +Suggested-by: Catalin Marinas +Reported-by: Jann Horn +Reviewed-by: Catalin Marinas +Reviewed-by: Vlastimil Babka +Cc: Andreas Larsson +Cc: David S. Miller +Cc: Helge Deller +Cc: James E.J. Bottomley +Cc: Liam R. Howlett +Cc: Linus Torvalds +Cc: Mark Brown +Cc: Peter Xu +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Lorenzo Stoakes +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/mman.h | 10 +++++++--- + include/linux/mman.h | 7 ++++--- + mm/mmap.c | 2 +- + mm/nommu.c | 2 +- + mm/shmem.c | 3 --- + 5 files changed, 13 insertions(+), 11 deletions(-) + +--- a/arch/arm64/include/asm/mman.h ++++ b/arch/arm64/include/asm/mman.h +@@ -3,6 +3,8 @@ + #define __ASM_MMAN_H__ + + #include ++#include ++#include + #include + #include + +@@ -21,19 +23,21 @@ static inline unsigned long arch_calc_vm + } + #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) ++static inline unsigned long arch_calc_vm_flag_bits(struct file *file, ++ unsigned long flags) + { + /* + * Only allow MTE on anonymous mappings as these are guaranteed to be + * backed by tags-capable memory. The vm_flags may be overridden by a + * filesystem supporting MTE (RAM-based). + */ +- if (system_supports_mte() && (flags & MAP_ANONYMOUS)) ++ if (system_supports_mte() && ++ ((flags & MAP_ANONYMOUS) || shmem_file(file))) + return VM_MTE_ALLOWED; + + return 0; + } +-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) ++#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) + + static inline bool arch_validate_prot(unsigned long prot, + unsigned long addr __always_unused) +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -2,6 +2,7 @@ + #ifndef _LINUX_MMAN_H + #define _LINUX_MMAN_H + ++#include + #include + #include + +@@ -90,7 +91,7 @@ static inline void vm_unacct_memory(long + #endif + + #ifndef arch_calc_vm_flag_bits +-#define arch_calc_vm_flag_bits(flags) 0 ++#define arch_calc_vm_flag_bits(file, flags) 0 + #endif + + #ifndef arch_validate_prot +@@ -147,12 +148,12 @@ calc_vm_prot_bits(unsigned long prot, un + * Combine the mmap "flags" argument into "vm_flags" used internally. + */ + static inline unsigned long +-calc_vm_flag_bits(unsigned long flags) ++calc_vm_flag_bits(struct file *file, unsigned long flags) + { + return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +- arch_calc_vm_flag_bits(flags); ++ arch_calc_vm_flag_bits(file, flags); + } + + unsigned long vm_commit_limit(void); +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1316,7 +1316,7 @@ unsigned long do_mmap(struct file *file, + * to. we assume access permissions have been handled by the open + * of the memory object, so we don't do any here. 
+ */ +- vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | ++ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + + if (flags & MAP_LOCKED) +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -903,7 +903,7 @@ static unsigned long determine_vm_flags( + { + unsigned long vm_flags; + +- vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); ++ vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags); + /* vm_flags |= mm->def_flags; */ + + if (!(capabilities & NOMMU_MAP_DIRECT)) { +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2306,9 +2306,6 @@ static int shmem_mmap(struct file *file, + if (ret) + return ret; + +- /* arm64 - allow memory tagging on RAM-based files */ +- vma->vm_flags |= VM_MTE_ALLOWED; +- + file_accessed(file); + vma->vm_ops = &shmem_vm_ops; + return 0; diff --git a/queue-6.1/mm-resolve-faulty-mmap_region-error-path-behaviour.patch b/queue-6.1/mm-resolve-faulty-mmap_region-error-path-behaviour.patch new file mode 100644 index 00000000000..f991810451e --- /dev/null +++ b/queue-6.1/mm-resolve-faulty-mmap_region-error-path-behaviour.patch @@ -0,0 +1,259 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +From: Lorenzo Stoakes +Date: Mon, 18 Nov 2024 16:17:28 +0000 +Subject: mm: resolve faulty mmap_region() error path behaviour +To: stable@vger.kernel.org +Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . Bottomley" , Helge Deller +Message-ID: + +From: Lorenzo Stoakes + +[ Upstream commit 5de195060b2e251a835f622759550e6202167641 ] + +The mmap_region() function is somewhat terrifying, with spaghetti-like +control flow and numerous means by which issues can arise and incomplete +state, memory leaks and other unpleasantness can occur. + +A large amount of the complexity arises from trying to handle errors late +in the process of mapping a VMA, which forms the basis of recently +observed issues with resource leaks and observable inconsistent state. + +Taking advantage of previous patches in this series we move a number of +checks earlier in the code, simplifying things by moving the core of the +logic into a static internal function __mmap_region(). + +Doing this allows us to perform a number of checks up front before we do +any real work, and allows us to unwind the writable unmap check +unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE +validation unconditionally also. + +We move a number of things here: + +1. We preallocate memory for the iterator before we call the file-backed + memory hook, allowing us to exit early and avoid having to perform + complicated and error-prone close/free logic. We carefully free + iterator state on both success and error paths. + +2. The enclosing mmap_region() function handles the mapping_map_writable() + logic early. Previously the logic had the mapping_map_writable() at the + point of mapping a newly allocated file-backed VMA, and a matching + mapping_unmap_writable() on success and error paths. + + We now do this unconditionally if this is a file-backed, shared writable + mapping. If a driver changes the flags to eliminate VM_MAYWRITE, however + doing so does not invalidate the seal check we just performed, and we in + any case always decrement the counter in the wrapper. 
+ + We perform a debug assert to ensure a driver does not attempt to do the + opposite. + +3. We also move arch_validate_flags() up into the mmap_region() + function. This is only relevant on arm64 and sparc64, and the check is + only meaningful for SPARC with ADI enabled. We explicitly add a warning + for this arch if a driver invalidates this check, though the code ought + eventually to be fixed to eliminate the need for this. + +With all of these measures in place, we no longer need to explicitly close +the VMA on error paths, as we place all checks which might fail prior to a +call to any driver mmap hook. + +This eliminates an entire class of errors, makes the code easier to reason +about and more robust. + +Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com +Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") +Signed-off-by: Lorenzo Stoakes +Reported-by: Jann Horn +Reviewed-by: Liam R. Howlett +Reviewed-by: Vlastimil Babka +Tested-by: Mark Brown +Cc: Andreas Larsson +Cc: Catalin Marinas +Cc: David S. Miller +Cc: Helge Deller +Cc: James E.J. Bottomley +Cc: Linus Torvalds +Cc: Peter Xu +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Lorenzo Stoakes +Signed-off-by: Greg Kroah-Hartman +--- + mm/mmap.c | 104 +++++++++++++++++++++++++++++++++----------------------------- + 1 file changed, 57 insertions(+), 47 deletions(-) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2652,7 +2652,7 @@ int do_munmap(struct mm_struct *mm, unsi + return do_mas_munmap(&mas, mm, start, len, uf, false); + } + +-unsigned long mmap_region(struct file *file, unsigned long addr, ++static unsigned long __mmap_region(struct file *file, unsigned long addr, + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf) + { +@@ -2750,26 +2750,28 @@ cannot_expand: + vma->vm_page_prot = vm_get_page_prot(vm_flags); + vma->vm_pgoff = pgoff; + +- if (file) { +- if (vm_flags & VM_SHARED) { +- error = mapping_map_writable(file->f_mapping); +- if (error) +- goto free_vma; +- } ++ if (mas_preallocate(&mas, vma, GFP_KERNEL)) { ++ error = -ENOMEM; ++ goto free_vma; ++ } + ++ if (file) { + vma->vm_file = get_file(file); + error = mmap_file(file, vma); + if (error) +- goto unmap_and_free_vma; ++ goto unmap_and_free_file_vma; ++ ++ /* Drivers cannot alter the address of the VMA. */ ++ WARN_ON_ONCE(addr != vma->vm_start); + + /* +- * Expansion is handled above, merging is handled below. +- * Drivers should not alter the address of the VMA. ++ * Drivers should not permit writability when previously it was ++ * disallowed. + */ +- if (WARN_ON((addr != vma->vm_start))) { +- error = -EINVAL; +- goto close_and_free_vma; +- } ++ VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && ++ !(vm_flags & VM_MAYWRITE) && ++ (vma->vm_flags & VM_MAYWRITE)); ++ + mas_reset(&mas); + + /* +@@ -2792,7 +2794,8 @@ cannot_expand: + vma = merge; + /* Update vm_flags to pick up the change. 
*/ + vm_flags = vma->vm_flags; +- goto unmap_writable; ++ mas_destroy(&mas); ++ goto file_expanded; + } + } + +@@ -2800,31 +2803,15 @@ cannot_expand: + } else if (vm_flags & VM_SHARED) { + error = shmem_zero_setup(vma); + if (error) +- goto free_vma; ++ goto free_iter_vma; + } else { + vma_set_anonymous(vma); + } + +- /* Allow architectures to sanity-check the vm_flags */ +- if (!arch_validate_flags(vma->vm_flags)) { +- error = -EINVAL; +- if (file) +- goto close_and_free_vma; +- else if (vma->vm_file) +- goto unmap_and_free_vma; +- else +- goto free_vma; +- } +- +- if (mas_preallocate(&mas, vma, GFP_KERNEL)) { +- error = -ENOMEM; +- if (file) +- goto close_and_free_vma; +- else if (vma->vm_file) +- goto unmap_and_free_vma; +- else +- goto free_vma; +- } ++#ifdef CONFIG_SPARC64 ++ /* TODO: Fix SPARC ADI! */ ++ WARN_ON_ONCE(!arch_validate_flags(vm_flags)); ++#endif + + if (vma->vm_file) + i_mmap_lock_write(vma->vm_file->f_mapping); +@@ -2847,10 +2834,7 @@ cannot_expand: + */ + khugepaged_enter_vma(vma, vma->vm_flags); + +- /* Once vma denies write, undo our temporary denial count */ +-unmap_writable: +- if (file && vm_flags & VM_SHARED) +- mapping_unmap_writable(file->f_mapping); ++file_expanded: + file = vma->vm_file; + expanded: + perf_event_mmap(vma); +@@ -2879,28 +2863,54 @@ expanded: + + vma_set_page_prot(vma); + +- validate_mm(mm); + return addr; + +-close_and_free_vma: +- vma_close(vma); +-unmap_and_free_vma: ++unmap_and_free_file_vma: + fput(vma->vm_file); + vma->vm_file = NULL; + + /* Undo any partial mapping done by a device driver. */ + unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end); +- if (file && (vm_flags & VM_SHARED)) +- mapping_unmap_writable(file->f_mapping); ++free_iter_vma: ++ mas_destroy(&mas); + free_vma: + vm_area_free(vma); + unacct_error: + if (charged) + vm_unacct_memory(charged); +- validate_mm(mm); + return error; + } + ++unsigned long mmap_region(struct file *file, unsigned long addr, ++ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, ++ struct list_head *uf) ++{ ++ unsigned long ret; ++ bool writable_file_mapping = false; ++ ++ /* Allow architectures to sanity-check the vm_flags. */ ++ if (!arch_validate_flags(vm_flags)) ++ return -EINVAL; ++ ++ /* Map writable and ensure this isn't a sealed memfd. */ ++ if (file && (vm_flags & VM_SHARED)) { ++ int error = mapping_map_writable(file->f_mapping); ++ ++ if (error) ++ return error; ++ writable_file_mapping = true; ++ } ++ ++ ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf); ++ ++ /* Clear our write mapping regardless of error. */ ++ if (writable_file_mapping) ++ mapping_unmap_writable(file->f_mapping); ++ ++ validate_mm(current->mm); ++ return ret; ++} ++ + static int __vm_munmap(unsigned long start, size_t len, bool downgrade) + { + int ret; diff --git a/queue-6.1/mm-unconditionally-close-vmas-on-error.patch b/queue-6.1/mm-unconditionally-close-vmas-on-error.patch new file mode 100644 index 00000000000..6f4eaf85fe4 --- /dev/null +++ b/queue-6.1/mm-unconditionally-close-vmas-on-error.patch @@ -0,0 +1,150 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +From: Lorenzo Stoakes +Date: Mon, 18 Nov 2024 16:17:26 +0000 +Subject: mm: unconditionally close VMAs on error +To: stable@vger.kernel.org +Cc: Andrew Morton , "Liam R . Howlett" , Vlastimil Babka , Jann Horn , linux-kernel@vger.kernel.org, linux-mm@kvack.org, Linus Torvalds , Peter Xu , Catalin Marinas , Will Deacon , Mark Brown , "David S . Miller" , Andreas Larsson , "James E . J . 
Bottomley" , Helge Deller +Message-ID: + +From: Lorenzo Stoakes + +[ Upstream commit 4080ef1579b2413435413988d14ac8c68e4d42c8 ] + +Incorrect invocation of VMA callbacks when the VMA is no longer in a +consistent state is bug prone and risky to perform. + +With regards to the important vm_ops->close() callback We have gone to +great lengths to try to track whether or not we ought to close VMAs. + +Rather than doing so and risking making a mistake somewhere, instead +unconditionally close and reset vma->vm_ops to an empty dummy operations +set with a NULL .close operator. + +We introduce a new function to do so - vma_close() - and simplify existing +vms logic which tracked whether we needed to close or not. + +This simplifies the logic, avoids incorrect double-calling of the .close() +callback and allows us to update error paths to simply call vma_close() +unconditionally - making VMA closure idempotent. + +Link: https://lkml.kernel.org/r/28e89dda96f68c505cb6f8e9fc9b57c3e9f74b42.1730224667.git.lorenzo.stoakes@oracle.com +Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") +Signed-off-by: Lorenzo Stoakes +Reported-by: Jann Horn +Reviewed-by: Vlastimil Babka +Reviewed-by: Liam R. Howlett +Reviewed-by: Jann Horn +Cc: Andreas Larsson +Cc: Catalin Marinas +Cc: David S. Miller +Cc: Helge Deller +Cc: James E.J. Bottomley +Cc: Linus Torvalds +Cc: Mark Brown +Cc: Peter Xu +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Lorenzo Stoakes +Signed-off-by: Greg Kroah-Hartman +--- + mm/internal.h | 7 +++++++ + mm/mmap.c | 12 ++++-------- + mm/nommu.c | 3 +-- + mm/util.c | 15 +++++++++++++++ + 4 files changed, 27 insertions(+), 10 deletions(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -64,6 +64,13 @@ void page_writeback_init(void); + */ + int mmap_file(struct file *file, struct vm_area_struct *vma); + ++/* ++ * If the VMA has a close hook then close it, and since closing it might leave ++ * it in an inconsistent state which makes the use of any hooks suspect, clear ++ * them down by installing dummy empty hooks. ++ */ ++void vma_close(struct vm_area_struct *vma); ++ + static inline void *folio_raw_mapping(struct folio *folio) + { + unsigned long mapping = (unsigned long)folio->mapping; +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -136,8 +136,7 @@ void unlink_file_vma(struct vm_area_stru + static void remove_vma(struct vm_area_struct *vma) + { + might_sleep(); +- if (vma->vm_ops && vma->vm_ops->close) +- vma->vm_ops->close(vma); ++ vma_close(vma); + if (vma->vm_file) + fput(vma->vm_file); + mpol_put(vma_policy(vma)); +@@ -2388,8 +2387,7 @@ int __split_vma(struct mm_struct *mm, st + new->vm_start = new->vm_end; + new->vm_pgoff = 0; + /* Clean everything up if vma_adjust failed. 
*/ +- if (new->vm_ops && new->vm_ops->close) +- new->vm_ops->close(new); ++ vma_close(new); + if (new->vm_file) + fput(new->vm_file); + unlink_anon_vmas(new); +@@ -2885,8 +2883,7 @@ expanded: + return addr; + + close_and_free_vma: +- if (vma->vm_ops && vma->vm_ops->close) +- vma->vm_ops->close(vma); ++ vma_close(vma); + unmap_and_free_vma: + fput(vma->vm_file); + vma->vm_file = NULL; +@@ -3376,8 +3373,7 @@ struct vm_area_struct *copy_vma(struct v + return new_vma; + + out_vma_link: +- if (new_vma->vm_ops && new_vma->vm_ops->close) +- new_vma->vm_ops->close(new_vma); ++ vma_close(new_vma); + + if (new_vma->vm_file) + fput(new_vma->vm_file); +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -650,8 +650,7 @@ static int delete_vma_from_mm(struct vm_ + */ + static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) + { +- if (vma->vm_ops && vma->vm_ops->close) +- vma->vm_ops->close(vma); ++ vma_close(vma); + if (vma->vm_file) + fput(vma->vm_file); + put_nommu_region(vma->vm_region); +--- a/mm/util.c ++++ b/mm/util.c +@@ -1121,6 +1121,21 @@ int mmap_file(struct file *file, struct + return err; + } + ++void vma_close(struct vm_area_struct *vma) ++{ ++ static const struct vm_operations_struct dummy_vm_ops = {}; ++ ++ if (vma->vm_ops && vma->vm_ops->close) { ++ vma->vm_ops->close(vma); ++ ++ /* ++ * The mapping is in an inconsistent state, and no further hooks ++ * may be invoked upon it. ++ */ ++ vma->vm_ops = &dummy_vm_ops; ++ } ++} ++ + #ifdef CONFIG_PRINTK + /** + * mem_dump_obj - Print available provenance information diff --git a/queue-6.1/series b/queue-6.1/series index 69f51d99707..38a12565b83 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -59,3 +59,7 @@ ipvs-properly-dereference-pe-in-ip_vs_add_service.patch net-sched-taprio-extend-minimum-interval-restriction-to-entire-cycle-too.patch net-fec-remove-.ndo_poll_controller-to-avoid-deadlocks.patch mm-revert-mm-shmem-fix-data-race-in-shmem_getattr.patch +mm-avoid-unsafe-vma-hook-invocation-when-error-arises-on-mmap-hook.patch +mm-unconditionally-close-vmas-on-error.patch +mm-refactor-arch_calc_vm_flag_bits-and-arm64-mte-handling.patch +mm-resolve-faulty-mmap_region-error-path-behaviour.patch -- 2.47.2
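The common thread in this series is that, once a driver's ->mmap() hook has failed or the ->close() hook has already run, the VMA's vm_ops are swapped for an empty dummy set so that no further hooks can reach an object in an inconsistent state; that is what lets mmap_region()'s error path tear mappings down unconditionally and makes vma_close() idempotent. The sketch below is a minimal userspace illustration of that pattern only, not kernel code - every structure and function name in it (fake_vma, setup_mapping, teardown_mapping, ...) is made up for the example.

/*
 * Userspace sketch of the "neuter the ops on error / after close" pattern
 * described in these patches.  Illustrative names only, not kernel code.
 */
#include <stdio.h>

struct fake_vma;

struct fake_vm_ops {
	int  (*mmap)(struct fake_vma *vma);
	void (*close)(struct fake_vma *vma);
};

struct fake_vma {
	const struct fake_vm_ops *ops;
};

/* An empty ops table: no hook can ever be reached through it. */
static const struct fake_vm_ops dummy_ops = { 0 };

/* Rough analogue of mmap_file(): run the hook, neuter the ops on failure. */
static int setup_mapping(struct fake_vma *vma)
{
	int err = (vma->ops && vma->ops->mmap) ? vma->ops->mmap(vma) : 0;

	if (err)
		vma->ops = &dummy_ops;	/* no further hooks on this object */
	return err;
}

/* Rough analogue of vma_close(): close once, then make later calls no-ops. */
static void teardown_mapping(struct fake_vma *vma)
{
	if (vma->ops && vma->ops->close) {
		vma->ops->close(vma);
		vma->ops = &dummy_ops;	/* idempotent from now on */
	}
}

static int failing_mmap(struct fake_vma *vma)
{
	(void)vma;
	return -1;			/* pretend the driver hook failed */
}

static void noisy_close(struct fake_vma *vma)
{
	(void)vma;
	puts("close() hook ran");
}

static const struct fake_vm_ops failing_ops = {
	.mmap  = failing_mmap,
	.close = noisy_close,
};
static const struct fake_vm_ops working_ops = {
	.close = noisy_close,
};

int main(void)
{
	struct fake_vma bad  = { .ops = &failing_ops };
	struct fake_vma good = { .ops = &working_ops };

	if (setup_mapping(&bad))
		puts("mmap hook failed; this vma's hooks are now neutered");
	teardown_mapping(&bad);		/* safe no-op: close() is never reached */

	setup_mapping(&good);
	teardown_mapping(&good);	/* runs close() exactly once */
	teardown_mapping(&good);	/* idempotent: nothing happens */
	return 0;
}

Because the ops pointer itself is replaced rather than tracked with a separate "needs close" flag, the failed-mmap case and the already-closed case degrade to the same safe no-op, so callers on error paths never have to reason about which hooks are still valid.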