mm/vma: move stack expansion logic to mm/vma.c
author Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tue, 3 Dec 2024 18:05:11 +0000 (18:05 +0000)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 14 Jan 2025 06:40:43 +0000 (22:40 -0800)
We build on previous work making expand_downwards() an entirely internal
function.

This logic is subtle, so it is highly useful to get it into vma.c so we
can then unit test it from userland (a sketch of such a test follows).
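
Now that expand_downwards() lives in vma.c and is declared in vma.h, the
userland harness in tools/testing/vma can call it directly against a fake
mm.  A minimal sketch of such a test, modeled on the harness's existing
helpers; alloc_and_link_vma(), the ASSERT_*() macros and cleanup_mm() are
assumptions here, not confirmed signatures:

    /* Hypothetical userland test exercising expand_downwards(). */
    static bool test_expand_downwards(void)
    {
            unsigned long flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
            struct mm_struct mm = {};
            VMA_ITERATOR(vmi, &mm, 0);
            struct vm_area_struct *vma;

            /* A grow-down stack VMA spanning [0x2000, 0x3000). */
            vma = alloc_and_link_vma(&mm, 0x2000, 0x3000, 2, flags);

            /* Growing one page down moves vm_start and rewinds vm_pgoff. */
            ASSERT_EQ(expand_downwards(vma, 0x1000), 0);
            ASSERT_EQ(vma->vm_start, 0x1000);
            ASSERT_EQ(vma->vm_pgoff, 1);

            cleanup_mm(&mm, &vmi);
            return true;
    }

Assuming the harness keeps building with a plain make from
tools/testing/vma/, this path can now be exercised without booting a
kernel.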

We must additionally move acct_stack_growth() to vma.c as it is a helper
function used by both expand_downwards() and expand_upwards().

This also allows us to mark anon_vma_interval_tree_pre_update_vma() and
anon_vma_interval_tree_post_update_vma() static, as they are no longer
used outside mm/vma.c.

Link: https://lkml.kernel.org/r/0feb104eff85922019d4fb29280f3afb130c5204.1733248985.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/mmap.c
mm/vma.c
mm/vma.h
tools/testing/vma/vma.c
tools/testing/vma/vma_internal.h

index b29728df7f10e6fbcb1417799ff4a5ef555f4d3f..cea10c88cf0f04e767a50b7e9c4468b8950f9434 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -879,211 +879,6 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
        return vma;
 }
 
-/*
- * Verify that the stack growth is acceptable and
- * update accounting. This is shared with both the
- * grow-up and grow-down cases.
- */
-static int acct_stack_growth(struct vm_area_struct *vma,
-                            unsigned long size, unsigned long grow)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       unsigned long new_start;
-
-       /* address space limit tests */
-       if (!may_expand_vm(mm, vma->vm_flags, grow))
-               return -ENOMEM;
-
-       /* Stack limit test */
-       if (size > rlimit(RLIMIT_STACK))
-               return -ENOMEM;
-
-       /* mlock limit tests */
-       if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
-               return -ENOMEM;
-
-       /* Check to ensure the stack will not grow into a hugetlb-only region */
-       new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
-                       vma->vm_end - size;
-       if (is_hugepage_only_range(vma->vm_mm, new_start, size))
-               return -EFAULT;
-
-       /*
-        * Overcommit..  This must be the final test, as it will
-        * update security statistics.
-        */
-       if (security_vm_enough_memory_mm(mm, grow))
-               return -ENOMEM;
-
-       return 0;
-}
-
-#if defined(CONFIG_STACK_GROWSUP)
-/*
- * PA-RISC uses this for its stack.
- * vma is the last one with address > vma->vm_end.  Have to extend vma.
- */
-static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *next;
-       unsigned long gap_addr;
-       int error = 0;
-       VMA_ITERATOR(vmi, mm, vma->vm_start);
-
-       if (!(vma->vm_flags & VM_GROWSUP))
-               return -EFAULT;
-
-       mmap_assert_write_locked(mm);
-
-       /* Guard against exceeding limits of the address space. */
-       address &= PAGE_MASK;
-       if (address >= (TASK_SIZE & PAGE_MASK))
-               return -ENOMEM;
-       address += PAGE_SIZE;
-
-       /* Enforce stack_guard_gap */
-       gap_addr = address + stack_guard_gap;
-
-       /* Guard against overflow */
-       if (gap_addr < address || gap_addr > TASK_SIZE)
-               gap_addr = TASK_SIZE;
-
-       next = find_vma_intersection(mm, vma->vm_end, gap_addr);
-       if (next && vma_is_accessible(next)) {
-               if (!(next->vm_flags & VM_GROWSUP))
-                       return -ENOMEM;
-               /* Check that both stack segments have the same anon_vma? */
-       }
-
-       if (next)
-               vma_iter_prev_range_limit(&vmi, address);
-
-       vma_iter_config(&vmi, vma->vm_start, address);
-       if (vma_iter_prealloc(&vmi, vma))
-               return -ENOMEM;
-
-       /* We must make sure the anon_vma is allocated. */
-       if (unlikely(anon_vma_prepare(vma))) {
-               vma_iter_free(&vmi);
-               return -ENOMEM;
-       }
-
-       /* Lock the VMA before expanding to prevent concurrent page faults */
-       vma_start_write(vma);
-       /* We update the anon VMA tree. */
-       anon_vma_lock_write(vma->anon_vma);
-
-       /* Somebody else might have raced and expanded it already */
-       if (address > vma->vm_end) {
-               unsigned long size, grow;
-
-               size = address - vma->vm_start;
-               grow = (address - vma->vm_end) >> PAGE_SHIFT;
-
-               error = -ENOMEM;
-               if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
-                       error = acct_stack_growth(vma, size, grow);
-                       if (!error) {
-                               if (vma->vm_flags & VM_LOCKED)
-                                       mm->locked_vm += grow;
-                               vm_stat_account(mm, vma->vm_flags, grow);
-                               anon_vma_interval_tree_pre_update_vma(vma);
-                               vma->vm_end = address;
-                               /* Overwrite old entry in mtree. */
-                               vma_iter_store(&vmi, vma);
-                               anon_vma_interval_tree_post_update_vma(vma);
-
-                               perf_event_mmap(vma);
-                       }
-               }
-       }
-       anon_vma_unlock_write(vma->anon_vma);
-       vma_iter_free(&vmi);
-       validate_mm(mm);
-       return error;
-}
-#endif /* CONFIG_STACK_GROWSUP */
-
-/*
- * vma is the first one with address < vma->vm_start.  Have to extend vma.
- * mmap_lock held for writing.
- */
-static int expand_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *prev;
-       int error = 0;
-       VMA_ITERATOR(vmi, mm, vma->vm_start);
-
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               return -EFAULT;
-
-       mmap_assert_write_locked(mm);
-
-       address &= PAGE_MASK;
-       if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
-               return -EPERM;
-
-       /* Enforce stack_guard_gap */
-       prev = vma_prev(&vmi);
-       /* Check that both stack segments have the same anon_vma? */
-       if (prev) {
-               if (!(prev->vm_flags & VM_GROWSDOWN) &&
-                   vma_is_accessible(prev) &&
-                   (address - prev->vm_end < stack_guard_gap))
-                       return -ENOMEM;
-       }
-
-       if (prev)
-               vma_iter_next_range_limit(&vmi, vma->vm_start);
-
-       vma_iter_config(&vmi, address, vma->vm_end);
-       if (vma_iter_prealloc(&vmi, vma))
-               return -ENOMEM;
-
-       /* We must make sure the anon_vma is allocated. */
-       if (unlikely(anon_vma_prepare(vma))) {
-               vma_iter_free(&vmi);
-               return -ENOMEM;
-       }
-
-       /* Lock the VMA before expanding to prevent concurrent page faults */
-       vma_start_write(vma);
-       /* We update the anon VMA tree. */
-       anon_vma_lock_write(vma->anon_vma);
-
-       /* Somebody else might have raced and expanded it already */
-       if (address < vma->vm_start) {
-               unsigned long size, grow;
-
-               size = vma->vm_end - address;
-               grow = (vma->vm_start - address) >> PAGE_SHIFT;
-
-               error = -ENOMEM;
-               if (grow <= vma->vm_pgoff) {
-                       error = acct_stack_growth(vma, size, grow);
-                       if (!error) {
-                               if (vma->vm_flags & VM_LOCKED)
-                                       mm->locked_vm += grow;
-                               vm_stat_account(mm, vma->vm_flags, grow);
-                               anon_vma_interval_tree_pre_update_vma(vma);
-                               vma->vm_start = address;
-                               vma->vm_pgoff -= grow;
-                               /* Overwrite old entry in mtree. */
-                               vma_iter_store(&vmi, vma);
-                               anon_vma_interval_tree_post_update_vma(vma);
-
-                               perf_event_mmap(vma);
-                       }
-               }
-       }
-       anon_vma_unlock_write(vma->anon_vma);
-       vma_iter_free(&vmi);
-       validate_mm(mm);
-       return error;
-}
-
 /* enforced gap between the expanding stack and other mappings. */
 unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
 
index 3972376176e7fd8044050bd2c591462b33806438..e270efc927faba20596f955515989ba1c536ae62 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -202,6 +202,38 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
        flush_dcache_mmap_unlock(mapping);
 }
 
+/*
+ * vma has some anon_vma assigned, and is already inserted on that
+ * anon_vma's interval trees.
+ *
+ * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
+ * vma must be removed from the anon_vma's interval trees using
+ * anon_vma_interval_tree_pre_update_vma().
+ *
+ * After the update, the vma will be reinserted using
+ * anon_vma_interval_tree_post_update_vma().
+ *
+ * The entire update must be protected by exclusive mmap_lock and by
+ * the root anon_vma's mutex.
+ */
+static void
+anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
+{
+       struct anon_vma_chain *avc;
+
+       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+               anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
+}
+
+static void
+anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
+{
+       struct anon_vma_chain *avc;
+
+       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+               anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
+}
+
 /*
  * vma_prepare() - Helper function for handling locking VMAs prior to altering
  * @vp: The initialized vma_prepare struct
@@ -510,38 +542,6 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
        return __split_vma(vmi, vma, addr, new_below);
 }
 
-/*
- * vma has some anon_vma assigned, and is already inserted on that
- * anon_vma's interval trees.
- *
- * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
- * vma must be removed from the anon_vma's interval trees using
- * anon_vma_interval_tree_pre_update_vma().
- *
- * After the update, the vma will be reinserted using
- * anon_vma_interval_tree_post_update_vma().
- *
- * The entire update must be protected by exclusive mmap_lock and by
- * the root anon_vma's mutex.
- */
-void
-anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma_chain *avc;
-
-       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-               anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
-}
-
-void
-anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma_chain *avc;
-
-       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-               anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
-}
-
 /*
  * dup_anon_vma() - Helper function to duplicate anon_vma
  * @dst: The destination VMA
@@ -2672,3 +2672,208 @@ retry:
 
        return gap;
 }
+
+/*
+ * Verify that the stack growth is acceptable and
+ * update accounting. This is shared with both the
+ * grow-up and grow-down cases.
+ */
+static int acct_stack_growth(struct vm_area_struct *vma,
+                            unsigned long size, unsigned long grow)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long new_start;
+
+       /* address space limit tests */
+       if (!may_expand_vm(mm, vma->vm_flags, grow))
+               return -ENOMEM;
+
+       /* Stack limit test */
+       if (size > rlimit(RLIMIT_STACK))
+               return -ENOMEM;
+
+       /* mlock limit tests */
+       if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
+               return -ENOMEM;
+
+       /* Check to ensure the stack will not grow into a hugetlb-only region */
+       new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
+                       vma->vm_end - size;
+       if (is_hugepage_only_range(vma->vm_mm, new_start, size))
+               return -EFAULT;
+
+       /*
+        * Overcommit..  This must be the final test, as it will
+        * update security statistics.
+        */
+       if (security_vm_enough_memory_mm(mm, grow))
+               return -ENOMEM;
+
+       return 0;
+}
+
+#if defined(CONFIG_STACK_GROWSUP)
+/*
+ * PA-RISC uses this for its stack.
+ * vma is the last one with address > vma->vm_end.  Have to extend vma.
+ */
+int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *next;
+       unsigned long gap_addr;
+       int error = 0;
+       VMA_ITERATOR(vmi, mm, vma->vm_start);
+
+       if (!(vma->vm_flags & VM_GROWSUP))
+               return -EFAULT;
+
+       mmap_assert_write_locked(mm);
+
+       /* Guard against exceeding limits of the address space. */
+       address &= PAGE_MASK;
+       if (address >= (TASK_SIZE & PAGE_MASK))
+               return -ENOMEM;
+       address += PAGE_SIZE;
+
+       /* Enforce stack_guard_gap */
+       gap_addr = address + stack_guard_gap;
+
+       /* Guard against overflow */
+       if (gap_addr < address || gap_addr > TASK_SIZE)
+               gap_addr = TASK_SIZE;
+
+       next = find_vma_intersection(mm, vma->vm_end, gap_addr);
+       if (next && vma_is_accessible(next)) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
+       if (next)
+               vma_iter_prev_range_limit(&vmi, address);
+
+       vma_iter_config(&vmi, vma->vm_start, address);
+       if (vma_iter_prealloc(&vmi, vma))
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma))) {
+               vma_iter_free(&vmi);
+               return -ENOMEM;
+       }
+
+       /* Lock the VMA before expanding to prevent concurrent page faults */
+       vma_start_write(vma);
+       /* We update the anon VMA tree. */
+       anon_vma_lock_write(vma->anon_vma);
+
+       /* Somebody else might have raced and expanded it already */
+       if (address > vma->vm_end) {
+               unsigned long size, grow;
+
+               size = address - vma->vm_start;
+               grow = (address - vma->vm_end) >> PAGE_SHIFT;
+
+               error = -ENOMEM;
+               if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+                       error = acct_stack_growth(vma, size, grow);
+                       if (!error) {
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags, grow);
+                               anon_vma_interval_tree_pre_update_vma(vma);
+                               vma->vm_end = address;
+                               /* Overwrite old entry in mtree. */
+                               vma_iter_store(&vmi, vma);
+                               anon_vma_interval_tree_post_update_vma(vma);
+
+                               perf_event_mmap(vma);
+                       }
+               }
+       }
+       anon_vma_unlock_write(vma->anon_vma);
+       vma_iter_free(&vmi);
+       validate_mm(mm);
+       return error;
+}
+#endif /* CONFIG_STACK_GROWSUP */
+
+/*
+ * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ * mmap_lock held for writing.
+ */
+int expand_downwards(struct vm_area_struct *vma, unsigned long address)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *prev;
+       int error = 0;
+       VMA_ITERATOR(vmi, mm, vma->vm_start);
+
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               return -EFAULT;
+
+       mmap_assert_write_locked(mm);
+
+       address &= PAGE_MASK;
+       if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
+               return -EPERM;
+
+       /* Enforce stack_guard_gap */
+       prev = vma_prev(&vmi);
+       /* Check that both stack segments have the same anon_vma? */
+       if (prev) {
+               if (!(prev->vm_flags & VM_GROWSDOWN) &&
+                   vma_is_accessible(prev) &&
+                   (address - prev->vm_end < stack_guard_gap))
+                       return -ENOMEM;
+       }
+
+       if (prev)
+               vma_iter_next_range_limit(&vmi, vma->vm_start);
+
+       vma_iter_config(&vmi, address, vma->vm_end);
+       if (vma_iter_prealloc(&vmi, vma))
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma))) {
+               vma_iter_free(&vmi);
+               return -ENOMEM;
+       }
+
+       /* Lock the VMA before expanding to prevent concurrent page faults */
+       vma_start_write(vma);
+       /* We update the anon VMA tree. */
+       anon_vma_lock_write(vma->anon_vma);
+
+       /* Somebody else might have raced and expanded it already */
+       if (address < vma->vm_start) {
+               unsigned long size, grow;
+
+               size = vma->vm_end - address;
+               grow = (vma->vm_start - address) >> PAGE_SHIFT;
+
+               error = -ENOMEM;
+               if (grow <= vma->vm_pgoff) {
+                       error = acct_stack_growth(vma, size, grow);
+                       if (!error) {
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags, grow);
+                               anon_vma_interval_tree_pre_update_vma(vma);
+                               vma->vm_start = address;
+                               vma->vm_pgoff -= grow;
+                               /* Overwrite old entry in mtree. */
+                               vma_iter_store(&vmi, vma);
+                               anon_vma_interval_tree_post_update_vma(vma);
+
+                               perf_event_mmap(vma);
+                       }
+               }
+       }
+       anon_vma_unlock_write(vma->anon_vma);
+       vma_iter_free(&vmi);
+       validate_mm(mm);
+       return error;
+}
index c60f37d89eb1b63a132355a29611fbee424387a9..6c460a120f827e2c16683e5dc52ac3eca5d7c09f 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -139,12 +139,6 @@ void validate_mm(struct mm_struct *mm);
 #define validate_mm(mm) do { } while (0)
 #endif
 
-/* Required for expand_downwards(). */
-void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma);
-
-/* Required for expand_downwards(). */
-void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma);
-
 int vma_expand(struct vma_merge_struct *vmg);
 int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
               unsigned long start, unsigned long end, pgoff_t pgoff);
@@ -478,4 +472,10 @@ static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
 
 #endif
 
+#if defined(CONFIG_STACK_GROWSUP)
+int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+#endif
+
+int expand_downwards(struct vm_area_struct *vma, unsigned long address);
+
 #endif /* __MM_VMA_H */
index 39ee61e5563492535a1413439d49088745c5cbc9..891d87a9ad6bcb961c2e9551816d0ff746e41927 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -53,6 +53,11 @@ struct task_struct *get_current(void)
        return &__current;
 }
 
+unsigned long rlimit(unsigned int limit)
+{
+       return (unsigned long)-1;
+}
+
 /* Helper function to simply allocate a VMA. */
 static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
                                        unsigned long start,
index 6ad8bd8edaad1bb5be6f376e7762c8a09f1beefa..fab3f3bdf2f03ee321a47a0c210a2e0ec4d6ba68 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -79,6 +79,11 @@ extern unsigned long dac_mmap_min_addr;
 
 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
+#define RLIMIT_STACK           3       /* max stack size */
+#define RLIMIT_MEMLOCK         8       /* max locked-in-memory address space */
+
+#define CAP_IPC_LOCK         14
+
 #ifdef CONFIG_64BIT
 /* VM is sealed, in vm_flags */
 #define VM_SEALED      _BITUL(63)
@@ -478,6 +483,8 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
 
 extern const struct vm_operations_struct vma_dummy_vm_ops;
 
+extern unsigned long rlimit(unsigned int limit);
+
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
        memset(vma, 0, sizeof(*vma));
@@ -1114,4 +1121,59 @@ static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
        return vm_end;
 }
 
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+                                       unsigned long addr, unsigned long len)
+{
+       return 0;
+}
+
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+       return vma->vm_flags & VM_ACCESS_FLAGS;
+}
+
+static inline bool capable(int cap)
+{
+       return true;
+}
+
+static inline bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+                       unsigned long bytes)
+{
+       unsigned long locked_pages, limit_pages;
+
+       if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+               return true;
+
+       locked_pages = bytes >> PAGE_SHIFT;
+       locked_pages += mm->locked_vm;
+
+       limit_pages = rlimit(RLIMIT_MEMLOCK);
+       limit_pages >>= PAGE_SHIFT;
+
+       return locked_pages <= limit_pages;
+}
+
+static inline int __anon_vma_prepare(struct vm_area_struct *vma)
+{
+       struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
+
+       if (!anon_vma)
+               return -ENOMEM;
+
+       anon_vma->root = anon_vma;
+       vma->anon_vma = anon_vma;
+
+       return 0;
+}
+
+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+       if (likely(vma->anon_vma))
+               return 0;
+
+       return __anon_vma_prepare(vma);
+}
+
 #endif /* __MM_VMA_INTERNAL_H */