git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm/vma: move brk() internals to mm/vma.c
author: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tue, 3 Dec 2024 18:05:08 +0000 (18:05 +0000)
committer: Andrew Morton <akpm@linux-foundation.org>
Tue, 14 Jan 2025 06:40:42 +0000 (22:40 -0800)
Patch series "mm/vma: make more mmap logic userland testable".

This series carries on the work started in previous series and
continued in commit 52956b0d7fb9 ("mm: isolate mmap internal logic to
mm/vma.c"), moving the remainder of the memory mapping implementation
logic into mm/vma.c, allowing the bulk of the mapping logic to be unit
tested.

It is highly useful to do so, as this means we can both fundamentally test
this core logic, and introduce regression tests to ensure any issues
previously resolved do not recur.

Vitally, this includes the do_brk_flags() function, meaning that both core
means by which userland maps memory are now testable.

Performance testing was performed after this change given the brk() system
call's sensitivity to change, and no performance regression was observed.

The stack expansion logic is also moved into mm/vma.c, which necessitates
a change in the API exposed to the exec code, removing the invocation of
the expand_downwards() function used in get_arg_page() and instead adding
mmap_read_lock_maybe_expand() to wrap this.

This patch (of 5):

Now we have moved mmap_region() internals to mm/vma.c, making it available
to userland testing, it makes sense to do the same with brk().

This continues the pattern of VMA heavy lifting being done in mm/vma.c in
an environment where it can be subject to straightforward unit and
regression testing, with other VMA-adjacent files becoming wrappers around
this functionality.

[lorenzo.stoakes@oracle.com: add missing personality header import]
Link: https://lkml.kernel.org/r/2a717265-985f-45eb-9257-8b2857088ed4@lucifer.local
Link: https://lkml.kernel.org/r/cover.1733248985.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/3d24b9e67bb0261539ca921d1188a10a1b4d4357.1733248985.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/mmap.c
mm/vma.c
mm/vma.h
mm/vma_internal.h
tools/testing/vma/vma_internal.h

index aec208f90337c97ea134253726110681ced2f7fe..775db706b82288c520d848666896e9e2e0fca010 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -111,8 +111,7 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
        return mlock_future_ok(current->mm, current->mm->def_flags, len)
                ? 0 : -EAGAIN;
 }
-static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
-               unsigned long addr, unsigned long request, unsigned long flags);
+
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
        unsigned long newbrk, oldbrk, origbrk;
@@ -1512,88 +1511,6 @@ out:
        return ret;
 }
 
-/*
- * do_brk_flags() - Increase the brk vma if the flags match.
- * @vmi: The vma iterator
- * @addr: The start address
- * @len: The length of the increase
- * @vma: The vma,
- * @flags: The VMA Flags
- *
- * Extend the brk VMA from addr to addr + len.  If the VMA is NULL or the flags
- * do not match then create a new anonymous VMA.  Eventually we may be able to
- * do some brk-specific accounting here.
- */
-static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
-               unsigned long addr, unsigned long len, unsigned long flags)
-{
-       struct mm_struct *mm = current->mm;
-
-       /*
-        * Check against address space limits by the changed size
-        * Note: This happens *after* clearing old mappings in some code paths.
-        */
-       flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-       if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
-               return -ENOMEM;
-
-       if (mm->map_count > sysctl_max_map_count)
-               return -ENOMEM;
-
-       if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
-               return -ENOMEM;
-
-       /*
-        * Expand the existing vma if possible; Note that singular lists do not
-        * occur after forking, so the expand will only happen on new VMAs.
-        */
-       if (vma && vma->vm_end == addr) {
-               VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
-
-               vmg.prev = vma;
-               /* vmi is positioned at prev, which this mode expects. */
-               vmg.merge_flags = VMG_FLAG_JUST_EXPAND;
-
-               if (vma_merge_new_range(&vmg))
-                       goto out;
-               else if (vmg_nomem(&vmg))
-                       goto unacct_fail;
-       }
-
-       if (vma)
-               vma_iter_next_range(vmi);
-       /* create a vma struct for an anonymous mapping */
-       vma = vm_area_alloc(mm);
-       if (!vma)
-               goto unacct_fail;
-
-       vma_set_anonymous(vma);
-       vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
-       vm_flags_init(vma, flags);
-       vma->vm_page_prot = vm_get_page_prot(flags);
-       vma_start_write(vma);
-       if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
-               goto mas_store_fail;
-
-       mm->map_count++;
-       validate_mm(mm);
-       ksm_add_vma(vma);
-out:
-       perf_event_mmap(vma);
-       mm->total_vm += len >> PAGE_SHIFT;
-       mm->data_vm += len >> PAGE_SHIFT;
-       if (flags & VM_LOCKED)
-               mm->locked_vm += (len >> PAGE_SHIFT);
-       vm_flags_set(vma, VM_SOFTDIRTY);
-       return 0;
-
-mas_store_fail:
-       vm_area_free(vma);
-unacct_fail:
-       vm_unacct_memory(len >> PAGE_SHIFT);
-       return -ENOMEM;
-}
-
 int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
index bb2119e5a0d07986b411d7dbbe55cc5640e01823..7cd174daeeec68b243f5a7d8754c5cb735bc97c8 100644 (file)
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2481,3 +2481,85 @@ abort_munmap:
        vms_abort_munmap_vmas(&map.vms, &map.mas_detach);
        return error;
 }
+
+/*
+ * do_brk_flags() - Increase the brk vma if the flags match.
+ * @vmi: The vma iterator
+ * @vma: The existing brk vma to try to extend, or NULL if there is none
+ * @addr: The start address
+ * @len: The length of the increase, in bytes
+ * @flags: The VMA flags; VM_DATA_DEFAULT_FLAGS, VM_ACCOUNT and mm->def_flags
+ *         are always OR'd into these before use
+ *
+ * Extend the brk VMA from addr to addr + len.  If @vma is NULL, does not end
+ * at @addr, or cannot be expanded, then create a new anonymous VMA covering
+ * the range instead.  Eventually we may be able to do some brk-specific
+ * accounting here.
+ *
+ * On success the mm's total_vm and data_vm counters (and locked_vm, if
+ * VM_LOCKED is set) are increased by the number of pages added.
+ *
+ * Return: 0 on success, or -ENOMEM if may_expand_vm() rejects the increase,
+ * mm->map_count exceeds sysctl_max_map_count,
+ * security_vm_enough_memory_mm() fails, or the VMA could not be expanded,
+ * allocated or stored.
+ */
+int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
+                unsigned long addr, unsigned long len, unsigned long flags)
+{
+       struct mm_struct *mm = current->mm;
+
+       /*
+        * Check against address space limits by the changed size
+        * Note: This happens *after* clearing old mappings in some code paths.
+        */
+       flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+       if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
+               return -ENOMEM;
+
+       if (mm->map_count > sysctl_max_map_count)
+               return -ENOMEM;
+
+       if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
+               return -ENOMEM;
+
+       /*
+        * Expand the existing vma if possible; Note that singular lists do not
+        * occur after forking, so the expand will only happen on new VMAs.
+        *
+        * If the expand attempt fails for a reason other than running out of
+        * memory, fall through and create a new VMA below.
+        */
+       if (vma && vma->vm_end == addr) {
+               VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
+
+               vmg.prev = vma;
+               /* vmi is positioned at prev, which this mode expects. */
+               vmg.merge_flags = VMG_FLAG_JUST_EXPAND;
+
+               if (vma_merge_new_range(&vmg))
+                       goto out;
+               else if (vmg_nomem(&vmg))
+                       goto unacct_fail;
+       }
+
+       if (vma)
+               vma_iter_next_range(vmi);
+       /* create a vma struct for an anonymous mapping */
+       vma = vm_area_alloc(mm);
+       if (!vma)
+               goto unacct_fail;
+
+       vma_set_anonymous(vma);
+       vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
+       vm_flags_init(vma, flags);
+       vma->vm_page_prot = vm_get_page_prot(flags);
+       vma_start_write(vma);
+       if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
+               goto mas_store_fail;
+
+       mm->map_count++;
+       validate_mm(mm);
+       ksm_add_vma(vma);
+out:   /* reached from both the expand path and the new-VMA path */
+       perf_event_mmap(vma);
+       mm->total_vm += len >> PAGE_SHIFT;
+       mm->data_vm += len >> PAGE_SHIFT;
+       if (flags & VM_LOCKED)
+               mm->locked_vm += (len >> PAGE_SHIFT);
+       vm_flags_set(vma, VM_SOFTDIRTY);
+       return 0;
+
+mas_store_fail:
+       vm_area_free(vma);
+unacct_fail:
+       vm_unacct_memory(len >> PAGE_SHIFT);
+       return -ENOMEM;
+}
index 388d347486744a03c05e78d8235049e64b98c6c6..83a15d3a8285fd597687f82123039ed840a2a3ae 100644 (file)
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -247,6 +247,9 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf);
 
+int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
+                unsigned long addr, unsigned long request, unsigned long flags);
+
 static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
 {
        /*
index fc5f172a36bd7bc3e94ee570a53da759b1804447..2f05735ff190c4304e9e62cfae85e69d6606fc69 100644 (file)
--- a/mm/vma_internal.h
+++ b/mm/vma_internal.h
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/perf_event.h>
+#include <linux/personality.h>
 #include <linux/pfn.h>
 #include <linux/rcupdate.h>
 #include <linux/rmap.h>
index e76ff579e1fdca72a001e8c9ffe269a1a81398ee..7c3c15135c5b43f8a6a1cf8b1a9ca76eabc6b7dd 100644 (file)
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -39,6 +39,7 @@
 #define VM_SHARED      0x00000008
 #define VM_MAYREAD     0x00000010
 #define VM_MAYWRITE    0x00000020
+#define VM_MAYEXEC     0x00000040
 #define VM_GROWSDOWN   0x00000100
 #define VM_PFNMAP      0x00000400
 #define VM_LOCKED      0x00002000
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
 
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_TSK_EXEC
+
 #ifdef CONFIG_64BIT
 /* VM is sealed, in vm_flags */
 #define VM_SEALED      _BITUL(63)
@@ -122,10 +130,22 @@ enum {
        TASK_COMM_LEN = 16,
 };
 
+/*
+ * Flags for bug emulation.
+ *
+ * These occupy the top three bytes.
+ */
+enum {
+       READ_IMPLIES_EXEC =     0x0400000,
+};
+
 struct task_struct {
        char comm[TASK_COMM_LEN];
        pid_t pid;
        struct mm_struct *mm;
+
+       /* Used for emulating ABI behavior of previous Linux versions: */
+       unsigned int                    personality;
 };
 
 struct task_struct *get_current(void);
@@ -186,6 +206,8 @@ struct mm_struct {
        unsigned long data_vm;     /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;     /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;    /* VM_STACK */
+
+       unsigned long def_flags;
 };
 
 struct vma_lock {