3.18-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 19 Jun 2017 15:09:35 +0000 (23:09 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 19 Jun 2017 15:09:35 +0000 (23:09 +0800)
added patches:
mm-larger-stack-guard-gap-between-vmas.patch

queue-3.18/mm-larger-stack-guard-gap-between-vmas.patch [new file with mode: 0644]
queue-3.18/series

diff --git a/queue-3.18/mm-larger-stack-guard-gap-between-vmas.patch b/queue-3.18/mm-larger-stack-guard-gap-between-vmas.patch
new file mode 100644 (file)
index 0000000..f734226
--- /dev/null
+++ b/queue-3.18/mm-larger-stack-guard-gap-between-vmas.patch
@@ -0,0 +1,828 @@
+From 1be7107fbe18eed3e319a6c3e83c78254b693acb Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 19 Jun 2017 04:03:24 -0700
+Subject: mm: larger stack guard gap, between vmas
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream.
+
+The stack guard page is a useful feature that reduces the risk of the
+stack smashing into a different mapping. We have been using a single-page
+gap, which is sufficient to prevent having the stack adjacent to a
+different mapping. But this seems to be insufficient in the light of the
+stack usage in userspace. E.g. glibc uses alloca() allocations as large
+as 64kB in many commonly used functions. Others use constructs like
+gid_t buffer[NGROUPS_MAX], which is 256kB, or stack strings sized by
+MAX_ARG_STRLEN.
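+
+As a rough userspace sketch (illustrative only, not part of this patch),
+a single stack frame like the following moves the stack pointer far
+beyond a 4kB guard page before any byte of the buffer is written:
+
+	#include <limits.h>	/* NGROUPS_MAX */
+	#include <sys/types.h>	/* gid_t */
+
+	void fill_groups(void)
+	{
+		gid_t buffer[NGROUPS_MAX];	/* ~256kB on Linux */
+
+		/* The first store can land well past a single guard
+		 * page, straight into an adjacent mapping. */
+		buffer[0] = 0;
+	}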
+
+This is especially dangerous for suid binaries run with the default
+unlimited stack size limit, because those applications can be tricked
+into consuming a large portion of the stack, after which a single glibc
+call can jump over the guard page. These attacks are not theoretical,
+unfortunately.
+
+Make those attacks less probable by increasing the stack guard gap
+to 1MB (on systems with 4k pages; but make it depend on the page size
+because systems with larger base pages might cap stack allocations in
+PAGE_SIZE units), which should cover larger alloca() and VLA stack
+allocations. It is obviously not a full fix because the problem is
+somewhat inherent, but it should reduce the attack space a lot.
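+
+(Concretely, the default set in mm/mmap.c below is stack_guard_gap =
+256UL << PAGE_SHIFT, i.e. 256 pages: 256 x 4kB = 1MB on systems with
+4k pages.)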
+
+One could argue that the gap size should be configurable from userspace,
+but that can be done later when somebody finds that the new 1MB is wrong
+for some special case applications.  For now, add a kernel command line
+option (stack_guard_gap) to specify the stack gap size (in page units).
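+
+For example (values hypothetical), booting with stack_guard_gap=1024
+would reserve a 4MB gap on a 4k-page system, while stack_guard_gap=1
+would restore the old single-page behaviour.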
+
+Implementation-wise, first delete all the old code for the stack guard page:
+because although we could get away with accounting one extra page in a
+stack vma, accounting a larger gap can break userspace - case in point,
+a program run with "ulimit -S -v 20000" failed when the 1MB gap was
+counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
+and strict non-overcommit mode.
+
+Instead of keeping the gap inside the stack vma, maintain the stack guard
+gap as a gap between vmas: using vm_start_gap() in place of vm_start
+(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
+places which need to respect the gap - mainly arch_get_unmapped_area(),
+and the vma tree's subtree_gap support for that.
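+
+As a worked example (addresses hypothetical): with 4k pages and the
+default 256-page gap, a VM_GROWSDOWN stack vma with vm_start ==
+0x7ffffffde000 yields vm_start_gap() == 0x7ffffffde000 - 0x100000 ==
+0x7fffffede000, so arch_get_unmapped_area() refuses any requested
+address whose mapping would end above that point.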
+
+Original-patch-by: Oleg Nesterov <oleg@redhat.com>
+Original-patch-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Tested-by: Helge Deller <deller@gmx.de> # parisc
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[wt: backport to 4.11: adjust context]
+[wt: backport to 4.9: adjust context ; kernel doc was not in admin-guide]
+[wt: backport to 4.4: adjust context ; drop ppc hugetlb_radix changes]
+[wt: backport to 3.18: adjust context ; no FOLL_POPULATE ;
+     s390 uses generic arch_get_unmapped_area()]
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    7 +
+ arch/arc/mm/mmap.c                  |    2 
+ arch/arm/mm/mmap.c                  |    4 
+ arch/frv/mm/elf-fdpic.c             |    2 
+ arch/mips/mm/mmap.c                 |    2 
+ arch/parisc/kernel/sys_parisc.c     |   15 ++-
+ arch/powerpc/mm/slice.c             |    2 
+ arch/sh/mm/mmap.c                   |    4 
+ arch/sparc/kernel/sys_sparc_64.c    |    4 
+ arch/sparc/mm/hugetlbpage.c         |    2 
+ arch/tile/mm/hugetlbpage.c          |    2 
+ arch/x86/kernel/sys_x86_64.c        |    4 
+ arch/x86/mm/hugetlbpage.c           |    2 
+ arch/xtensa/kernel/syscall.c        |    2 
+ fs/hugetlbfs/inode.c                |    2 
+ fs/proc/task_mmu.c                  |    4 
+ include/linux/mm.h                  |   57 ++++++-------
+ mm/gup.c                            |    6 -
+ mm/memory.c                         |   38 ---------
+ mm/mmap.c                           |  147 +++++++++++++++++++++---------------
+ 20 files changed, 148 insertions(+), 160 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -3324,6 +3324,13 @@ bytes respectively. Such letter suffixes
+       spia_pedr=
+       spia_peddr=
++      stack_guard_gap=        [MM]
++                      override the default stack gap protection. The value
++                      is in page units and it defines how many pages prior
++                      to (for stacks growing down) resp. after (for stacks
++                      growing up) the main stack are reserved for no other
++                      mapping. Default value is 256 pages.
++
+       stacktrace      [FTRACE]
+                       Enabled the stack tracer on boot up.
+--- a/arch/arc/mm/mmap.c
++++ b/arch/arc/mm/mmap.c
+@@ -64,7 +64,7 @@ arch_get_unmapped_area(struct file *filp
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/arm/mm/mmap.c
++++ b/arch/arm/mm/mmap.c
+@@ -89,7 +89,7 @@ arch_get_unmapped_area(struct file *filp
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+@@ -140,7 +140,7 @@ arch_get_unmapped_area_topdown(struct fi
+                       addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                              (!vma || addr + len <= vma->vm_start))
++                              (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/frv/mm/elf-fdpic.c
++++ b/arch/frv/mm/elf-fdpic.c
+@@ -74,7 +74,7 @@ unsigned long arch_get_unmapped_area(str
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(current->mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       goto success;
+       }
+--- a/arch/mips/mm/mmap.c
++++ b/arch/mips/mm/mmap.c
+@@ -92,7 +92,7 @@ static unsigned long arch_get_unmapped_a
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(str
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+       struct mm_struct *mm = current->mm;
+-      struct vm_area_struct *vma;
++      struct vm_area_struct *vma, *prev;
+       unsigned long task_size = TASK_SIZE;
+       int do_color_align, last_mmap;
+       struct vm_unmapped_area_info info;
+@@ -115,9 +115,10 @@ unsigned long arch_get_unmapped_area(str
+               else
+                       addr = PAGE_ALIGN(addr);
+-              vma = find_vma(mm, addr);
++              vma = find_vma_prev(mm, addr, &prev);
+               if (task_size - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)) &&
++                  (!prev || addr >= vm_end_gap(prev)))
+                       goto found_addr;
+       }
+@@ -141,7 +142,7 @@ arch_get_unmapped_area_topdown(struct fi
+                         const unsigned long len, const unsigned long pgoff,
+                         const unsigned long flags)
+ {
+-      struct vm_area_struct *vma;
++      struct vm_area_struct *vma, *prev;
+       struct mm_struct *mm = current->mm;
+       unsigned long addr = addr0;
+       int do_color_align, last_mmap;
+@@ -175,9 +176,11 @@ arch_get_unmapped_area_topdown(struct fi
+                       addr = COLOR_ALIGN(addr, last_mmap, pgoff);
+               else
+                       addr = PAGE_ALIGN(addr);
+-              vma = find_vma(mm, addr);
++
++              vma = find_vma_prev(mm, addr, &prev);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)) &&
++                  (!prev || addr >= vm_end_gap(prev)))
+                       goto found_addr;
+       }
+--- a/arch/powerpc/mm/slice.c
++++ b/arch/powerpc/mm/slice.c
+@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_
+       if ((mm->task_size - len) < addr)
+               return 0;
+       vma = find_vma(mm, addr);
+-      return (!vma || (addr + len) <= vma->vm_start);
++      return (!vma || (addr + len) <= vm_start_gap(vma));
+ }
+ static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+--- a/arch/sh/mm/mmap.c
++++ b/arch/sh/mm/mmap.c
+@@ -63,7 +63,7 @@ unsigned long arch_get_unmapped_area(str
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+@@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct fi
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/sparc/kernel/sys_sparc_64.c
++++ b/arch/sparc/kernel/sys_sparc_64.c
+@@ -118,7 +118,7 @@ unsigned long arch_get_unmapped_area(str
+               vma = find_vma(mm, addr);
+               if (task_size - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+@@ -181,7 +181,7 @@ arch_get_unmapped_area_topdown(struct fi
+               vma = find_vma(mm, addr);
+               if (task_size - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -115,7 +115,7 @@ hugetlb_get_unmapped_area(struct file *f
+               addr = ALIGN(addr, HPAGE_SIZE);
+               vma = find_vma(mm, addr);
+               if (task_size - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+       if (mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/tile/mm/hugetlbpage.c
++++ b/arch/tile/mm/hugetlbpage.c
+@@ -237,7 +237,7 @@ unsigned long hugetlb_get_unmapped_area(
+               addr = ALIGN(addr, huge_page_size(h));
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+       if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/x86/kernel/sys_x86_64.c
++++ b/arch/x86/kernel/sys_x86_64.c
+@@ -127,7 +127,7 @@ arch_get_unmapped_area(struct file *filp
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (end - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+@@ -166,7 +166,7 @@ arch_get_unmapped_area_topdown(struct fi
+               addr = PAGE_ALIGN(addr);
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                              (!vma || addr + len <= vma->vm_start))
++                              (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/arch/x86/mm/hugetlbpage.c
++++ b/arch/x86/mm/hugetlbpage.c
+@@ -144,7 +144,7 @@ hugetlb_get_unmapped_area(struct file *f
+               addr = ALIGN(addr, huge_page_size(h));
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+       if (mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/xtensa/kernel/syscall.c
++++ b/arch/xtensa/kernel/syscall.c
+@@ -86,7 +86,7 @@ unsigned long arch_get_unmapped_area(str
+               /* At this point:  (!vmm || addr < vmm->vm_end). */
+               if (TASK_SIZE - len < addr)
+                       return -ENOMEM;
+-              if (!vmm || addr + len <= vmm->vm_start)
++              if (!vmm || addr + len <= vm_start_gap(vmm))
+                       return addr;
+               addr = vmm->vm_end;
+               if (flags & MAP_SHARED)
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -171,7 +171,7 @@ hugetlb_get_unmapped_area(struct file *f
+               addr = ALIGN(addr, huge_page_size(h));
+               vma = find_vma(mm, addr);
+               if (TASK_SIZE - len >= addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)))
+                       return addr;
+       }
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -284,11 +284,7 @@ show_map_vma(struct seq_file *m, struct
+       /* We don't show the stack guard page in /proc/maps */
+       start = vma->vm_start;
+-      if (stack_guard_page_start(vma, start))
+-              start += PAGE_SIZE;
+       end = vma->vm_end;
+-      if (stack_guard_page_end(vma, end))
+-              end -= PAGE_SIZE;
+       seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+       seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1242,37 +1242,6 @@ int set_page_dirty_lock(struct page *pag
+ int clear_page_dirty_for_io(struct page *page);
+ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
+-/* Is the vma a continuation of the stack vma above it? */
+-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
+-{
+-      return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+-}
+-
+-static inline int stack_guard_page_start(struct vm_area_struct *vma,
+-                                           unsigned long addr)
+-{
+-      return (vma->vm_flags & VM_GROWSDOWN) &&
+-              (vma->vm_start == addr) &&
+-              !vma_growsdown(vma->vm_prev, addr);
+-}
+-
+-/* Is the vma a continuation of the stack vma below it? */
+-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+-{
+-      return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+-}
+-
+-static inline int stack_guard_page_end(struct vm_area_struct *vma,
+-                                         unsigned long addr)
+-{
+-      return (vma->vm_flags & VM_GROWSUP) &&
+-              (vma->vm_end == addr) &&
+-              !vma_growsup(vma->vm_next, addr);
+-}
+-
+-extern struct task_struct *task_of_stack(struct task_struct *task,
+-                              struct vm_area_struct *vma, bool in_group);
+-
+ extern unsigned long move_page_tables(struct vm_area_struct *vma,
+               unsigned long old_addr, struct vm_area_struct *new_vma,
+               unsigned long new_addr, unsigned long len,
+@@ -1928,7 +1897,7 @@ void page_cache_async_readahead(struct a
+                               pgoff_t offset,
+                               unsigned long size);
+-unsigned long max_sane_readahead(unsigned long nr);
++extern unsigned long stack_guard_gap;
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+@@ -1958,6 +1927,30 @@ static inline struct vm_area_struct * fi
+       return vma;
+ }
++static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
++{
++      unsigned long vm_start = vma->vm_start;
++
++      if (vma->vm_flags & VM_GROWSDOWN) {
++              vm_start -= stack_guard_gap;
++              if (vm_start > vma->vm_start)
++                      vm_start = 0;
++      }
++      return vm_start;
++}
++
++static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
++{
++      unsigned long vm_end = vma->vm_end;
++
++      if (vma->vm_flags & VM_GROWSUP) {
++              vm_end += stack_guard_gap;
++              if (vm_end < vma->vm_end)
++                      vm_end = -PAGE_SIZE;
++      }
++      return vm_end;
++}
++
+ static inline unsigned long vma_pages(struct vm_area_struct *vma)
+ {
+       return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -275,10 +275,8 @@ static int faultin_page(struct task_stru
+       unsigned int fault_flags = 0;
+       int ret;
+-      /* For mlock, just skip the stack guard page. */
+-      if ((*flags & FOLL_MLOCK) &&
+-                      (stack_guard_page_start(vma, address) ||
+-                       stack_guard_page_end(vma, address + PAGE_SIZE)))
++      /* mlock all present pages, but do not fault in new pages */
++      if (*flags & FOLL_MLOCK)
+               return -ENOENT;
+       if (*flags & FOLL_WRITE)
+               fault_flags |= FAULT_FLAG_WRITE;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2580,40 +2580,6 @@ out_release:
+ }
+ /*
+- * This is like a special single-page "expand_{down|up}wards()",
+- * except we must first make sure that 'address{-|+}PAGE_SIZE'
+- * doesn't hit another vma.
+- */
+-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+-{
+-      address &= PAGE_MASK;
+-      if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+-              struct vm_area_struct *prev = vma->vm_prev;
+-
+-              /*
+-               * Is there a mapping abutting this one below?
+-               *
+-               * That's only ok if it's the same stack mapping
+-               * that has gotten split..
+-               */
+-              if (prev && prev->vm_end == address)
+-                      return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+-
+-              return expand_downwards(vma, address - PAGE_SIZE);
+-      }
+-      if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+-              struct vm_area_struct *next = vma->vm_next;
+-
+-              /* As VM_GROWSDOWN but s/below/above/ */
+-              if (next && next->vm_start == address + PAGE_SIZE)
+-                      return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+-
+-              return expand_upwards(vma, address + PAGE_SIZE);
+-      }
+-      return 0;
+-}
+-
+-/*
+  * We enter with non-exclusive mmap_sem (to exclude vma changes,
+  * but allow concurrent faults), and pte mapped but not yet locked.
+  * We return with mmap_sem still held, but pte unmapped and unlocked.
+@@ -2633,10 +2599,6 @@ static int do_anonymous_page(struct mm_s
+       if (vma->vm_flags & VM_SHARED)
+               return VM_FAULT_SIGBUS;
+-      /* Check if we need to add a guard page to the stack */
+-      if (check_stack_guard_page(vma, address) < 0)
+-              return VM_FAULT_SIGSEGV;
+-
+       /* Use the zero-page for reads */
+       if (!(flags & FAULT_FLAG_WRITE)) {
+               entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -290,6 +290,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+       unsigned long retval;
+       unsigned long newbrk, oldbrk;
+       struct mm_struct *mm = current->mm;
++      struct vm_area_struct *next;
+       unsigned long min_brk;
+       bool populate;
+@@ -334,7 +335,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+       }
+       /* Check against existing mmap mappings. */
+-      if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
++      next = find_vma(mm, oldbrk);
++      if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
+               goto out;
+       /* Ok, looks good - let it rip. */
+@@ -357,10 +359,22 @@ out:
+ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+ {
+-      unsigned long max, subtree_gap;
+-      max = vma->vm_start;
+-      if (vma->vm_prev)
+-              max -= vma->vm_prev->vm_end;
++      unsigned long max, prev_end, subtree_gap;
++
++      /*
++       * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
++       * allow two stack_guard_gaps between them here, and when choosing
++       * an unmapped area; whereas when expanding we only require one.
++       * That's a little inconsistent, but keeps the code here simpler.
++       */
++      max = vm_start_gap(vma);
++      if (vma->vm_prev) {
++              prev_end = vm_end_gap(vma->vm_prev);
++              if (max > prev_end)
++                      max -= prev_end;
++              else
++                      max = 0;
++      }
+       if (vma->vm_rb.rb_left) {
+               subtree_gap = rb_entry(vma->vm_rb.rb_left,
+                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
+@@ -453,7 +467,7 @@ static void validate_mm(struct mm_struct
+                       anon_vma_unlock_read(anon_vma);
+               }
+-              highest_address = vma->vm_end;
++              highest_address = vm_end_gap(vma);
+               vma = vma->vm_next;
+               i++;
+       }
+@@ -622,7 +636,7 @@ void __vma_link_rb(struct mm_struct *mm,
+       if (vma->vm_next)
+               vma_gap_update(vma->vm_next);
+       else
+-              mm->highest_vm_end = vma->vm_end;
++              mm->highest_vm_end = vm_end_gap(vma);
+       /*
+        * vma->vm_prev wasn't known when we followed the rbtree to find the
+@@ -874,7 +888,7 @@ again:                     remove_next = 1 + (end > next->
+                       vma_gap_update(vma);
+               if (end_changed) {
+                       if (!next)
+-                              mm->highest_vm_end = end;
++                              mm->highest_vm_end = vm_end_gap(vma);
+                       else if (!adjust_next)
+                               vma_gap_update(next);
+               }
+@@ -1740,7 +1754,7 @@ unsigned long unmapped_area(struct vm_un
+       while (true) {
+               /* Visit left subtree if it looks promising */
+-              gap_end = vma->vm_start;
++              gap_end = vm_start_gap(vma);
+               if (gap_end >= low_limit && vma->vm_rb.rb_left) {
+                       struct vm_area_struct *left =
+                               rb_entry(vma->vm_rb.rb_left,
+@@ -1751,7 +1765,7 @@ unsigned long unmapped_area(struct vm_un
+                       }
+               }
+-              gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++              gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ check_current:
+               /* Check if current node has a suitable gap */
+               if (gap_start > high_limit)
+@@ -1778,8 +1792,8 @@ check_current:
+                       vma = rb_entry(rb_parent(prev),
+                                      struct vm_area_struct, vm_rb);
+                       if (prev == vma->vm_rb.rb_left) {
+-                              gap_start = vma->vm_prev->vm_end;
+-                              gap_end = vma->vm_start;
++                              gap_start = vm_end_gap(vma->vm_prev);
++                              gap_end = vm_start_gap(vma);
+                               goto check_current;
+                       }
+               }
+@@ -1843,7 +1857,7 @@ unsigned long unmapped_area_topdown(stru
+       while (true) {
+               /* Visit right subtree if it looks promising */
+-              gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++              gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+               if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+                       struct vm_area_struct *right =
+                               rb_entry(vma->vm_rb.rb_right,
+@@ -1856,7 +1870,7 @@ unsigned long unmapped_area_topdown(stru
+ check_current:
+               /* Check if current node has a suitable gap */
+-              gap_end = vma->vm_start;
++              gap_end = vm_start_gap(vma);
+               if (gap_end < low_limit)
+                       return -ENOMEM;
+               if (gap_start <= high_limit && gap_end - gap_start >= length)
+@@ -1882,7 +1896,7 @@ check_current:
+                                      struct vm_area_struct, vm_rb);
+                       if (prev == vma->vm_rb.rb_right) {
+                               gap_start = vma->vm_prev ?
+-                                      vma->vm_prev->vm_end : 0;
++                                      vm_end_gap(vma->vm_prev) : 0;
+                               goto check_current;
+                       }
+               }
+@@ -1920,7 +1934,7 @@ arch_get_unmapped_area(struct file *filp
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+       struct mm_struct *mm = current->mm;
+-      struct vm_area_struct *vma;
++      struct vm_area_struct *vma, *prev;
+       struct vm_unmapped_area_info info;
+       if (len > TASK_SIZE - mmap_min_addr)
+@@ -1931,9 +1945,10 @@ arch_get_unmapped_area(struct file *filp
+       if (addr) {
+               addr = PAGE_ALIGN(addr);
+-              vma = find_vma(mm, addr);
++              vma = find_vma_prev(mm, addr, &prev);
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-                  (!vma || addr + len <= vma->vm_start))
++                  (!vma || addr + len <= vm_start_gap(vma)) &&
++                  (!prev || addr >= vm_end_gap(prev)))
+                       return addr;
+       }
+@@ -1956,7 +1971,7 @@ arch_get_unmapped_area_topdown(struct fi
+                         const unsigned long len, const unsigned long pgoff,
+                         const unsigned long flags)
+ {
+-      struct vm_area_struct *vma;
++      struct vm_area_struct *vma, *prev;
+       struct mm_struct *mm = current->mm;
+       unsigned long addr = addr0;
+       struct vm_unmapped_area_info info;
+@@ -1971,9 +1986,10 @@ arch_get_unmapped_area_topdown(struct fi
+       /* requesting a specific address */
+       if (addr) {
+               addr = PAGE_ALIGN(addr);
+-              vma = find_vma(mm, addr);
++              vma = find_vma_prev(mm, addr, &prev);
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-                              (!vma || addr + len <= vma->vm_start))
++                              (!vma || addr + len <= vm_start_gap(vma)) &&
++                              (!prev || addr >= vm_end_gap(prev)))
+                       return addr;
+       }
+@@ -2099,21 +2115,19 @@ find_vma_prev(struct mm_struct *mm, unsi
+  * update accounting. This is shared with both the
+  * grow-up and grow-down cases.
+  */
+-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
++static int acct_stack_growth(struct vm_area_struct *vma,
++                           unsigned long size, unsigned long grow)
+ {
+       struct mm_struct *mm = vma->vm_mm;
+       struct rlimit *rlim = current->signal->rlim;
+-      unsigned long new_start, actual_size;
++      unsigned long new_start;
+       /* address space limit tests */
+       if (!may_expand_vm(mm, grow))
+               return -ENOMEM;
+       /* Stack limit test */
+-      actual_size = size;
+-      if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+-              actual_size -= PAGE_SIZE;
+-      if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
++      if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+               return -ENOMEM;
+       /* mlock limit tests */
+@@ -2154,17 +2168,30 @@ static int acct_stack_growth(struct vm_a
+  */
+ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
++      struct vm_area_struct *next;
++      unsigned long gap_addr;
+       int error = 0;
+       if (!(vma->vm_flags & VM_GROWSUP))
+               return -EFAULT;
+       /* Guard against wrapping around to address 0. */
+-      if (address < PAGE_ALIGN(address+4))
+-              address = PAGE_ALIGN(address+4);
+-      else
++      address &= PAGE_MASK;
++      address += PAGE_SIZE;
++      if (!address)
+               return -ENOMEM;
++      /* Enforce stack_guard_gap */
++      gap_addr = address + stack_guard_gap;
++      if (gap_addr < address)
++              return -ENOMEM;
++      next = vma->vm_next;
++      if (next && next->vm_start < gap_addr) {
++              if (!(next->vm_flags & VM_GROWSUP))
++                      return -ENOMEM;
++              /* Check that both stack segments have the same anon_vma? */
++      }
++
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
+@@ -2205,7 +2232,7 @@ int expand_upwards(struct vm_area_struct
+                               if (vma->vm_next)
+                                       vma_gap_update(vma->vm_next);
+                               else
+-                                      vma->vm_mm->highest_vm_end = address;
++                                      vma->vm_mm->highest_vm_end = vm_end_gap(vma);
+                               spin_unlock(&vma->vm_mm->page_table_lock);
+                               perf_event_mmap(vma);
+@@ -2225,6 +2252,8 @@ int expand_upwards(struct vm_area_struct
+ int expand_downwards(struct vm_area_struct *vma,
+                                  unsigned long address)
+ {
++      struct vm_area_struct *prev;
++      unsigned long gap_addr;
+       int error;
+       address &= PAGE_MASK;
+@@ -2232,6 +2261,17 @@ int expand_downwards(struct vm_area_stru
+       if (error)
+               return error;
++      /* Enforce stack_guard_gap */
++      gap_addr = address - stack_guard_gap;
++      if (gap_addr > address)
++              return -ENOMEM;
++      prev = vma->vm_prev;
++      if (prev && prev->vm_end > gap_addr) {
++              if (!(prev->vm_flags & VM_GROWSDOWN))
++                      return -ENOMEM;
++              /* Check that both stack segments have the same anon_vma? */
++      }
++
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
+@@ -2283,28 +2323,25 @@ int expand_downwards(struct vm_area_stru
+       return error;
+ }
+-/*
+- * Note how expand_stack() refuses to expand the stack all the way to
+- * abut the next virtual mapping, *unless* that mapping itself is also
+- * a stack mapping. We want to leave room for a guard page, after all
+- * (the guard page itself is not added here, that is done by the
+- * actual page faulting logic)
+- *
+- * This matches the behavior of the guard page logic (see mm/memory.c:
+- * check_stack_guard_page()), which only allows the guard page to be
+- * removed under these circumstances.
+- */
++/* enforced gap between the expanding stack and other mappings. */
++unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
++
++static int __init cmdline_parse_stack_guard_gap(char *p)
++{
++      unsigned long val;
++      char *endptr;
++
++      val = simple_strtoul(p, &endptr, 10);
++      if (!*endptr)
++              stack_guard_gap = val << PAGE_SHIFT;
++
++      return 0;
++}
++__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
++
+ #ifdef CONFIG_STACK_GROWSUP
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-      struct vm_area_struct *next;
+-
+-      address &= PAGE_MASK;
+-      next = vma->vm_next;
+-      if (next && next->vm_start == address + PAGE_SIZE) {
+-              if (!(next->vm_flags & VM_GROWSUP))
+-                      return -ENOMEM;
+-      }
+       return expand_upwards(vma, address);
+ }
+@@ -2326,14 +2363,6 @@ find_extend_vma(struct mm_struct *mm, un
+ #else
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-      struct vm_area_struct *prev;
+-
+-      address &= PAGE_MASK;
+-      prev = vma->vm_prev;
+-      if (prev && prev->vm_end == address) {
+-              if (!(prev->vm_flags & VM_GROWSDOWN))
+-                      return -ENOMEM;
+-      }
+       return expand_downwards(vma, address);
+ }
+@@ -2429,7 +2458,7 @@ detach_vmas_to_be_unmapped(struct mm_str
+               vma->vm_prev = prev;
+               vma_gap_update(vma);
+       } else
+-              mm->highest_vm_end = prev ? prev->vm_end : 0;
++              mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+       tail_vma->vm_next = NULL;
+       /* Kill the cache */
diff --git a/queue-3.18/series b/queue-3.18/series
index 08b37b547c3c406cfb79bff2a5b35b58068c84bd..780af286a9cba957c8ee260ace4ccd9d3a6f104b 100644 (file)
--- a/queue-3.18/series
+++ b/queue-3.18/series
@@ -29,3 +29,4 @@ mm-memory-failure.c-use-compound_head-flags-for-huge-pages.patch
 swap-cond_resched-in-swap_cgroup_prepare.patch
 genirq-release-resources-in-__setup_irq-error-path.patch
 alarmtimer-rate-limit-periodic-intervals.patch
+mm-larger-stack-guard-gap-between-vmas.patch