From: Andrea Arcangeli Subject: avoid silent stack overflow over the heap Patch-mainline: no References: bnc#44807 bnc#211997 Keep a guard gap between a VM_GROWSDOWN stack and the mapping below it. The gap size is set in pages by the new vm "heap-stack-gap" sysctl (heap_stack_gap, default 1). expand_stack() takes the previous vma as a third argument; when it is non-NULL, expand_downwards() fails with -ENOMEM instead of growing the stack into the gap above prev_vma->vm_end. The generic and x86-64 arch_get_unmapped_area() leave the same gap free when placing a new mapping below a growsdown vma. Callers that pass NULL for prev_vma get no gap check on stack growth. Signed-off-by: Andrea Arcangeli Updated-by: Jeff Mahoney --- arch/alpha/mm/fault.c | 2 - arch/arm/mm/fault.c | 2 - arch/cris/mm/fault.c | 2 - arch/frv/mm/fault.c | 2 - arch/ia64/mm/fault.c | 2 - arch/m32r/mm/fault.c | 2 - arch/m68k/mm/fault.c | 2 - arch/mips/mm/fault.c | 2 - arch/parisc/mm/fault.c | 2 - arch/powerpc/mm/fault.c | 5 ++- arch/powerpc/platforms/cell/spu_fault.c | 2 - arch/s390/mm/fault.c | 2 - arch/sh/mm/fault_32.c | 2 - arch/sh/mm/tlbflush_64.c | 2 - arch/sparc/mm/fault.c | 4 +-- arch/sparc64/mm/fault.c | 2 - arch/um/kernel/trap.c | 9 ++++-- arch/x86/kernel/sys_x86_64.c | 10 ++++++- arch/x86/mm/fault.c | 10 ++++++- arch/xtensa/mm/fault.c | 2 - include/linux/mm.h | 5 +++ kernel/sysctl.c | 8 ++++++ mm/mmap.c | 42 +++++++++++++++++++++++++------- 23 files changed, 88 insertions(+), 35 deletions(-) --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -123,7 +123,7 @@ do_page_fault(unsigned long address, uns goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* Ok, we have a good vm_area for this memory access, so --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -233,7 +233,7 @@ out_of_memory: goto survive; check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr, NULL)) goto good_area; out: return fault; --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -133,7 +133,7 @@ do_page_fault(unsigned long address, str if (address + PAGE_SIZE < rdusp()) goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -121,7 +121,7 @@ asmlinkage void do_page_fault(int datamm } } - if (expand_stack(vma, ear0)) + if (expand_stack(vma, ear0, 
NULL)) goto bad_area; /* --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -185,7 +185,7 @@ ia64_do_page_fault (unsigned long addres if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) || REGION_OFFSET(address) >= RGN_MAP_LIMIT) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL /* FIXME? */)) goto bad_area; } else { vma = prev_vma; --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -159,7 +159,7 @@ asmlinkage void do_page_fault(struct pt_ goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -121,7 +121,7 @@ int do_page_fault(struct pt_regs *regs, if (address + 256 < rdusp()) goto map_err; } - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto map_err; /* --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -80,7 +80,7 @@ asmlinkage void do_page_fault(struct pt_ goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -196,7 +196,7 @@ good_area: check_expansion: vma = prev_vma; - if (vma && (expand_stack(vma, address) == 0)) + if (vma && (expand_stack(vma, address, NULL) == 0)) goto good_area; /* --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -116,7 +116,7 @@ static int store_updates_sp(struct pt_re int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { - struct vm_area_struct * vma; + struct vm_area_struct * vma, * prev_vma; struct mm_struct *mm = current->mm; siginfo_t info; int code = SEGV_MAPERR; @@ -230,7 +230,8 @@ int __kprobes do_page_fault(struct pt_re && (!user_mode(regs) || !store_updates_sp(regs))) goto bad_area; } - if 
(expand_stack(vma, address)) + find_vma_prev(mm, address, &prev_vma); + if (expand_stack(vma, address, prev_vma)) goto bad_area; good_area: --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/platforms/cell/spu_fault.c @@ -59,7 +59,7 @@ int spu_handle_mm_fault(struct mm_struct goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, ea)) + if (expand_stack(vma, ea, NULL)) goto bad_area; good_area: is_write = dsisr & MFC_DSISR_ACCESS_PUT; --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -350,7 +350,7 @@ do_exception(struct pt_regs *regs, unsig goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL /* FIXME? */)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -108,7 +108,7 @@ asmlinkage void __kprobes do_page_fault( goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -153,7 +153,7 @@ asmlinkage void do_page_fault(struct pt_ #endif goto bad_area; } - if (expand_stack(vma, address)) { + if (expand_stack(vma, address, NULL)) { #ifdef DEBUG_FAULT print_task(tsk); printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n", --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c @@ -219,7 +219,7 @@ asmlinkage void do_sparc_fault(struct pt goto good_area; if(!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if(expand_stack(vma, address)) + if(expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so @@ -472,7 +472,7 @@ static void force_user_fault(unsigned lo goto good_area; if(!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if(expand_stack(vma, 
address)) + if(expand_stack(vma, address, NULL)) goto bad_area; good_area: info.si_code = SEGV_ACCERR; --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -367,7 +367,7 @@ continue_fault: goto bad_area; } } - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -24,7 +24,7 @@ int handle_page_fault(unsigned long addr int is_write, int is_user, int *code_out) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev_vma; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -50,8 +50,11 @@ int handle_page_fault(unsigned long addr goto out; else if (is_user && !ARCH_IS_STACKGROW(address)) goto out; - else if (expand_stack(vma, address)) - goto out; + else { + find_vma_prev(mm, address, &prev_vma); + if(expand_stack(vma, address, prev_vma)) + goto out; + } good_area: *code_out = SEGV_ACCERR; --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -106,6 +106,7 @@ arch_get_unmapped_area(struct file *filp full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + unsigned long __heap_stack_gap; /* At this point: (!vma || addr < vma->vm_end). 
*/ if (end - len < addr) { /* @@ -119,7 +120,14 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (!vma) + goto got_it; + __heap_stack_gap = 0; + if (vma->vm_flags & VM_GROWSDOWN) + __heap_stack_gap = min(end-(addr+len), + (unsigned long) heap_stack_gap << PAGE_SHIFT); + if (addr + len + __heap_stack_gap <= vma->vm_start) { + got_it: /* * Remember the place where we stopped the search: */ --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -585,7 +585,7 @@ void __kprobes do_page_fault(struct pt_r { struct task_struct *tsk; struct mm_struct *mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev_vma; unsigned long address; int write, si_code; int fault; @@ -719,7 +719,13 @@ again: if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) goto bad_area; } - if (expand_stack(vma, address)) + /* + * find_vma_prev is just a bit slower, because it cannot + * use the mmap_cache, so we run it only in the growsdown + * slow path and we leave find_vma in the fast path. 
+ */ + find_vma_prev(current->mm, address, &prev_vma); + if (expand_stack(vma, address, prev_vma)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -80,7 +80,7 @@ void do_page_fault(struct pt_regs *regs) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* Ok, we have a good vm_area for this memory access, so --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1208,7 +1208,10 @@ void page_cache_async_readahead(struct a unsigned long max_sane_readahead(unsigned long nr); /* Do stack extension */ -extern int expand_stack(struct vm_area_struct *vma, unsigned long address); +#define EXPAND_STACK_HAS_3_ARGS +extern int heap_stack_gap; +extern int expand_stack(struct vm_area_struct * vma, unsigned long address, + struct vm_area_struct * prev_vma); #ifdef CONFIG_IA64 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #endif --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1208,6 +1208,14 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "heap-stack-gap", + .data = &heap_stack_gap, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt --- a/mm/mmap.c +++ b/mm/mmap.c @@ -85,6 +85,7 @@ int sysctl_overcommit_memory = OVERCOMMI int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +int heap_stack_gap = 1; /* * Check that a process has enough memory to allocate a new virtual @@ -1291,6 +1292,7 @@ arch_get_unmapped_area(struct file *filp full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < 
vma->vm_end). */ + unsigned long __heap_stack_gap; if (TASK_SIZE - len < addr) { /* * Start a new search - just in case we missed @@ -1304,7 +1306,14 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (!vma) + goto got_it; + __heap_stack_gap = 0; + if (vma->vm_flags & VM_GROWSDOWN) + __heap_stack_gap = min(TASK_SIZE-(addr+len), + (unsigned long) heap_stack_gap << PAGE_SHIFT); + if (addr + len + __heap_stack_gap <= vma->vm_start) { + got_it: /* * Remember the place where we stopped the search: */ @@ -1633,11 +1642,9 @@ int expand_upwards(struct vm_area_struct } #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ -/* - * vma is the first one with address < vma->vm_start. Have to extend vma. - */ static inline int expand_downwards(struct vm_area_struct *vma, - unsigned long address) + unsigned long address, + struct vm_area_struct *prev_vma) { int error; @@ -1665,6 +1672,13 @@ static inline int expand_downwards(struc if (address < vma->vm_start) { unsigned long size, grow; + error = -ENOMEM; + if (prev_vma) { + unsigned long __heap_stack_gap = min(TASK_SIZE-prev_vma->vm_end, + (unsigned long) heap_stack_gap << PAGE_SHIFT); + if (unlikely(prev_vma->vm_end + __heap_stack_gap > address)) + goto out_unlock; + } size = vma->vm_end - address; grow = (vma->vm_start - address) >> PAGE_SHIFT; @@ -1674,6 +1688,7 @@ static inline int expand_downwards(struc vma->vm_pgoff -= grow; } } + out_unlock: anon_vma_unlock(vma); return error; } @@ -1684,8 +1699,16 @@ int expand_stack_downwards(struct vm_are } #ifdef CONFIG_STACK_GROWSUP -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack(struct vm_area_struct * vma, unsigned long address, + struct vm_area_struct * prev_vma) { + /* + * If you re-use the heap-stack-gap for a growsup stack you + * should implement the feature for growsup too and remove + * this WARN_ON. 
+ */ + WARN_ON(prev_vma); + return expand_upwards(vma, address); } @@ -1698,7 +1721,7 @@ find_extend_vma(struct mm_struct *mm, un vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + if (!prev || expand_stack(prev, addr, NULL)) return NULL; if (prev->vm_flags & VM_LOCKED) make_pages_present(addr, prev->vm_end); @@ -1713,7 +1736,7 @@ int expand_stack(struct vm_area_struct * struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr) { - struct vm_area_struct * vma; + struct vm_area_struct * vma, * prev_vma; unsigned long start; addr &= PAGE_MASK; @@ -1725,7 +1748,8 @@ find_extend_vma(struct mm_struct * mm, u if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; start = vma->vm_start; - if (expand_stack(vma, addr)) + find_vma_prev(mm, addr, &prev_vma); + if (expand_stack(vma, addr, prev_vma)) return NULL; if (vma->vm_flags & VM_LOCKED) make_pages_present(addr, start);