From: Greg Kroah-Hartman
Date: Wed, 19 Nov 2014 18:31:36 +0000 (-0800)
Subject: 3.10-stable patches
X-Git-Tag: v3.10.61~6
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5bc092f17d231af775124e62b829c447c5706394;p=thirdparty%2Fkernel%2Fstable-queue.git

3.10-stable patches

added patches:
	mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
	x86-finish-user-fault-error-path-with-fatal-signal.patch
---

diff --git a/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch b/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
new file mode 100644
index 00000000000..013561f0a67
--- /dev/null
+++ b/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
@@ -0,0 +1,217 @@
+From 519e52473ebe9db5cdef44670d5a97f1fd53d721 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner
+Date: Thu, 12 Sep 2013 15:13:42 -0700
+Subject: mm: memcg: enable memcg OOM killer only for user faults
+
+From: Johannes Weiner
+
+commit 519e52473ebe9db5cdef44670d5a97f1fd53d721 upstream.
+
+System calls and kernel faults (uaccess, gup) can handle an out of memory
+situation gracefully and just return -ENOMEM.
+
+Enable the memcg OOM killer only for user faults, where it's really the
+only option available.
+
+Signed-off-by: Johannes Weiner
+Acked-by: Michal Hocko
+Cc: David Rientjes
+Cc: KAMEZAWA Hiroyuki
+Cc: azurIt
+Cc: KOSAKI Motohiro
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Cong Wang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/memcontrol.h |   44 ++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/sched.h      |    3 +++
+ mm/filemap.c               |   11 ++++++++++-
+ mm/memcontrol.c            |    2 +-
+ mm/memory.c                |   40 ++++++++++++++++++++++++++++++----------
+ 5 files changed, 88 insertions(+), 12 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -124,6 +124,37 @@ extern void mem_cgroup_print_oom_info(st
+ extern void mem_cgroup_replace_page_cache(struct page *oldpage,
+ 					struct page *newpage);
+ 
++/**
++ * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
++ * @new: true to enable, false to disable
++ *
++ * Toggle whether a failed memcg charge should invoke the OOM killer
++ * or just return -ENOMEM. Returns the previous toggle state.
++ */
++static inline bool mem_cgroup_toggle_oom(bool new)
++{
++	bool old;
++
++	old = current->memcg_oom.may_oom;
++	current->memcg_oom.may_oom = new;
++
++	return old;
++}
++
++static inline void mem_cgroup_enable_oom(void)
++{
++	bool old = mem_cgroup_toggle_oom(true);
++
++	WARN_ON(old == true);
++}
++
++static inline void mem_cgroup_disable_oom(void)
++{
++	bool old = mem_cgroup_toggle_oom(false);
++
++	WARN_ON(old == false);
++}
++
+ #ifdef CONFIG_MEMCG_SWAP
+ extern int do_swap_account;
+ #endif
+@@ -347,6 +378,19 @@ static inline void mem_cgroup_end_update
+ {
+ }
+ 
++static inline bool mem_cgroup_toggle_oom(bool new)
++{
++	return false;
++}
++
++static inline void mem_cgroup_enable_oom(void)
++{
++}
++
++static inline void mem_cgroup_disable_oom(void)
++{
++}
++
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+ 					    enum mem_cgroup_page_stat_item idx)
+ {
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1411,6 +1411,9 @@ struct task_struct {
+ 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
+ 	} memcg_batch;
+ 	unsigned int memcg_kmem_skip_account;
++	struct memcg_oom_info {
++		unsigned int may_oom:1;
++	} memcg_oom;
+ #endif
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+ 	atomic_t ptrace_bp_refcnt;
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1614,6 +1614,7 @@ int filemap_fault(struct vm_area_struct
+ 	struct inode *inode = mapping->host;
+ 	pgoff_t offset = vmf->pgoff;
+ 	struct page *page;
++	bool memcg_oom;
+ 	pgoff_t size;
+ 	int ret = 0;
+ 
+@@ -1622,7 +1623,11 @@ int filemap_fault(struct vm_area_struct
+ 		return VM_FAULT_SIGBUS;
+ 
+ 	/*
+-	 * Do we have something in the page cache already?
++	 * Do we have something in the page cache already? Either
++	 * way, try readahead, but disable the memcg OOM killer for it
++	 * as readahead is optional and no errors are propagated up
++	 * the fault stack. The OOM killer is enabled while trying to
++	 * instantiate the faulting page individually below.
+ 	 */
+ 	page = find_get_page(mapping, offset);
+ 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+@@ -1630,10 +1635,14 @@ int filemap_fault(struct vm_area_struct
+ 		/*
+ 		 * We found the page, so try async readahead before
+ 		 * waiting for the lock.
+ 		 */
++		memcg_oom = mem_cgroup_toggle_oom(false);
+ 		do_async_mmap_readahead(vma, ra, file, page, offset);
++		mem_cgroup_toggle_oom(memcg_oom);
+ 	} else if (!page) {
+ 		/* No page in the page cache at all */
++		memcg_oom = mem_cgroup_toggle_oom(false);
+ 		do_sync_mmap_readahead(vma, ra, file, offset);
++		mem_cgroup_toggle_oom(memcg_oom);
+ 		count_vm_event(PGMAJFAULT);
+ 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ 		ret = VM_FAULT_MAJOR;
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2613,7 +2613,7 @@ static int mem_cgroup_do_charge(struct m
+ 		return CHARGE_RETRY;
+ 
+ 	/* If we don't need to call oom-killer at el, return immediately */
+-	if (!oom_check)
++	if (!oom_check || !current->memcg_oom.may_oom)
+ 		return CHARGE_NOMEM;
+ 	/* check OOM */
+ 	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3754,22 +3754,14 @@ unlock:
+ /*
+  * By the time we get here, we already hold the mm semaphore
+  */
+-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+-		unsigned long address, unsigned int flags)
++static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++		unsigned long address, unsigned int flags)
+ {
+ 	pgd_t *pgd;
+ 	pud_t *pud;
+ 	pmd_t *pmd;
+ 	pte_t *pte;
+ 
+-	__set_current_state(TASK_RUNNING);
+-
+-	count_vm_event(PGFAULT);
+-	mem_cgroup_count_vm_event(mm, PGFAULT);
+-
+-	/* do counter updates before entering really critical section. */
+-	check_sync_rss_stat(current);
+-
+ 	if (unlikely(is_vm_hugetlb_page(vma)))
+ 		return hugetlb_fault(mm, vma, address, flags);
+ 
+@@ -3850,6 +3842,34 @@ retry:
+ 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+ }
+ 
++int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++		unsigned long address, unsigned int flags)
++{
++	int ret;
++
++	__set_current_state(TASK_RUNNING);
++
++	count_vm_event(PGFAULT);
++	mem_cgroup_count_vm_event(mm, PGFAULT);
++
++	/* do counter updates before entering really critical section. */
++	check_sync_rss_stat(current);
++
++	/*
++	 * Enable the memcg OOM handling for faults triggered in user
++	 * space. Kernel faults are handled more gracefully.
++	 */
++	if (flags & FAULT_FLAG_USER)
++		mem_cgroup_enable_oom();
++
++	ret = __handle_mm_fault(mm, vma, address, flags);
++
++	if (flags & FAULT_FLAG_USER)
++		mem_cgroup_disable_oom();
++
++	return ret;
++}
++
+ #ifndef __PAGETABLE_PUD_FOLDED
+ /*
+  * Allocate page upper directory.
diff --git a/queue-3.10/series b/queue-3.10/series
index b532774f346..2bda20e3f48 100644
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -63,3 +63,5 @@ mm-invoke-oom-killer-from-remaining-unconverted-page-fault-handlers.patch
 arch-mm-remove-obsolete-init-oom-protection.patch
 arch-mm-do-not-invoke-oom-killer-on-kernel-fault-oom.patch
 arch-mm-pass-userspace-fault-flag-to-generic-fault-handler.patch
+x86-finish-user-fault-error-path-with-fatal-signal.patch
+mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
diff --git a/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch b/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch
new file mode 100644
index 00000000000..e0d7e53bab3
--- /dev/null
+++ b/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch
@@ -0,0 +1,107 @@
+From 3a13c4d761b4b979ba8767f42345fed3274991b0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner
+Date: Thu, 12 Sep 2013 15:13:40 -0700
+Subject: x86: finish user fault error path with fatal signal
+
+From: Johannes Weiner
+
+commit 3a13c4d761b4b979ba8767f42345fed3274991b0 upstream.
+
+The x86 fault handler bails in the middle of error handling when the
+task has a fatal signal pending. For a subsequent patch this is a
+problem in OOM situations because it relies on pagefault_out_of_memory()
+being called even when the task has been killed, to perform proper
+per-task OOM state unwinding.
+
+Shortcutting the fault like this is a rather minor optimization that
+saves a few instructions in rare cases. Just remove it for
+user-triggered faults.
+
+Use the opportunity to split the fault retry handling from actual fault
+errors and add locking documentation that reads surprisingly similar to
+ARM's.
+
+Signed-off-by: Johannes Weiner
+Reviewed-by: Michal Hocko
+Acked-by: KOSAKI Motohiro
+Cc: David Rientjes
+Cc: KAMEZAWA Hiroyuki
+Cc: azurIt
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Cong Wang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/mm/fault.c |   35 +++++++++++++++++------------------
+ 1 file changed, 17 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned
+ 	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
+ }
+ 
+-static noinline int
++static noinline void
+ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+ 	       unsigned long address, unsigned int fault)
+ {
+-	/*
+-	 * Pagefault was interrupted by SIGKILL. We have no reason to
+-	 * continue pagefault.
+-	 */
+-	if (fatal_signal_pending(current)) {
+-		if (!(fault & VM_FAULT_RETRY))
+-			up_read(&current->mm->mmap_sem);
+-		if (!(error_code & PF_USER))
+-			no_context(regs, error_code, address, 0, 0);
+-		return 1;
++	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
++		up_read(&current->mm->mmap_sem);
++		no_context(regs, error_code, address, 0, 0);
++		return;
+ 	}
+-	if (!(fault & VM_FAULT_ERROR))
+-		return 0;
+ 
+ 	if (fault & VM_FAULT_OOM) {
+ 		/* Kernel mode? Handle exceptions or die: */
+@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, uns
+ 			up_read(&current->mm->mmap_sem);
+ 			no_context(regs, error_code, address,
+ 				   SIGSEGV, SEGV_MAPERR);
+-			return 1;
++			return;
+ 		}
+ 
+ 		up_read(&current->mm->mmap_sem);
+@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, uns
+ 		else
+ 			BUG();
+ 	}
+-	return 1;
+ }
+ 
+ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+@@ -1193,9 +1184,17 @@ good_area:
+ 	 */
+ 	fault = handle_mm_fault(mm, vma, address, flags);
+ 
+-	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+-		if (mm_fault_error(regs, error_code, address, fault))
+-			return;
++	/*
++	 * If we need to retry but a fatal signal is pending, handle the
++	 * signal first. We do not need to release the mmap_sem because it
++	 * would already be released in __lock_page_or_retry in mm/filemap.c.
++	 */
++	if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
++		return;
++
++	if (unlikely(fault & VM_FAULT_ERROR)) {
++		mm_fault_error(regs, error_code, address, fault);
++		return;
+ 	}
+ 
+ 	/*