git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Nov 2014 18:31:36 +0000 (10:31 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Nov 2014 18:31:36 +0000 (10:31 -0800)
added patches:
mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
x86-finish-user-fault-error-path-with-fatal-signal.patch

queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch [new file with mode: 0644]
queue-3.10/series
queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch [new file with mode: 0644]

diff --git a/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch b/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
new file mode 100644 (file)
index 0000000..013561f
--- /dev/null
+++ b/queue-3.10/mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
@@ -0,0 +1,217 @@
+From 519e52473ebe9db5cdef44670d5a97f1fd53d721 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 12 Sep 2013 15:13:42 -0700
+Subject: mm: memcg: enable memcg OOM killer only for user faults
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 519e52473ebe9db5cdef44670d5a97f1fd53d721 upstream.
+
+System calls and kernel faults (uaccess, gup) can handle an out of memory
+situation gracefully and just return -ENOMEM.
+
+Enable the memcg OOM killer only for user faults, where it's really the
+only option available.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Michal Hocko <mhocko@suse.cz>
+Cc: David Rientjes <rientjes@google.com>
+Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: azurIt <azurit@pobox.sk>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/memcontrol.h |   44 ++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/sched.h      |    3 +++
+ mm/filemap.c               |   11 ++++++++++-
+ mm/memcontrol.c            |    2 +-
+ mm/memory.c                |   40 ++++++++++++++++++++++++++++++----------
+ 5 files changed, 88 insertions(+), 12 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -124,6 +124,37 @@ extern void mem_cgroup_print_oom_info(st
+ extern void mem_cgroup_replace_page_cache(struct page *oldpage,
+                                       struct page *newpage);
+
++/**
++ * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
++ * @new: true to enable, false to disable
++ *
++ * Toggle whether a failed memcg charge should invoke the OOM killer
++ * or just return -ENOMEM.  Returns the previous toggle state.
++ */
++static inline bool mem_cgroup_toggle_oom(bool new)
++{
++      bool old;
++
++      old = current->memcg_oom.may_oom;
++      current->memcg_oom.may_oom = new;
++
++      return old;
++}
++
++static inline void mem_cgroup_enable_oom(void)
++{
++      bool old = mem_cgroup_toggle_oom(true);
++
++      WARN_ON(old == true);
++}
++
++static inline void mem_cgroup_disable_oom(void)
++{
++      bool old = mem_cgroup_toggle_oom(false);
++
++      WARN_ON(old == false);
++}
++
+ #ifdef CONFIG_MEMCG_SWAP
+ extern int do_swap_account;
+ #endif
+@@ -347,6 +378,19 @@ static inline void mem_cgroup_end_update
+ {
+ }
+
++static inline bool mem_cgroup_toggle_oom(bool new)
++{
++      return false;
++}
++
++static inline void mem_cgroup_enable_oom(void)
++{
++}
++
++static inline void mem_cgroup_disable_oom(void)
++{
++}
++
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+                                           enum mem_cgroup_page_stat_item idx)
+ {
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1411,6 +1411,9 @@ struct task_struct {
+               unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
+       } memcg_batch;
+       unsigned int memcg_kmem_skip_account;
++      struct memcg_oom_info {
++              unsigned int may_oom:1;
++      } memcg_oom;
+ #endif
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+       atomic_t ptrace_bp_refcnt;
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1614,6 +1614,7 @@ int filemap_fault(struct vm_area_struct
+       struct inode *inode = mapping->host;
+       pgoff_t offset = vmf->pgoff;
+       struct page *page;
++      bool memcg_oom;
+       pgoff_t size;
+       int ret = 0;
+
+@@ -1622,7 +1623,11 @@ int filemap_fault(struct vm_area_struct
+               return VM_FAULT_SIGBUS;
+
+       /*
+-       * Do we have something in the page cache already?
++       * Do we have something in the page cache already?  Either
++       * way, try readahead, but disable the memcg OOM killer for it
++       * as readahead is optional and no errors are propagated up
++       * the fault stack.  The OOM killer is enabled while trying to
++       * instantiate the faulting page individually below.
+        */
+       page = find_get_page(mapping, offset);
+       if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+@@ -1630,10 +1635,14 @@ int filemap_fault(struct vm_area_struct
+                * We found the page, so try async readahead before
+                * waiting for the lock.
+                */
++              memcg_oom = mem_cgroup_toggle_oom(false);
+               do_async_mmap_readahead(vma, ra, file, page, offset);
++              mem_cgroup_toggle_oom(memcg_oom);
+       } else if (!page) {
+               /* No page in the page cache at all */
++              memcg_oom = mem_cgroup_toggle_oom(false);
+               do_sync_mmap_readahead(vma, ra, file, offset);
++              mem_cgroup_toggle_oom(memcg_oom);
+               count_vm_event(PGMAJFAULT);
+               mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+               ret = VM_FAULT_MAJOR;
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2613,7 +2613,7 @@ static int mem_cgroup_do_charge(struct m
+               return CHARGE_RETRY;
+
+       /* If we don't need to call oom-killer at el, return immediately */
+-      if (!oom_check)
++      if (!oom_check || !current->memcg_oom.may_oom)
+               return CHARGE_NOMEM;
+       /* check OOM */
+       if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3754,22 +3754,14 @@ unlock:
+ /*
+  * By the time we get here, we already hold the mm semaphore
+  */
+-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+-              unsigned long address, unsigned int flags)
++static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++                           unsigned long address, unsigned int flags)
+ {
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+-      __set_current_state(TASK_RUNNING);
+-
+-      count_vm_event(PGFAULT);
+-      mem_cgroup_count_vm_event(mm, PGFAULT);
+-
+-      /* do counter updates before entering really critical section. */
+-      check_sync_rss_stat(current);
+-
+       if (unlikely(is_vm_hugetlb_page(vma)))
+               return hugetlb_fault(mm, vma, address, flags);
+
+@@ -3850,6 +3842,34 @@ retry:
+       return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+ }
++int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++                  unsigned long address, unsigned int flags)
++{
++      int ret;
++
++      __set_current_state(TASK_RUNNING);
++
++      count_vm_event(PGFAULT);
++      mem_cgroup_count_vm_event(mm, PGFAULT);
++
++      /* do counter updates before entering really critical section. */
++      check_sync_rss_stat(current);
++
++      /*
++       * Enable the memcg OOM handling for faults triggered in user
++       * space.  Kernel faults are handled more gracefully.
++       */
++      if (flags & FAULT_FLAG_USER)
++              mem_cgroup_enable_oom();
++
++      ret = __handle_mm_fault(mm, vma, address, flags);
++
++      if (flags & FAULT_FLAG_USER)
++              mem_cgroup_disable_oom();
++
++      return ret;
++}
++
+
+ #ifndef __PAGETABLE_PUD_FOLDED
+ /*
+  * Allocate page upper directory.
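
A note on the pattern in the patch above: it threads a single per-task bit (current->memcg_oom.may_oom) through the fault path, saving and restoring it around optional work such as readahead, and bracketing it around user faults. The following stand-alone C sketch models that idiom only. It is an illustration, not kernel code: may_oom is a plain global here instead of task state, and FAULT_FLAG_USER, toggle_oom() and readahead() are simplified stand-ins for the kernel symbols that play the same role.

#include <stdbool.h>
#include <stdio.h>

#define FAULT_FLAG_USER 0x01	/* stand-in for the real fault flag */

/* Stand-in for current->memcg_oom.may_oom: a per-task flag that says
 * whether a failed memcg charge may invoke the OOM killer. */
static bool may_oom;

/* Models mem_cgroup_toggle_oom(): install the new state and return
 * the old one so callers can nest and restore. */
static bool toggle_oom(bool new_state)
{
	bool old = may_oom;
	may_oom = new_state;
	return old;
}

/* Readahead is optional and its errors never propagate up the fault
 * stack, so the OOM killer is parked around it, as in filemap_fault(). */
static void readahead(void)
{
	bool saved = toggle_oom(false);
	printf("readahead:   may_oom=%d\n", may_oom);	/* always 0 here */
	toggle_oom(saved);
}

/* Models handle_mm_fault() after the patch: OOM handling is enabled
 * only for user-space faults, bracketed around the actual work. */
static void handle_fault(unsigned int flags)
{
	if (flags & FAULT_FLAG_USER)
		toggle_oom(true);	/* mem_cgroup_enable_oom() */

	readahead();
	printf("charge path: may_oom=%d\n", may_oom);

	if (flags & FAULT_FLAG_USER)
		toggle_oom(false);	/* mem_cgroup_disable_oom() */
}

int main(void)
{
	handle_fault(FAULT_FLAG_USER);	/* user fault: charge path prints 1 */
	handle_fault(0);		/* kernel fault: charge path prints 0 */
	return 0;
}

The save/restore shape of toggle_oom() is what lets the readahead path nest safely inside an already-enabled user fault: whatever state the caller had is reinstated on the way out, which is why filemap_fault() stores the old value rather than calling a plain enable/disable pair.
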
diff --git a/queue-3.10/series b/queue-3.10/series
index b532774f34676267aa30130cf3bde99cd641afb7..2bda20e3f483a4bdfa3444ffbbe6e46d782133be 100644 (file)
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -63,3 +63,5 @@ mm-invoke-oom-killer-from-remaining-unconverted-page-fault-handlers.patch
 arch-mm-remove-obsolete-init-oom-protection.patch
 arch-mm-do-not-invoke-oom-killer-on-kernel-fault-oom.patch
 arch-mm-pass-userspace-fault-flag-to-generic-fault-handler.patch
+x86-finish-user-fault-error-path-with-fatal-signal.patch
+mm-memcg-enable-memcg-oom-killer-only-for-user-faults.patch
diff --git a/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch b/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch
new file mode 100644 (file)
index 0000000..e0d7e53
--- /dev/null
+++ b/queue-3.10/x86-finish-user-fault-error-path-with-fatal-signal.patch
@@ -0,0 +1,107 @@
+From 3a13c4d761b4b979ba8767f42345fed3274991b0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 12 Sep 2013 15:13:40 -0700
+Subject: x86: finish user fault error path with fatal signal
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 3a13c4d761b4b979ba8767f42345fed3274991b0 upstream.
+
+The x86 fault handler bails in the middle of error handling when the
+task has a fatal signal pending.  For a subsequent patch this is a
+problem in OOM situations because it relies on pagefault_out_of_memory()
+being called even when the task has been killed, to perform proper
+per-task OOM state unwinding.
+
+Shortcutting the fault like this is a rather minor optimization that
+saves a few instructions in rare cases.  Just remove it for
+user-triggered faults.
+
+Use the opportunity to split the fault retry handling from actual fault
+errors and add locking documentation that reads surprisingly similar to
+ARM's.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: azurIt <azurit@pobox.sk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c |   35 +++++++++++++++++------------------
+ 1 file changed, 17 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned
+       force_sig_info_fault(SIGBUS, code, address, tsk, fault);
+ }
+
+-static noinline int
++static noinline void
+ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+              unsigned long address, unsigned int fault)
+ {
+-      /*
+-       * Pagefault was interrupted by SIGKILL. We have no reason to
+-       * continue pagefault.
+-       */
+-      if (fatal_signal_pending(current)) {
+-              if (!(fault & VM_FAULT_RETRY))
+-                      up_read(&current->mm->mmap_sem);
+-              if (!(error_code & PF_USER))
+-                      no_context(regs, error_code, address, 0, 0);
+-              return 1;
++      if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
++              up_read(&current->mm->mmap_sem);
++              no_context(regs, error_code, address, 0, 0);
++              return;
+       }
+-      if (!(fault & VM_FAULT_ERROR))
+-              return 0;
+
+       if (fault & VM_FAULT_OOM) {
+               /* Kernel mode? Handle exceptions or die: */
+@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, uns
+                       up_read(&current->mm->mmap_sem);
+                       no_context(regs, error_code, address,
+                                  SIGSEGV, SEGV_MAPERR);
+-                      return 1;
++                      return;
+               }
+
+               up_read(&current->mm->mmap_sem);
+@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, uns
+               else
+                       BUG();
+       }
+-      return 1;
+ }
+
+ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+@@ -1193,9 +1184,17 @@ good_area:
+        */
+       fault = handle_mm_fault(mm, vma, address, flags);
+
+-      if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+-              if (mm_fault_error(regs, error_code, address, fault))
+-                      return;
++      /*
++       * If we need to retry but a fatal signal is pending, handle the
++       * signal first. We do not need to release the mmap_sem because it
++       * would already be released in __lock_page_or_retry in mm/filemap.c.
++       */
++      if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
++              return;
++
++      if (unlikely(fault & VM_FAULT_ERROR)) {
++              mm_fault_error(regs, error_code, address, fault);
++              return;
+       }
+
+       /*
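
To summarize the restructured error path, here is a minimal user-space C sketch of the new dispatch order: a fatal signal during retry bails out before mm_fault_error() is ever reached, and mm_fault_error() now handles only genuine VM_FAULT_ERROR results. The VM_FAULT_* values and both functions below are simplified stand-ins invented for this illustration; the real handler also manages mmap_sem, which is omitted here.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's VM_FAULT_* bits. */
#define VM_FAULT_RETRY 0x01
#define VM_FAULT_ERROR 0x02
#define VM_FAULT_OOM   0x04

/* Models the reworked mm_fault_error(): it is called only for real
 * errors now, so it no longer needs a return value to report whether
 * it acted. */
static void mm_fault_error_model(unsigned int fault)
{
	if (fault & VM_FAULT_OOM)
		puts("  -> OOM path (pagefault_out_of_memory etc.)");
	else
		puts("  -> signal path (SIGSEGV/SIGBUS)");
}

/* Models the dispatch at the end of the x86 fault handler after the
 * patch: retry-with-fatal-signal is checked first, then real errors. */
static void handle_fault_result(unsigned int fault, bool fatal_pending)
{
	if ((fault & VM_FAULT_RETRY) && fatal_pending) {
		puts("fatal signal during retry: return, lock already dropped");
		return;
	}
	if (fault & VM_FAULT_ERROR) {
		puts("real fault error:");
		mm_fault_error_model(fault);
		return;
	}
	puts("fault handled (or plain retry): continue");
}

int main(void)
{
	handle_fault_result(VM_FAULT_RETRY, true);
	handle_fault_result(VM_FAULT_ERROR | VM_FAULT_OOM, false);
	handle_fault_result(0, false);
	return 0;
}
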