1 From 3ba08129e38437561df44c36b7ea9081185d5333 Mon Sep 17 00:00:00 2001
2 From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
3 Date: Wed, 4 Jun 2014 16:11:02 -0700
4 Subject: mm/memory-failure.c: support use of a dedicated thread to handle SIGBUS(BUS_MCEERR_AO)
6 From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
8 commit 3ba08129e38437561df44c36b7ea9081185d5333 upstream.
10 Currently the memory error handler handles action-optional errors in a
11 deferred manner by default. And if a recovery-aware application wants
12 to handle it immediately, it can do so by setting the PF_MCE_EARLY flag.
13 However, such signal can be sent only to the main thread, so it's
14 problematic if the application wants to have a dedicated thread to handle it.
17 So this patch adds dedicated thread support to memory error handler. We
18 have PF_MCE_EARLY flags for each thread separately, so with this patch
19 AO signal is sent to the thread with PF_MCE_EARLY flag set, not the main
20 thread. If you want to implement a dedicated thread, you call prctl()
21 to set PF_MCE_EARLY on the thread.
23 Memory error handler collects processes to be killed, so this patch lets
24 it check PF_MCE_EARLY flag on each thread in the collecting routines.
26 No behavioral change for all non-early kill cases.
30 : The old behavior was crazy - someone with a multithreaded process might
31 : well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then
32 : that thread would see the SIGBUS with si_code = BUS_MCEERR_AO - even if
33 : that thread wasn't the main thread for the process.
35 [akpm@linux-foundation.org: coding-style fixes]
36 Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
37 Reviewed-by: Tony Luck <tony.luck@intel.com>
38 Cc: Kamil Iskra <iskra@mcs.anl.gov>
39 Cc: Andi Kleen <andi@firstfloor.org>
40 Cc: Borislav Petkov <bp@suse.de>
41 Cc: Chen Gong <gong.chen@linux.jf.intel.com>
42 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
43 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
44 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
47 Documentation/vm/hwpoison.txt | 5 +++
48 mm/memory-failure.c | 56 ++++++++++++++++++++++++++++++++----------
49 2 files changed, 48 insertions(+), 13 deletions(-)
51 --- a/Documentation/vm/hwpoison.txt
52 +++ b/Documentation/vm/hwpoison.txt
53 @@ -84,6 +84,11 @@ PR_MCE_KILL
54 PR_MCE_KILL_EARLY: Early kill
55 PR_MCE_KILL_LATE: Late kill
56 PR_MCE_KILL_DEFAULT: Use system global default
57 + Note that if you want to have a dedicated thread which handles
58 + the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
59 + call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
60 + the SIGBUS is sent to the main thread.
65 --- a/mm/memory-failure.c
66 +++ b/mm/memory-failure.c
67 @@ -384,15 +384,44 @@ static void kill_procs(struct list_head
71 -static int task_early_kill(struct task_struct *tsk, int force_early)
73 + * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
74 + * on behalf of the thread group. Return task_struct of the (first found)
75 + * dedicated thread if found, and return NULL otherwise.
77 + * We already hold read_lock(&tasklist_lock) in the caller, so we don't
78 + * have to call rcu_read_lock/unlock() in this function.
80 +static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
82 + struct task_struct *t;
84 + for_each_thread(tsk, t)
85 + if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
91 + * Determine whether a given process is "early kill" process which expects
92 + * to be signaled when some page under the process is hwpoisoned.
93 + * Return task_struct of the dedicated thread (main thread unless explicitly
94 + * specified) if the process is "early kill," and otherwise returns NULL.
96 +static struct task_struct *task_early_kill(struct task_struct *tsk,
99 + struct task_struct *t;
105 - if (tsk->flags & PF_MCE_PROCESS)
106 - return !!(tsk->flags & PF_MCE_EARLY);
107 - return sysctl_memory_failure_early_kill;
109 + t = find_early_kill_thread(tsk);
112 + if (sysctl_memory_failure_early_kill)
118 @@ -414,16 +443,17 @@ static void collect_procs_anon(struct pa
119 read_lock(&tasklist_lock);
120 for_each_process (tsk) {
121 struct anon_vma_chain *vmac;
122 + struct task_struct *t = task_early_kill(tsk, force_early);
124 - if (!task_early_kill(tsk, force_early))
127 anon_vma_interval_tree_foreach(vmac, &av->rb_root,
130 if (!page_mapped_in_vma(page, vma))
132 - if (vma->vm_mm == tsk->mm)
133 - add_to_kill(tsk, page, vma, to_kill, tkc);
134 + if (vma->vm_mm == t->mm)
135 + add_to_kill(t, page, vma, to_kill, tkc);
138 read_unlock(&tasklist_lock);
139 @@ -444,10 +474,10 @@ static void collect_procs_file(struct pa
140 read_lock(&tasklist_lock);
141 for_each_process(tsk) {
142 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
143 + struct task_struct *t = task_early_kill(tsk, force_early);
145 - if (!task_early_kill(tsk, force_early))
149 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
152 @@ -457,8 +487,8 @@ static void collect_procs_file(struct pa
153 * Assume applications who requested early kill want
154 * to be informed of all such data corruptions.
156 - if (vma->vm_mm == tsk->mm)
157 - add_to_kill(tsk, page, vma, to_kill, tkc);
158 + if (vma->vm_mm == t->mm)
159 + add_to_kill(t, page, vma, to_kill, tkc);
162 read_unlock(&tasklist_lock);