]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
drop coredump patch from 4.4.y and 4.9.y for now.
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2019 15:35:38 +0000 (17:35 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Jun 2019 15:35:38 +0000 (17:35 +0200)
queue-4.4/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch [deleted file]
queue-4.4/series
queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch [deleted file]
queue-4.9/series

diff --git a/queue-4.4/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch b/queue-4.4/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
deleted file mode 100644 (file)
index f638b9e..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-From foo@baz Tue 04 Jun 2019 04:46:27 PM CEST
-From: Andrea Arcangeli <aarcange@redhat.com>
-Date: Thu, 18 Apr 2019 17:50:52 -0700
-Subject: coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
-
-From: Andrea Arcangeli <aarcange@redhat.com>
-
-commit 04f5866e41fb70690e28397487d8bd8eea7d712a upstream.
-
-The core dumping code has always run without holding the mmap_sem for
-writing, despite that is the only way to ensure that the entire vma
-layout will not change from under it.  Only using some signal
-serialization on the processes belonging to the mm is not nearly enough.
-This was pointed out earlier.  For example in Hugh's post from Jul 2017:
-
-  https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils
-
-  "Not strictly relevant here, but a related note: I was very surprised
-   to discover, only quite recently, how handle_mm_fault() may be called
-   without down_read(mmap_sem) - when core dumping. That seems a
-   misguided optimization to me, which would also be nice to correct"
-
-In particular because the growsdown and growsup can move the
-vm_start/vm_end the various loops the core dump does around the vma will
-not be consistent if page faults can happen concurrently.
-
-Pretty much all users calling mmget_not_zero()/get_task_mm() and then
-taking the mmap_sem had the potential to introduce unexpected side
-effects in the core dumping code.
-
-Adding mmap_sem for writing around the ->core_dump invocation is a
-viable long term fix, but it requires removing all copy user and page
-faults and to replace them with get_dump_page() for all binary formats
-which is not suitable as a short term fix.
-
-For the time being this solution manually covers the places that can
-confuse the core dump either by altering the vma layout or the vma flags
-while it runs.  Once ->core_dump runs under mmap_sem for writing the
-function mmget_still_valid() can be dropped.
-
-Allowing mmap_sem protected sections to run in parallel with the
-coredump provides some minor parallelism advantage to the swapoff code
-(which seems to be safe enough by never mangling any vma field and can
-keep doing swapins in parallel to the core dumping) and to some other
-corner case.
-
-In order to facilitate the backporting I added "Fixes: 86039bd3b4e6"
-however the side effect of this same race condition in /proc/pid/mem
-should be reproducible since before 2.6.12-rc2 so I couldn't add any
-other "Fixes:" because there's no hash beyond the git genesis commit.
-
-Because find_extend_vma() is the only location outside of the process
-context that could modify the "mm" structures under mmap_sem for
-reading, by adding the mmget_still_valid() check to it, all other cases
-that take the mmap_sem for reading don't need the new check after
-mmget_not_zero()/get_task_mm().  The expand_stack() in page fault
-context also doesn't need the new check, because all tasks under core
-dumping are frozen.
-
-Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com
-Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
-Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-Reported-by: Jann Horn <jannh@google.com>
-Suggested-by: Oleg Nesterov <oleg@redhat.com>
-Acked-by: Peter Xu <peterx@redhat.com>
-Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
-Reviewed-by: Oleg Nesterov <oleg@redhat.com>
-Reviewed-by: Jann Horn <jannh@google.com>
-Acked-by: Jason Gunthorpe <jgg@mellanox.com>
-Acked-by: Michal Hocko <mhocko@suse.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-[bwh: Backported to 4.4:
- - Drop changes in Infiniband and userfaultfd_event_wait_completion()
- - Adjust filename, context]
-Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- fs/proc/task_mmu.c    |   18 ++++++++++++++++++
- fs/userfaultfd.c      |    7 +++++++
- include/linux/sched.h |   21 +++++++++++++++++++++
- mm/mmap.c             |    7 ++++++-
- 4 files changed, 52 insertions(+), 1 deletion(-)
-
---- a/fs/proc/task_mmu.c
-+++ b/fs/proc/task_mmu.c
-@@ -947,6 +947,24 @@ static ssize_t clear_refs_write(struct f
-                                       continue;
-                               up_read(&mm->mmap_sem);
-                               down_write(&mm->mmap_sem);
-+                              /*
-+                               * Avoid to modify vma->vm_flags
-+                               * without locked ops while the
-+                               * coredump reads the vm_flags.
-+                               */
-+                              if (!mmget_still_valid(mm)) {
-+                                      /*
-+                                       * Silently return "count"
-+                                       * like if get_task_mm()
-+                                       * failed. FIXME: should this
-+                                       * function have returned
-+                                       * -ESRCH if get_task_mm()
-+                                       * failed like if
-+                                       * get_proc_task() fails?
-+                                       */
-+                                      up_write(&mm->mmap_sem);
-+                                      goto out_mm;
-+                              }
-                               for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                                       vma->vm_flags &= ~VM_SOFTDIRTY;
-                                       vma_set_page_prot(vma);
---- a/fs/userfaultfd.c
-+++ b/fs/userfaultfd.c
-@@ -446,6 +446,8 @@ static int userfaultfd_release(struct in
-        * taking the mmap_sem for writing.
-        */
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto skip_mm;
-       prev = NULL;
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
-               cond_resched();
-@@ -468,6 +470,7 @@ static int userfaultfd_release(struct in
-               vma->vm_flags = new_flags;
-               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-       }
-+skip_mm:
-       up_write(&mm->mmap_sem);
-       mmput(mm);
- wakeup:
-@@ -769,6 +772,8 @@ static int userfaultfd_register(struct u
-               goto out;
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto out_unlock;
-       vma = find_vma_prev(mm, start, &prev);
-       if (!vma)
-               goto out_unlock;
-@@ -914,6 +919,8 @@ static int userfaultfd_unregister(struct
-               goto out;
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto out_unlock;
-       vma = find_vma_prev(mm, start, &prev);
-       if (!vma)
-               goto out_unlock;
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -2625,6 +2625,27 @@ static inline bool mmget_not_zero(struct
-       return atomic_inc_not_zero(&mm->mm_users);
- }
-+/*
-+ * This has to be called after a get_task_mm()/mmget_not_zero()
-+ * followed by taking the mmap_sem for writing before modifying the
-+ * vmas or anything the coredump pretends not to change from under it.
-+ *
-+ * NOTE: find_extend_vma() called from GUP context is the only place
-+ * that can modify the "mm" (notably the vm_start/end) under mmap_sem
-+ * for reading and outside the context of the process, so it is also
-+ * the only case that holds the mmap_sem for reading that must call
-+ * this function. Generally if the mmap_sem is hold for reading
-+ * there's no need of this check after get_task_mm()/mmget_not_zero().
-+ *
-+ * This function can be obsoleted and the check can be removed, after
-+ * the coredump code will hold the mmap_sem for writing before
-+ * invoking the ->core_dump methods.
-+ */
-+static inline bool mmget_still_valid(struct mm_struct *mm)
-+{
-+      return likely(!mm->core_state);
-+}
-+
- /* mmput gets rid of the mappings and all user-space */
- extern void mmput(struct mm_struct *);
- /* Grab a reference to a task's mm, if it is not already going away */
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -42,6 +42,7 @@
- #include <linux/memory.h>
- #include <linux/printk.h>
- #include <linux/userfaultfd_k.h>
-+#include <linux/sched.h>
- #include <asm/uaccess.h>
- #include <asm/cacheflush.h>
-@@ -2398,7 +2399,8 @@ find_extend_vma(struct mm_struct *mm, un
-       vma = find_vma_prev(mm, addr, &prev);
-       if (vma && (vma->vm_start <= addr))
-               return vma;
--      if (!prev || expand_stack(prev, addr))
-+      /* don't alter vm_end if the coredump is running */
-+      if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
-               return NULL;
-       if (prev->vm_flags & VM_LOCKED)
-               populate_vma_page_range(prev, addr, prev->vm_end, NULL);
-@@ -2424,6 +2426,9 @@ find_extend_vma(struct mm_struct *mm, un
-               return vma;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               return NULL;
-+      /* don't alter vm_start if the coredump is running */
-+      if (!mmget_still_valid(mm))
-+              return NULL;
-       start = vma->vm_start;
-       if (expand_stack(vma, addr))
-               return NULL;
index 63c2b0282ffba4b8112fd2329f41b494b5263e88..eccedbc46eaae00b7156ba0bba4538df7053e790 100644 (file)
@@ -223,5 +223,4 @@ brcmfmac-fix-incorrect-event-channel-deduction.patch
 brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch
 brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch
 userfaultfd-don-t-pin-the-user-memory-in-userfaultfd_file_create.patch
-coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
 revert-x86-build-move-_etext-to-actual-end-of-.text.patch
diff --git a/queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch b/queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
deleted file mode 100644 (file)
index 3c8aaf2..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST
-From: Andrea Arcangeli <aarcange@redhat.com>
-Date: Thu, 18 Apr 2019 17:50:52 -0700
-Subject: coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
-
-From: Andrea Arcangeli <aarcange@redhat.com>
-
-commit 04f5866e41fb70690e28397487d8bd8eea7d712a upstream.
-
-The core dumping code has always run without holding the mmap_sem for
-writing, despite that is the only way to ensure that the entire vma
-layout will not change from under it.  Only using some signal
-serialization on the processes belonging to the mm is not nearly enough.
-This was pointed out earlier.  For example in Hugh's post from Jul 2017:
-
-  https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils
-
-  "Not strictly relevant here, but a related note: I was very surprised
-   to discover, only quite recently, how handle_mm_fault() may be called
-   without down_read(mmap_sem) - when core dumping. That seems a
-   misguided optimization to me, which would also be nice to correct"
-
-In particular because the growsdown and growsup can move the
-vm_start/vm_end the various loops the core dump does around the vma will
-not be consistent if page faults can happen concurrently.
-
-Pretty much all users calling mmget_not_zero()/get_task_mm() and then
-taking the mmap_sem had the potential to introduce unexpected side
-effects in the core dumping code.
-
-Adding mmap_sem for writing around the ->core_dump invocation is a
-viable long term fix, but it requires removing all copy user and page
-faults and to replace them with get_dump_page() for all binary formats
-which is not suitable as a short term fix.
-
-For the time being this solution manually covers the places that can
-confuse the core dump either by altering the vma layout or the vma flags
-while it runs.  Once ->core_dump runs under mmap_sem for writing the
-function mmget_still_valid() can be dropped.
-
-Allowing mmap_sem protected sections to run in parallel with the
-coredump provides some minor parallelism advantage to the swapoff code
-(which seems to be safe enough by never mangling any vma field and can
-keep doing swapins in parallel to the core dumping) and to some other
-corner case.
-
-In order to facilitate the backporting I added "Fixes: 86039bd3b4e6"
-however the side effect of this same race condition in /proc/pid/mem
-should be reproducible since before 2.6.12-rc2 so I couldn't add any
-other "Fixes:" because there's no hash beyond the git genesis commit.
-
-Because find_extend_vma() is the only location outside of the process
-context that could modify the "mm" structures under mmap_sem for
-reading, by adding the mmget_still_valid() check to it, all other cases
-that take the mmap_sem for reading don't need the new check after
-mmget_not_zero()/get_task_mm().  The expand_stack() in page fault
-context also doesn't need the new check, because all tasks under core
-dumping are frozen.
-
-Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com
-Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
-Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-Reported-by: Jann Horn <jannh@google.com>
-Suggested-by: Oleg Nesterov <oleg@redhat.com>
-Acked-by: Peter Xu <peterx@redhat.com>
-Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
-Reviewed-by: Oleg Nesterov <oleg@redhat.com>
-Reviewed-by: Jann Horn <jannh@google.com>
-Acked-by: Jason Gunthorpe <jgg@mellanox.com>
-Acked-by: Michal Hocko <mhocko@suse.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-[bwh: Backported to 4.9:
- - Drop changes in Infiniband and userfaultfd_event_wait_completion()
- - Adjust filename, context]
-Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- fs/proc/task_mmu.c    |   18 ++++++++++++++++++
- fs/userfaultfd.c      |    7 +++++++
- include/linux/sched.h |   21 +++++++++++++++++++++
- mm/mmap.c             |    7 ++++++-
- 4 files changed, 52 insertions(+), 1 deletion(-)
-
---- a/fs/proc/task_mmu.c
-+++ b/fs/proc/task_mmu.c
-@@ -1057,6 +1057,24 @@ static ssize_t clear_refs_write(struct f
-                                       count = -EINTR;
-                                       goto out_mm;
-                               }
-+                              /*
-+                               * Avoid to modify vma->vm_flags
-+                               * without locked ops while the
-+                               * coredump reads the vm_flags.
-+                               */
-+                              if (!mmget_still_valid(mm)) {
-+                                      /*
-+                                       * Silently return "count"
-+                                       * like if get_task_mm()
-+                                       * failed. FIXME: should this
-+                                       * function have returned
-+                                       * -ESRCH if get_task_mm()
-+                                       * failed like if
-+                                       * get_proc_task() fails?
-+                                       */
-+                                      up_write(&mm->mmap_sem);
-+                                      goto out_mm;
-+                              }
-                               for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                                       vma->vm_flags &= ~VM_SOFTDIRTY;
-                                       vma_set_page_prot(vma);
---- a/fs/userfaultfd.c
-+++ b/fs/userfaultfd.c
-@@ -479,6 +479,8 @@ static int userfaultfd_release(struct in
-        * taking the mmap_sem for writing.
-        */
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto skip_mm;
-       prev = NULL;
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
-               cond_resched();
-@@ -501,6 +503,7 @@ static int userfaultfd_release(struct in
-               vma->vm_flags = new_flags;
-               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-       }
-+skip_mm:
-       up_write(&mm->mmap_sem);
-       mmput(mm);
- wakeup:
-@@ -802,6 +805,8 @@ static int userfaultfd_register(struct u
-               goto out;
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto out_unlock;
-       vma = find_vma_prev(mm, start, &prev);
-       if (!vma)
-               goto out_unlock;
-@@ -947,6 +952,8 @@ static int userfaultfd_unregister(struct
-               goto out;
-       down_write(&mm->mmap_sem);
-+      if (!mmget_still_valid(mm))
-+              goto out_unlock;
-       vma = find_vma_prev(mm, start, &prev);
-       if (!vma)
-               goto out_unlock;
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -2938,6 +2938,27 @@ static inline bool mmget_not_zero(struct
-       return atomic_inc_not_zero(&mm->mm_users);
- }
-+/*
-+ * This has to be called after a get_task_mm()/mmget_not_zero()
-+ * followed by taking the mmap_sem for writing before modifying the
-+ * vmas or anything the coredump pretends not to change from under it.
-+ *
-+ * NOTE: find_extend_vma() called from GUP context is the only place
-+ * that can modify the "mm" (notably the vm_start/end) under mmap_sem
-+ * for reading and outside the context of the process, so it is also
-+ * the only case that holds the mmap_sem for reading that must call
-+ * this function. Generally if the mmap_sem is hold for reading
-+ * there's no need of this check after get_task_mm()/mmget_not_zero().
-+ *
-+ * This function can be obsoleted and the check can be removed, after
-+ * the coredump code will hold the mmap_sem for writing before
-+ * invoking the ->core_dump methods.
-+ */
-+static inline bool mmget_still_valid(struct mm_struct *mm)
-+{
-+      return likely(!mm->core_state);
-+}
-+
- /* mmput gets rid of the mappings and all user-space */
- extern void mmput(struct mm_struct *);
- #ifdef CONFIG_MMU
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -44,6 +44,7 @@
- #include <linux/userfaultfd_k.h>
- #include <linux/moduleparam.h>
- #include <linux/pkeys.h>
-+#include <linux/sched.h>
- #include <asm/uaccess.h>
- #include <asm/cacheflush.h>
-@@ -2448,7 +2449,8 @@ find_extend_vma(struct mm_struct *mm, un
-       vma = find_vma_prev(mm, addr, &prev);
-       if (vma && (vma->vm_start <= addr))
-               return vma;
--      if (!prev || expand_stack(prev, addr))
-+      /* don't alter vm_end if the coredump is running */
-+      if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
-               return NULL;
-       if (prev->vm_flags & VM_LOCKED)
-               populate_vma_page_range(prev, addr, prev->vm_end, NULL);
-@@ -2474,6 +2476,9 @@ find_extend_vma(struct mm_struct *mm, un
-               return vma;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               return NULL;
-+      /* don't alter vm_start if the coredump is running */
-+      if (!mmget_still_valid(mm))
-+              return NULL;
-       start = vma->vm_start;
-       if (expand_stack(vma, addr))
-               return NULL;
index eb52a8ca5c193f7b1eca2849fcbee60282564cd4..0e0ac0545150b88172d1c03213640f916144a11f 100644 (file)
@@ -57,6 +57,5 @@ mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch
 mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch
 mm-prevent-get_user_pages-from-overflowing-page-refcount.patch
 mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch
-coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
 revert-x86-build-move-_etext-to-actual-end-of-.text.patch
 efi-libstub-unify-command-line-param-parsing.patch