]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Jun 2019 14:33:15 +0000 (16:33 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Jun 2019 14:33:15 +0000 (16:33 +0200)
added patches:
coredump-fix-race-condition-between-collapse_huge_page-and-core-dumping.patch

queue-4.19/coredump-fix-race-condition-between-collapse_huge_page-and-core-dumping.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/coredump-fix-race-condition-between-collapse_huge_page-and-core-dumping.patch b/queue-4.19/coredump-fix-race-condition-between-collapse_huge_page-and-core-dumping.patch
new file mode 100644 (file)
index 0000000..ce112f2
--- /dev/null
@@ -0,0 +1,97 @@
+From 59ea6d06cfa9247b586a695c21f94afa7183af74 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Thu, 13 Jun 2019 15:56:11 -0700
+Subject: coredump: fix race condition between collapse_huge_page() and core dumping
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 59ea6d06cfa9247b586a695c21f94afa7183af74 upstream.
+
+When fixing the race conditions between the coredump and the mmap_sem
+holders outside the context of the process, we focused on
+mmget_not_zero()/get_task_mm() callers in 04f5866e41fb70 ("coredump: fix
+race condition between mmget_not_zero()/get_task_mm() and core
+dumping"), but those aren't the only cases where the mmap_sem can be
+taken outside of the context of the process as Michal Hocko noticed
+while backporting that commit to older -stable kernels.
+
+If mmgrab() is called in the context of the process, but then the
+mm_count reference is transferred outside the context of the process,
+that can also be a problem if the mmap_sem has to be taken for writing
+through that mm_count reference.
+
+khugepaged registration calls mmgrab() in the context of the process,
+but the mmap_sem for writing is taken later in the context of the
+khugepaged kernel thread.
+
+collapse_huge_page() after taking the mmap_sem for writing doesn't
+modify any vma, so it's not obvious that it could cause a problem to the
+coredump, but it happens to modify the pmd in a way that breaks an
+invariant that pmd_trans_huge_lock() relies upon.  collapse_huge_page()
+needs the mmap_sem for writing just to block concurrent page faults that
+call pmd_trans_huge_lock().
+
+Specifically the invariant that "!pmd_trans_huge()" cannot become a
+"pmd_trans_huge()" doesn't hold while collapse_huge_page() runs.
+
+The coredump will call __get_user_pages() without mmap_sem for reading,
+which eventually can invoke a lockless page fault which will need a
+functional pmd_trans_huge_lock().
+
+So collapse_huge_page() needs to use mmget_still_valid() to check it's
+not running concurrently with the coredump...  as long as the coredump
+can invoke page faults without holding the mmap_sem for reading.
+
+This has "Fixes: khugepaged" to facilitate backporting, but in my view
+it's more a bug in the coredump code that will eventually have to be
+rewritten to stop invoking page faults without the mmap_sem for reading.
+So the long term plan is still to drop all mmget_still_valid().
+
+Link: http://lkml.kernel.org/r/20190607161558.32104-1-aarcange@redhat.com
+Fixes: ba76149f47d8 ("thp: khugepaged")
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Reported-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Jason Gunthorpe <jgg@mellanox.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/sched/mm.h |    4 ++++
+ mm/khugepaged.c          |    3 +++
+ 2 files changed, 7 insertions(+)
+
+--- a/include/linux/sched/mm.h
++++ b/include/linux/sched/mm.h
+@@ -54,6 +54,10 @@ static inline void mmdrop(struct mm_stru
+  * followed by taking the mmap_sem for writing before modifying the
+  * vmas or anything the coredump pretends not to change from under it.
+  *
++ * It also has to be called when mmgrab() is used in the context of
++ * the process, but then the mm_count refcount is transferred outside
++ * the context of the process to run down_write() on that pinned mm.
++ *
+  * NOTE: find_extend_vma() called from GUP context is the only place
+  * that can modify the "mm" (notably the vm_start/end) under mmap_sem
+  * for reading and outside the context of the process, so it is also
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1005,6 +1005,9 @@ static void collapse_huge_page(struct mm
+        * handled by the anon_vma lock + PG_lock.
+        */
+       down_write(&mm->mmap_sem);
++      result = SCAN_ANY_PROCESS;
++      if (!mmget_still_valid(mm))
++              goto out;
+       result = hugepage_vma_revalidate(mm, address, &vma);
+       if (result)
+               goto out;
index cdcd3d5fbdc65ac1d56160443224480d10be012f..038e2924dffb573707820e6ccf67478818832b0b 100644 (file)
@@ -58,3 +58,4 @@ scsi-scsi_dh_alua-fix-possible-null-ptr-deref.patch
 scsi-libsas-delete-sas-port-if-expander-discover-fai.patch
 mlxsw-spectrum-prevent-force-of-56g.patch
 ocfs2-fix-error-path-kobject-memory-leak.patch
+coredump-fix-race-condition-between-collapse_huge_page-and-core-dumping.patch