From: Greg Kroah-Hartman
Date: Mon, 17 Sep 2018 20:25:02 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v4.18.9~9
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=37fd147d66e19044cfb60fbfbc66858f96dcb8ed;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	mm-get-rid-of-vmacache_flush_all-entirely.patch
---

diff --git a/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch b/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch
new file mode 100644
index 00000000000..4222a375713
--- /dev/null
+++ b/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch
@@ -0,0 +1,160 @@
+From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Wed, 12 Sep 2018 23:57:48 -1000
+Subject: mm: get rid of vmacache_flush_all() entirely
+
+From: Linus Torvalds
+
+commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream.
+
+Jann Horn points out that the vmacache_flush_all() function is not only
+potentially expensive, it's buggy too.  It also happens to be entirely
+unnecessary, because the sequence number overflow case can be avoided by
+simply making the sequence number be 64-bit.  That doesn't even grow the
+data structures in question, because the other adjacent fields are
+already 64-bit.
+
+So simplify the whole thing by just making the sequence number overflow
+case go away entirely, which gets rid of all the complications and makes
+the code faster too.  Win-win.
+
+[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics
+  also just goes away entirely with this ]
+
+Reported-by: Jann Horn
+Suggested-by: Will Deacon
+Acked-by: Davidlohr Bueso
+Cc: Oleg Nesterov
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/mm_types.h      |    2 +-
+ include/linux/mm_types_task.h |    2 +-
+ include/linux/vm_event_item.h |    1 -
+ include/linux/vmacache.h      |    5 -----
+ mm/debug.c                    |    4 ++--
+ mm/vmacache.c                 |   38 --------------------------------------
+ 6 files changed, 4 insertions(+), 48 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -354,7 +354,7 @@ struct kioctx_table;
+ struct mm_struct {
+ 	struct vm_area_struct *mmap;		/* list of VMAs */
+ 	struct rb_root mm_rb;
+-	u32 vmacache_seqnum;                   /* per-thread vmacache */
++	u64 vmacache_seqnum;                   /* per-thread vmacache */
+ #ifdef CONFIG_MMU
+ 	unsigned long (*get_unmapped_area) (struct file *filp,
+ 				unsigned long addr, unsigned long len,
+--- a/include/linux/mm_types_task.h
++++ b/include/linux/mm_types_task.h
+@@ -32,7 +32,7 @@
+ #define VMACACHE_MASK (VMACACHE_SIZE - 1)
+ 
+ struct vmacache {
+-	u32 seqnum;
++	u64 seqnum;
+ 	struct vm_area_struct *vmas[VMACACHE_SIZE];
+ };
+ 
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -105,7 +105,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #ifdef CONFIG_DEBUG_VM_VMACACHE
+ 	VMACACHE_FIND_CALLS,
+ 	VMACACHE_FIND_HITS,
+-	VMACACHE_FULL_FLUSHES,
+ #endif
+ #ifdef CONFIG_SWAP
+ 	SWAP_RA,
+--- a/include/linux/vmacache.h
++++ b/include/linux/vmacache.h
+@@ -16,7 +16,6 @@ static inline void vmacache_flush(struct
+ 	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
+ }
+ 
+-extern void vmacache_flush_all(struct mm_struct *mm);
+ extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+ extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+ 						unsigned long addr);
+@@ -30,10 +29,6 @@ extern struct vm_area_struct *vmacache_f
+ static inline void vmacache_invalidate(struct mm_struct *mm)
+ {
+ 	mm->vmacache_seqnum++;
+-
+-	/* deal with overflows */
+-	if (unlikely(mm->vmacache_seqnum == 0))
+-		vmacache_flush_all(mm);
+ }
+ 
+ #endif /* __LINUX_VMACACHE_H */
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -100,7 +100,7 @@ EXPORT_SYMBOL(dump_vma);
+ 
+ void dump_mm(const struct mm_struct *mm)
+ {
+-	pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
++	pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n"
+ #ifdef CONFIG_MMU
+ 		"get_unmapped_area %p\n"
+ #endif
+@@ -128,7 +128,7 @@ void dump_mm(const struct mm_struct *mm)
+ 		"tlb_flush_pending %d\n"
+ 		"def_flags: %#lx(%pGv)\n",
+ 
+-		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
++		mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
+ #ifdef CONFIG_MMU
+ 		mm->get_unmapped_area,
+ #endif
+--- a/mm/vmacache.c
++++ b/mm/vmacache.c
+@@ -8,44 +8,6 @@
+ #include <linux/vmacache.h>
+ 
+ /*
+- * Flush vma caches for threads that share a given mm.
+- *
+- * The operation is safe because the caller holds the mmap_sem
+- * exclusively and other threads accessing the vma cache will
+- * have mmap_sem held at least for read, so no extra locking
+- * is required to maintain the vma cache.
+- */
+-void vmacache_flush_all(struct mm_struct *mm)
+-{
+-	struct task_struct *g, *p;
+-
+-	count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
+-
+-	/*
+-	 * Single threaded tasks need not iterate the entire
+-	 * list of process.  We can avoid the flushing as well
+-	 * since the mm's seqnum was increased and don't have
+-	 * to worry about other threads' seqnum.  Current's
+-	 * flush will occur upon the next lookup.
+-	 */
+-	if (atomic_read(&mm->mm_users) == 1)
+-		return;
+-
+-	rcu_read_lock();
+-	for_each_process_thread(g, p) {
+-		/*
+-		 * Only flush the vmacache pointers as the
+-		 * mm seqnum is already set and curr's will
+-		 * be set upon invalidation when the next
+-		 * lookup is done.
+-		 */
+-		if (mm == p->mm)
+-			vmacache_flush(p);
+-	}
+-	rcu_read_unlock();
+-}
+-
+-/*
+  * This task may be accessing a foreign mm via (for example)
+  * get_user_pages()->find_vma().  The vmacache is task-local and this
+  * task's vmacache pertains to a different mm (ie, its own).  There is
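
A quick aside on the mechanism, for readers without the tree at hand: each
thread's vmacache carries a generation number that must match the owning
mm's, and invalidation is now a bare 64-bit increment because wraparound is
unreachable in practice. At an assumed rate of 10^9 invalidations per
second, a u32 wraps after about 4.3 seconds, while a u64 needs roughly 585
years. The sketch below is a minimal userspace model of that scheme, not
the kernel code; mm_model, vmacache_model, invalidate() and cache_valid()
are made-up stand-ins for mm_struct, struct vmacache, vmacache_invalidate()
and the kernel's lookup-time validity check.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VMACACHE_SIZE 4	/* matches the kernel's per-thread cache size */

struct mm_model { uint64_t vmacache_seqnum; };

struct vmacache_model {
	uint64_t seqnum;
	void *vmas[VMACACHE_SIZE];	/* stand-ins for cached vm_area_struct pointers */
};

/* Bump the mm's generation. With a u64 there is no overflow case left to
 * handle, which is exactly what lets the patch delete vmacache_flush_all(). */
static void invalidate(struct mm_model *mm)
{
	mm->vmacache_seqnum++;
}

/* A thread's cache is usable only while its generation matches the mm's.
 * On mismatch, reset it lazily; this is why no eager walk over all threads
 * sharing the mm is needed. */
static bool cache_valid(struct vmacache_model *vc, const struct mm_model *mm)
{
	if (vc->seqnum != mm->vmacache_seqnum) {
		vc->seqnum = mm->vmacache_seqnum;
		memset(vc->vmas, 0, sizeof(vc->vmas));
		return false;
	}
	return true;
}

int main(void)
{
	struct mm_model mm = { 0 };
	struct vmacache_model vc = { 0 };
	int fake_vma;

	vc.vmas[0] = &fake_vma;
	printf("before invalidate: %d\n", cache_valid(&vc, &mm)); /* 1: hit path */
	invalidate(&mm);
	printf("after invalidate:  %d\n", cache_valid(&vc, &mm)); /* 0: lazy flush */
	return 0;
}

The lazy reset in cache_valid() is what makes the removed
for_each_process_thread() walk unnecessary: a stale cache simply fails the
seqnum comparison on its next lookup.
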
diff --git a/queue-4.14/series b/queue-4.14/series
index e62756f5f42..a813f80ae8f 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -61,7 +61,6 @@ tools-testing-nvdimm-kaddr-and-pfn-can-be-null-to-direct_access.patch
 ath10k-disable-bundle-mgmt-tx-completion-event-support.patch
 bluetooth-hidp-fix-handling-of-strncpy-for-hid-name-information.patch
 x86-mm-remove-in_nmi-warning-from-vmalloc_fault.patch
-x86-kexec-allocate-8k-pgds-for-pti.patch
 pinctrl-imx-off-by-one-in-imx_pinconf_group_dbg_show.patch
 gpio-ml-ioh-fix-buffer-underwrite-on-probe-error-path.patch
 pinctrl-amd-only-handle-irq-if-it-is-pending-and-unmasked.patch
@@ -124,3 +123,4 @@ mtd-ubi-wl-fix-error-return-code-in-ubi_wl_init.patch
 tun-fix-use-after-free-for-ptr_ring.patch
 tuntap-fix-use-after-free-during-release.patch
 autofs-fix-autofs_sbi-does-not-check-super-block-type.patch
+mm-get-rid-of-vmacache_flush_all-entirely.patch
diff --git a/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch b/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch
deleted file mode 100644
index b17f81e6c70..00000000000
--- a/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From foo@baz Mon Sep 17 12:33:31 CEST 2018
-From: Joerg Roedel
-Date: Wed, 25 Jul 2018 17:48:03 +0200
-Subject: x86/kexec: Allocate 8k PGDs for PTI
-
-From: Joerg Roedel
-
-[ Upstream commit ca38dc8f2724d101038b1205122c93a1c7f38f11 ]
-
-Fuzzing the PTI-x86-32 code with trinity showed unhandled
-kernel paging request oops-messages that looked a lot like
-silent data corruption.
-
-Lots of debugging and testing led to the kexec-32bit code,
-which is still allocating 4k PGDs when PTI is enabled. But
-since it uses native_set_pud() to build the page-table, it
-will inevitably call into __pti_set_user_pgtbl(), which
-writes beyond the allocated 4k page.
-
-Use PGD_ALLOCATION_ORDER to allocate PGDs in the kexec code
-to fix the issue.
-
-Signed-off-by: Joerg Roedel
-Signed-off-by: Thomas Gleixner
-Tested-by: David H. Gutteridge
-Cc: "H. Peter Anvin"
-Cc: linux-mm@kvack.org
-Cc: Linus Torvalds
-Cc: Andy Lutomirski
-Cc: Dave Hansen
-Cc: Josh Poimboeuf
-Cc: Juergen Gross
-Cc: Peter Zijlstra
-Cc: Borislav Petkov
-Cc: Jiri Kosina
-Cc: Boris Ostrovsky
-Cc: Brian Gerst
-Cc: David Laight
-Cc: Denys Vlasenko
-Cc: Eduardo Valentin
-Cc: Greg KH
-Cc: Will Deacon
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: Andrea Arcangeli
-Cc: Waiman Long
-Cc: Pavel Machek
-Cc: Arnaldo Carvalho de Melo
-Cc: Alexander Shishkin
-Cc: Jiri Olsa
-Cc: Namhyung Kim
-Cc: joro@8bytes.org
-Link: https://lkml.kernel.org/r/1532533683-5988-4-git-send-email-joro@8bytes.org
-Signed-off-by: Sasha Levin
-Signed-off-by: Greg Kroah-Hartman
----
- arch/x86/kernel/machine_kexec_32.c |    5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/arch/x86/kernel/machine_kexec_32.c
-+++ b/arch/x86/kernel/machine_kexec_32.c
-@@ -56,7 +56,7 @@ static void load_segments(void)
- 
- static void machine_kexec_free_page_tables(struct kimage *image)
- {
--	free_page((unsigned long)image->arch.pgd);
-+	free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
- 	image->arch.pgd = NULL;
- #ifdef CONFIG_X86_PAE
- 	free_page((unsigned long)image->arch.pmd0);
-@@ -72,7 +72,8 @@ static void machine_kexec_free_page_tabl
- 
- static int machine_kexec_alloc_page_tables(struct kimage *image)
- {
--	image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-+	image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-+						    PGD_ALLOCATION_ORDER);
- #ifdef CONFIG_X86_PAE
- 	image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
- 	image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
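
For context on the patch removed from the series above: with PTI enabled,
an x86 PGD is an adjacent pair of pages (the kernel half, plus a user half
one PAGE_SIZE above it), which is what PGD_ALLOCATION_ORDER (order 1,
i.e. 8k) accounts for. A plain 4k get_zeroed_page() therefore lets
__pti_set_user_pgtbl() write one page past the allocation. Below is a
rough userspace illustration of that out-of-bounds mechanism; it assumes
the paired-page layout described in the commit message, and
get_free_pages_model() and user_pgd_entry() are hypothetical stand-ins
(only PGD_ALLOCATION_ORDER mirrors a real kernel macro).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* With PTI the PGD occupies two consecutive pages (kernel half plus user
 * half), hence order 1 = 2^1 pages = 8k. */
#define PGD_ALLOCATION_ORDER 1

/* Stand-in for __get_free_pages(GFP_KERNEL | __GFP_ZERO, order). */
static void *get_free_pages_model(unsigned int order)
{
	return calloc(1UL << order, PAGE_SIZE);
}

/* Stand-in for the PTI rule the commit message describes: the user copy
 * of a PGD entry lives one page above its kernel counterpart. */
static uint64_t *user_pgd_entry(uint64_t *kernel_entry)
{
	return (uint64_t *)((char *)kernel_entry + PAGE_SIZE);
}

int main(void)
{
	/* The bug: an order-0 (4k) PGD makes user_pgd_entry() land outside
	 * the allocation. The fix: allocate order 1 (8k) up front. */
	uint64_t *pgd = get_free_pages_model(PGD_ALLOCATION_ORDER);

	if (!pgd)
		return 1;

	*user_pgd_entry(&pgd[0]) = 0x123;	/* in bounds only at order >= 1 */
	printf("user half of entry 0: %#llx\n",
	       (unsigned long long)*user_pgd_entry(&pgd[0]));
	free(pgd);
	return 0;
}
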