From 0202f8903a1f6f26d6806f64fdc375f162ae8507 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 17 Sep 2018 23:09:44 +0200
Subject: [PATCH] 4.9-stable patches

added patches:
	mm-get-rid-of-vmacache_flush_all-entirely.patch

removed patches:
	x86-kexec-allocate-8k-pgds-for-pti.patch
---
 ...t-rid-of-vmacache_flush_all-entirely.patch | 160 ++++++++++++++++++
 queue-4.9/series                              |   2 +-
 .../x86-kexec-allocate-8k-pgds-for-pti.patch  |  82 ---------
 3 files changed, 161 insertions(+), 83 deletions(-)
 create mode 100644 queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch
 delete mode 100644 queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch

diff --git a/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch b/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch
new file mode 100644
index 00000000000..bc2ee45a206
--- /dev/null
+++ b/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch
@@ -0,0 +1,160 @@
+From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 12 Sep 2018 23:57:48 -1000
+Subject: mm: get rid of vmacache_flush_all() entirely
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream.
+
+Jann Horn points out that the vmacache_flush_all() function is not only
+potentially expensive, it's buggy too.  It also happens to be entirely
+unnecessary, because the sequence number overflow case can be avoided by
+simply making the sequence number be 64-bit.  That doesn't even grow the
+data structures in question, because the other adjacent fields are
+already 64-bit.
+
+So simplify the whole thing by just making the sequence number overflow
+case go away entirely, which gets rid of all the complications and makes
+the code faster too.  Win-win.
+
+[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics
+  also just goes away entirely with this ]
+
+Reported-by: Jann Horn <jannh@google.com>
+Suggested-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm_types.h      |    2 +-
+ include/linux/sched.h         |    2 +-
+ include/linux/vm_event_item.h |    1 -
+ include/linux/vmacache.h      |    5 -----
+ mm/debug.c                    |    4 ++--
+ mm/vmacache.c                 |   38 --------------------------------------
+ 6 files changed, 4 insertions(+), 48 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -396,7 +396,7 @@ struct kioctx_table;
+ struct mm_struct {
+ 	struct vm_area_struct *mmap;		/* list of VMAs */
+ 	struct rb_root mm_rb;
+-	u32 vmacache_seqnum;                   /* per-thread vmacache */
++	u64 vmacache_seqnum;                   /* per-thread vmacache */
+ #ifdef CONFIG_MMU
+ 	unsigned long (*get_unmapped_area) (struct file *filp,
+ 				unsigned long addr, unsigned long len,
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1559,7 +1559,7 @@ struct task_struct {
+ 
+ 	struct mm_struct *mm, *active_mm;
+ 	/* per-thread vma caching */
+-	u32 vmacache_seqnum;
++	u64 vmacache_seqnum;
+ 	struct vm_area_struct *vmacache[VMACACHE_SIZE];
+ #if defined(SPLIT_RSS_COUNTING)
+ 	struct task_rss_stat	rss_stat;
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -97,7 +97,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #ifdef CONFIG_DEBUG_VM_VMACACHE
+ 		VMACACHE_FIND_CALLS,
+ 		VMACACHE_FIND_HITS,
+-		VMACACHE_FULL_FLUSHES,
+ #endif
+ 		NR_VM_EVENT_ITEMS
+ };
+--- a/include/linux/vmacache.h
++++ b/include/linux/vmacache.h
+@@ -15,7 +15,6 @@ static inline void vmacache_flush(struct
+ 	memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+ }
+ 
+-extern void vmacache_flush_all(struct mm_struct *mm);
+ extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+ extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+ 						    unsigned long addr);
+@@ -29,10 +28,6 @@ extern struct vm_area_struct *vmacache_f
+ static inline void vmacache_invalidate(struct mm_struct *mm)
+ {
+ 	mm->vmacache_seqnum++;
+-
+-	/* deal with overflows */
+-	if (unlikely(mm->vmacache_seqnum == 0))
+-		vmacache_flush_all(mm);
+ }
+ 
+ #endif /* __LINUX_VMACACHE_H */
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -95,7 +95,7 @@ EXPORT_SYMBOL(dump_vma);
+ 
+ void dump_mm(const struct mm_struct *mm)
+ {
+-	pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
++	pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n"
+ #ifdef CONFIG_MMU
+ 		"get_unmapped_area %p\n"
+ #endif
+@@ -125,7 +125,7 @@ void dump_mm(const struct mm_struct *mm)
+ #endif
+ 		"def_flags: %#lx(%pGv)\n",
+ 
+-		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
++		mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
+ #ifdef CONFIG_MMU
+ 		mm->get_unmapped_area,
+ #endif
+--- a/mm/vmacache.c
++++ b/mm/vmacache.c
+@@ -6,44 +6,6 @@
+ #include <linux/vmacache.h>
+ 
+ /*
+- * Flush vma caches for threads that share a given mm.
+- *
+- * The operation is safe because the caller holds the mmap_sem
+- * exclusively and other threads accessing the vma cache will
+- * have mmap_sem held at least for read, so no extra locking
+- * is required to maintain the vma cache.
+- */
+-void vmacache_flush_all(struct mm_struct *mm)
+-{
+-	struct task_struct *g, *p;
+-
+-	count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
+-
+-	/*
+-	 * Single threaded tasks need not iterate the entire
+-	 * list of process. We can avoid the flushing as well
+-	 * since the mm's seqnum was increased and don't have
+-	 * to worry about other threads' seqnum. Current's
+-	 * flush will occur upon the next lookup.
+-	 */
+-	if (atomic_read(&mm->mm_users) == 1)
+-		return;
+-
+-	rcu_read_lock();
+-	for_each_process_thread(g, p) {
+-		/*
+-		 * Only flush the vmacache pointers as the
+-		 * mm seqnum is already set and curr's will
+-		 * be set upon invalidation when the next
+-		 * lookup is done.
+-		 */
+-		if (mm == p->mm)
+-			vmacache_flush(p);
+-	}
+-	rcu_read_unlock();
+-}
+-
+-/*
+  * This task may be accessing a foreign mm via (for example)
+  * get_user_pages()->find_vma().  The vmacache is task-local and this
+  * task's vmacache pertains to a different mm (ie, its own).  There is
diff --git a/queue-4.9/series b/queue-4.9/series
index 9a62c17ee01..917e7a4b491 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -41,7 +41,6 @@ scsi-3ware-fix-return-0-on-the-error-path-of-probe.patch
 ath10k-disable-bundle-mgmt-tx-completion-event-support.patch
 bluetooth-hidp-fix-handling-of-strncpy-for-hid-name-information.patch
 x86-mm-remove-in_nmi-warning-from-vmalloc_fault.patch
-x86-kexec-allocate-8k-pgds-for-pti.patch
 gpio-ml-ioh-fix-buffer-underwrite-on-probe-error-path.patch
 net-mvneta-fix-mtu-change-on-port-without-link.patch
 f2fs-try-grabbing-node-page-lock-aggressively-in-sync-scenario.patch
@@ -68,3 +67,4 @@ xhci-fix-use-after-free-in-xhci_free_virt_device.patch
 netfilter-x_tables-avoid-stack-out-of-bounds-read-in-xt_copy_counters_from_user.patch
 mtd-ubi-wl-fix-error-return-code-in-ubi_wl_init.patch
 autofs-fix-autofs_sbi-does-not-check-super-block-type.patch
+mm-get-rid-of-vmacache_flush_all-entirely.patch
diff --git a/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch b/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch
deleted file mode 100644
index 546f513f366..00000000000
--- a/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From foo@baz Mon Sep 17 12:22:41 CEST 2018
-From: Joerg Roedel <jroedel@suse.de>
-Date: Wed, 25 Jul 2018 17:48:03 +0200
-Subject: x86/kexec: Allocate 8k PGDs for PTI
-
-From: Joerg Roedel <jroedel@suse.de>
-
-[ Upstream commit ca38dc8f2724d101038b1205122c93a1c7f38f11 ]
-
-Fuzzing the PTI-x86-32 code with trinity showed unhandled
-kernel paging request oops-messages that looked a lot like
-silent data corruption.
-
-Lot's of debugging and testing lead to the kexec-32bit code,
-which is still allocating 4k PGDs when PTI is enabled. But
-since it uses native_set_pud() to build the page-table, it
-will unevitably call into __pti_set_user_pgtbl(), which
-writes beyond the allocated 4k page.
-
-Use PGD_ALLOCATION_ORDER to allocate PGDs in the kexec code
-to fix the issue.
-
-Signed-off-by: Joerg Roedel <jroedel@suse.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Tested-by: David H. Gutteridge <dhgutteridge@sympatico.ca>
-Cc: "H . Peter Anvin" <hpa@zytor.com>
-Cc: linux-mm@kvack.org
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Jiri Kosina <jkosina@suse.cz>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: Andrea Arcangeli <aarcange@redhat.com>
-Cc: Waiman Long <llong@redhat.com>
-Cc: Pavel Machek <pavel@ucw.cz>
-Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
-Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
-Cc: Jiri Olsa <jolsa@redhat.com>
-Cc: Namhyung Kim <namhyung@kernel.org>
-Cc: joro@8bytes.org
-Link: https://lkml.kernel.org/r/1532533683-5988-4-git-send-email-joro@8bytes.org
-Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/kernel/machine_kexec_32.c |    5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/arch/x86/kernel/machine_kexec_32.c
-+++ b/arch/x86/kernel/machine_kexec_32.c
-@@ -70,7 +70,7 @@ static void load_segments(void)
- 
- static void machine_kexec_free_page_tables(struct kimage *image)
- {
--	free_page((unsigned long)image->arch.pgd);
-+	free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
- 	image->arch.pgd = NULL;
- #ifdef CONFIG_X86_PAE
- 	free_page((unsigned long)image->arch.pmd0);
-@@ -86,7 +86,8 @@ static void machine_kexec_free_page_tabl
- 
- static int machine_kexec_alloc_page_tables(struct kimage *image)
- {
--	image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-+	image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-+						    PGD_ALLOCATION_ORDER);
- #ifdef CONFIG_X86_PAE
- 	image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
- 	image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-- 
2.47.3