git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 Sep 2018 20:25:34 +0000 (22:25 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 Sep 2018 20:25:34 +0000 (22:25 +0200)
added patches:
mm-get-rid-of-vmacache_flush_all-entirely.patch

queue-4.18/mm-get-rid-of-vmacache_flush_all-entirely.patch [new file with mode: 0644]
queue-4.18/series
queue-4.18/x86-kexec-allocate-8k-pgds-for-pti.patch [deleted file]

diff --git a/queue-4.18/mm-get-rid-of-vmacache_flush_all-entirely.patch b/queue-4.18/mm-get-rid-of-vmacache_flush_all-entirely.patch
new file mode 100644 (file)
index 0000000..bb3c7f6
--- /dev/null
@@ -0,0 +1,160 @@
+From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 12 Sep 2018 23:57:48 -1000
+Subject: mm: get rid of vmacache_flush_all() entirely
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream.
+
+Jann Horn points out that the vmacache_flush_all() function is not only
+potentially expensive, it's buggy too.  It also happens to be entirely
+unnecessary, because the sequence number overflow case can be avoided by
+simply making the sequence number be 64-bit.  That doesn't even grow the
+data structures in question, because the other adjacent fields are
+already 64-bit.
+
+So simplify the whole thing by just making the sequence number overflow
+case go away entirely, which gets rid of all the complications and makes
+the code faster too.  Win-win.
+
+[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics
+  also just goes away entirely with this ]
+
+Reported-by: Jann Horn <jannh@google.com>
+Suggested-by: Will Deacon <will.deacon@arm.com>
+Acked-by: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm_types.h      |    2 +-
+ include/linux/mm_types_task.h |    2 +-
+ include/linux/vm_event_item.h |    1 -
+ include/linux/vmacache.h      |    5 -----
+ mm/debug.c                    |    4 ++--
+ mm/vmacache.c                 |   38 --------------------------------------
+ 6 files changed, 4 insertions(+), 48 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -340,7 +340,7 @@ struct kioctx_table;
+ struct mm_struct {
+       struct vm_area_struct *mmap;            /* list of VMAs */
+       struct rb_root mm_rb;
+-      u32 vmacache_seqnum;                   /* per-thread vmacache */
++      u64 vmacache_seqnum;                   /* per-thread vmacache */
+ #ifdef CONFIG_MMU
+       unsigned long (*get_unmapped_area) (struct file *filp,
+                               unsigned long addr, unsigned long len,
+--- a/include/linux/mm_types_task.h
++++ b/include/linux/mm_types_task.h
+@@ -32,7 +32,7 @@
+ #define VMACACHE_MASK (VMACACHE_SIZE - 1)
+ struct vmacache {
+-      u32 seqnum;
++      u64 seqnum;
+       struct vm_area_struct *vmas[VMACACHE_SIZE];
+ };
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -105,7 +105,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #ifdef CONFIG_DEBUG_VM_VMACACHE
+               VMACACHE_FIND_CALLS,
+               VMACACHE_FIND_HITS,
+-              VMACACHE_FULL_FLUSHES,
+ #endif
+ #ifdef CONFIG_SWAP
+               SWAP_RA,
+--- a/include/linux/vmacache.h
++++ b/include/linux/vmacache.h
+@@ -16,7 +16,6 @@ static inline void vmacache_flush(struct
+       memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
+ }
+-extern void vmacache_flush_all(struct mm_struct *mm);
+ extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+ extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+                                                   unsigned long addr);
+@@ -30,10 +29,6 @@ extern struct vm_area_struct *vmacache_f
+ static inline void vmacache_invalidate(struct mm_struct *mm)
+ {
+       mm->vmacache_seqnum++;
+-
+-      /* deal with overflows */
+-      if (unlikely(mm->vmacache_seqnum == 0))
+-              vmacache_flush_all(mm);
+ }
+ #endif /* __LINUX_VMACACHE_H */
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -114,7 +114,7 @@ EXPORT_SYMBOL(dump_vma);
+ void dump_mm(const struct mm_struct *mm)
+ {
+-      pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
++      pr_emerg("mm %px mmap %px seqnum %llu task_size %lu\n"
+ #ifdef CONFIG_MMU
+               "get_unmapped_area %px\n"
+ #endif
+@@ -142,7 +142,7 @@ void dump_mm(const struct mm_struct *mm)
+               "tlb_flush_pending %d\n"
+               "def_flags: %#lx(%pGv)\n",
+-              mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
++              mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
+ #ifdef CONFIG_MMU
+               mm->get_unmapped_area,
+ #endif
+--- a/mm/vmacache.c
++++ b/mm/vmacache.c
+@@ -8,44 +8,6 @@
+ #include <linux/vmacache.h>
+ /*
+- * Flush vma caches for threads that share a given mm.
+- *
+- * The operation is safe because the caller holds the mmap_sem
+- * exclusively and other threads accessing the vma cache will
+- * have mmap_sem held at least for read, so no extra locking
+- * is required to maintain the vma cache.
+- */
+-void vmacache_flush_all(struct mm_struct *mm)
+-{
+-      struct task_struct *g, *p;
+-
+-      count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
+-
+-      /*
+-       * Single threaded tasks need not iterate the entire
+-       * list of process. We can avoid the flushing as well
+-       * since the mm's seqnum was increased and don't have
+-       * to worry about other threads' seqnum. Current's
+-       * flush will occur upon the next lookup.
+-       */
+-      if (atomic_read(&mm->mm_users) == 1)
+-              return;
+-
+-      rcu_read_lock();
+-      for_each_process_thread(g, p) {
+-              /*
+-               * Only flush the vmacache pointers as the
+-               * mm seqnum is already set and curr's will
+-               * be set upon invalidation when the next
+-               * lookup is done.
+-               */
+-              if (mm == p->mm)
+-                      vmacache_flush(p);
+-      }
+-      rcu_read_unlock();
+-}
+-
+-/*
+  * This task may be accessing a foreign mm via (for example)
+  * get_user_pages()->find_vma().  The vmacache is task-local and this
+  * task's vmacache pertains to a different mm (ie, its own).  There is
index 84d6623090afa72eb1f5acf173e94ccbf09e8936..7d783ea389abb744f8fd936319ad559bb509b514 100644 (file)
@@ -109,7 +109,6 @@ pci-mobiveil-fix-struct-mobiveil_pcie.pcie_reg_base-address-type.patch
 powerpc-mm-don-t-report-puds-as-memory-leaks-when-using-kmemleak.patch
 bluetooth-hidp-fix-handling-of-strncpy-for-hid-name-information.patch
 x86-mm-remove-in_nmi-warning-from-vmalloc_fault.patch
-x86-kexec-allocate-8k-pgds-for-pti.patch
 regulator-tps65217-fix-null-pointer-dereference-on-probe.patch
 pinctrl-imx-off-by-one-in-imx_pinconf_group_dbg_show.patch
 gpio-pxa-disable-pinctrl-calls-for-pxa3xx.patch
@@ -156,3 +155,4 @@ mips-mscc-ocelot-fix-length-of-memory-address-space-for-miim.patch
 rdma-cma-do-not-ignore-net-namespace-for-unbound-cm_id.patch
 clocksource-revert-remove-kthread.patch
 autofs-fix-autofs_sbi-does-not-check-super-block-type.patch
+mm-get-rid-of-vmacache_flush_all-entirely.patch
diff --git a/queue-4.18/x86-kexec-allocate-8k-pgds-for-pti.patch b/queue-4.18/x86-kexec-allocate-8k-pgds-for-pti.patch
deleted file mode 100644 (file)
index 6b24b4b..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-From foo@baz Mon Sep 17 12:37:52 CEST 2018
-From: Joerg Roedel <jroedel@suse.de>
-Date: Wed, 25 Jul 2018 17:48:03 +0200
-Subject: x86/kexec: Allocate 8k PGDs for PTI
-
-From: Joerg Roedel <jroedel@suse.de>
-
-[ Upstream commit ca38dc8f2724d101038b1205122c93a1c7f38f11 ]
-
-Fuzzing the PTI-x86-32 code with trinity showed unhandled
-kernel paging request oops-messages that looked a lot like
-silent data corruption.
-
-Lot's of debugging and testing lead to the kexec-32bit code,
-which is still allocating 4k PGDs when PTI is enabled. But
-since it uses native_set_pud() to build the page-table, it
-will unevitably call into __pti_set_user_pgtbl(), which
-writes beyond the allocated 4k page.
-
-Use PGD_ALLOCATION_ORDER to allocate PGDs in the kexec code
-to fix the issue.
-
-Signed-off-by: Joerg Roedel <jroedel@suse.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Tested-by: David H. Gutteridge <dhgutteridge@sympatico.ca>
-Cc: "H . Peter Anvin" <hpa@zytor.com>
-Cc: linux-mm@kvack.org
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Jiri Kosina <jkosina@suse.cz>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: Andrea Arcangeli <aarcange@redhat.com>
-Cc: Waiman Long <llong@redhat.com>
-Cc: Pavel Machek <pavel@ucw.cz>
-Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
-Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
-Cc: Jiri Olsa <jolsa@redhat.com>
-Cc: Namhyung Kim <namhyung@kernel.org>
-Cc: joro@8bytes.org
-Link: https://lkml.kernel.org/r/1532533683-5988-4-git-send-email-joro@8bytes.org
-Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/x86/kernel/machine_kexec_32.c |    5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/arch/x86/kernel/machine_kexec_32.c
-+++ b/arch/x86/kernel/machine_kexec_32.c
-@@ -56,7 +56,7 @@ static void load_segments(void)
- static void machine_kexec_free_page_tables(struct kimage *image)
- {
--      free_page((unsigned long)image->arch.pgd);
-+      free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
-       image->arch.pgd = NULL;
- #ifdef CONFIG_X86_PAE
-       free_page((unsigned long)image->arch.pmd0);
-@@ -72,7 +72,8 @@ static void machine_kexec_free_page_tabl
- static int machine_kexec_alloc_page_tables(struct kimage *image)
- {
--      image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-+      image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-+                                                  PGD_ALLOCATION_ORDER);
- #ifdef CONFIG_X86_PAE
-       image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-       image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);