From: Greg Kroah-Hartman
Date: Mon, 17 Sep 2018 20:25:02 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v4.18.9~9
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=37fd147d66e19044cfb60fbfbc66858f96dcb8ed;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	mm-get-rid-of-vmacache_flush_all-entirely.patch
---

diff --git a/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch b/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch
new file mode 100644
index 00000000000..4222a375713
--- /dev/null
+++ b/queue-4.14/mm-get-rid-of-vmacache_flush_all-entirely.patch
@@ -0,0 +1,160 @@
+From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Wed, 12 Sep 2018 23:57:48 -1000
+Subject: mm: get rid of vmacache_flush_all() entirely
+
+From: Linus Torvalds
+
+commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream.
+
+Jann Horn points out that the vmacache_flush_all() function is not only
+potentially expensive, it's buggy too.  It also happens to be entirely
+unnecessary, because the sequence number overflow case can be avoided by
+simply making the sequence number be 64-bit.  That doesn't even grow the
+data structures in question, because the other adjacent fields are
+already 64-bit.
+
+So simplify the whole thing by just making the sequence number overflow
+case go away entirely, which gets rid of all the complications and makes
+the code faster too.  Win-win.
+
+[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics
+  also just goes away entirely with this ]
+
+Reported-by: Jann Horn
+Suggested-by: Will Deacon
+Acked-by: Davidlohr Bueso
+Cc: Oleg Nesterov
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/mm_types.h      |    2 +-
+ include/linux/mm_types_task.h |    2 +-
+ include/linux/vm_event_item.h |    1 -
+ include/linux/vmacache.h      |    5 -----
+ mm/debug.c                    |    4 ++--
+ mm/vmacache.c                 |   38 --------------------------------------
+ 6 files changed, 4 insertions(+), 48 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -354,7 +354,7 @@ struct kioctx_table;
+ struct mm_struct {
+ 	struct vm_area_struct *mmap;		/* list of VMAs */
+ 	struct rb_root mm_rb;
+-	u32 vmacache_seqnum;                   /* per-thread vmacache */
++	u64 vmacache_seqnum;                   /* per-thread vmacache */
+ #ifdef CONFIG_MMU
+ 	unsigned long (*get_unmapped_area) (struct file *filp,
+ 				unsigned long addr, unsigned long len,
+--- a/include/linux/mm_types_task.h
++++ b/include/linux/mm_types_task.h
+@@ -32,7 +32,7 @@
+ #define VMACACHE_MASK (VMACACHE_SIZE - 1)
+ 
+ struct vmacache {
+-	u32 seqnum;
++	u64 seqnum;
+ 	struct vm_area_struct *vmas[VMACACHE_SIZE];
+ };
+ 
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -105,7 +105,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #ifdef CONFIG_DEBUG_VM_VMACACHE
+ 	VMACACHE_FIND_CALLS,
+ 	VMACACHE_FIND_HITS,
+-	VMACACHE_FULL_FLUSHES,
+ #endif
+ #ifdef CONFIG_SWAP
+ 	SWAP_RA,
+--- a/include/linux/vmacache.h
++++ b/include/linux/vmacache.h
+@@ -16,7 +16,6 @@ static inline void vmacache_flush(struct
+ 	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
+ }
+ 
+-extern void vmacache_flush_all(struct mm_struct *mm);
+ extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+ extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+ 						unsigned long addr);
+@@ -30,10 +29,6 @@ extern struct vm_area_struct *vmacache_f
+ static inline void vmacache_invalidate(struct mm_struct *mm)
+ {
+ 	mm->vmacache_seqnum++;
+-
+-	/* deal with overflows */
+-	if (unlikely(mm->vmacache_seqnum == 0))
+-		vmacache_flush_all(mm);
+ }
+ 
+ #endif /* __LINUX_VMACACHE_H */
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -100,7 +100,7 @@ EXPORT_SYMBOL(dump_vma);
+ 
+ void dump_mm(const struct mm_struct *mm)
+ {
+-	pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
++	pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n"
+ #ifdef CONFIG_MMU
+ 		"get_unmapped_area %p\n"
+ #endif
+@@ -128,7 +128,7 @@ void dump_mm(const struct mm_struct *mm)
+ 		"tlb_flush_pending %d\n"
+ 		"def_flags: %#lx(%pGv)\n",
+ 
+-		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
++		mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
+ #ifdef CONFIG_MMU
+ 		mm->get_unmapped_area,
+ #endif
+--- a/mm/vmacache.c
++++ b/mm/vmacache.c
+@@ -8,44 +8,6 @@
+ #include <linux/vmacache.h>
+ 
+ /*
+- * Flush vma caches for threads that share a given mm.
+- *
+- * The operation is safe because the caller holds the mmap_sem
+- * exclusively and other threads accessing the vma cache will
+- * have mmap_sem held at least for read, so no extra locking
+- * is required to maintain the vma cache.
+- */
+-void vmacache_flush_all(struct mm_struct *mm)
+-{
+-	struct task_struct *g, *p;
+-
+-	count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
+-
+-	/*
+-	 * Single threaded tasks need not iterate the entire
+-	 * list of process.  We can avoid the flushing as well
+-	 * since the mm's seqnum was increased and don't have
+-	 * to worry about other threads' seqnum.  Current's
+-	 * flush will occur upon the next lookup.
+-	 */
+-	if (atomic_read(&mm->mm_users) == 1)
+-		return;
+-
+-	rcu_read_lock();
+-	for_each_process_thread(g, p) {
+-		/*
+-		 * Only flush the vmacache pointers as the
+-		 * mm seqnum is already set and curr's will
+-		 * be set upon invalidation when the next
+-		 * lookup is done.
+-		 */
+-		if (mm == p->mm)
+-			vmacache_flush(p);
+-	}
+-	rcu_read_unlock();
+-}
+-
+-/*
+  * This task may be accessing a foreign mm via (for example)
+  * get_user_pages()->find_vma().  The vmacache is task-local and this
+  * task's vmacache pertains to a different mm (ie, its own).  There is
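
A quick aside on the mechanism, for readers without the tree at hand: each
thread's vmacache carries a generation number that must match the owning
mm's, and invalidation is now a bare 64-bit increment because wraparound is
unreachable in practice. At an assumed rate of 10^9 invalidations per
second, a u32 wraps after about 4.3 seconds, while a u64 needs roughly 585
years. The sketch below is a minimal userspace model of that scheme, not
the kernel code; mm_model, vmacache_model, invalidate() and cache_valid()
are made-up stand-ins for mm_struct, struct vmacache, vmacache_invalidate()
and the kernel's lookup-time validity check.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VMACACHE_SIZE 4	/* matches the kernel's per-thread cache size */

struct mm_model { uint64_t vmacache_seqnum; };

struct vmacache_model {
	uint64_t seqnum;
	void *vmas[VMACACHE_SIZE];	/* stand-ins for cached vm_area_struct pointers */
};

/* Bump the mm's generation. With a u64 there is no overflow case left to
 * handle, which is exactly what lets the patch delete vmacache_flush_all(). */
static void invalidate(struct mm_model *mm)
{
	mm->vmacache_seqnum++;
}

/* A thread's cache is usable only while its generation matches the mm's.
 * On mismatch, reset it lazily; this is why no eager walk over all threads
 * sharing the mm is needed. */
static bool cache_valid(struct vmacache_model *vc, const struct mm_model *mm)
{
	if (vc->seqnum != mm->vmacache_seqnum) {
		vc->seqnum = mm->vmacache_seqnum;
		memset(vc->vmas, 0, sizeof(vc->vmas));
		return false;
	}
	return true;
}

int main(void)
{
	struct mm_model mm = { 0 };
	struct vmacache_model vc = { 0 };
	int fake_vma;

	vc.vmas[0] = &fake_vma;
	printf("before invalidate: %d\n", cache_valid(&vc, &mm)); /* 1: hit path */
	invalidate(&mm);
	printf("after invalidate:  %d\n", cache_valid(&vc, &mm)); /* 0: lazy flush */
	return 0;
}

The lazy reset in cache_valid() is what makes the removed
for_each_process_thread() walk unnecessary: a stale cache simply fails the
seqnum comparison on its next lookup.
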
diff --git a/queue-4.14/series b/queue-4.14/series
index e62756f5f42..a813f80ae8f 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -61,7 +61,6 @@ tools-testing-nvdimm-kaddr-and-pfn-can-be-null-to-direct_access.patch
 ath10k-disable-bundle-mgmt-tx-completion-event-support.patch
 bluetooth-hidp-fix-handling-of-strncpy-for-hid-name-information.patch
 x86-mm-remove-in_nmi-warning-from-vmalloc_fault.patch
-x86-kexec-allocate-8k-pgds-for-pti.patch
 pinctrl-imx-off-by-one-in-imx_pinconf_group_dbg_show.patch
 gpio-ml-ioh-fix-buffer-underwrite-on-probe-error-path.patch
 pinctrl-amd-only-handle-irq-if-it-is-pending-and-unmasked.patch
@@ -124,3 +123,4 @@ mtd-ubi-wl-fix-error-return-code-in-ubi_wl_init.patch
 tun-fix-use-after-free-for-ptr_ring.patch
 tuntap-fix-use-after-free-during-release.patch
 autofs-fix-autofs_sbi-does-not-check-super-block-type.patch
+mm-get-rid-of-vmacache_flush_all-entirely.patch
diff --git a/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch b/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch
deleted file mode 100644
index b17f81e6c70..00000000000
--- a/queue-4.14/x86-kexec-allocate-8k-pgds-for-pti.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From foo@baz Mon Sep 17 12:33:31 CEST 2018
-From: Joerg Roedel
-Date: Wed, 25 Jul 2018 17:48:03 +0200
-Subject: x86/kexec: Allocate 8k PGDs for PTI
-
-From: Joerg Roedel
-
-[ Upstream commit ca38dc8f2724d101038b1205122c93a1c7f38f11 ]
-
-Fuzzing the PTI-x86-32 code with trinity showed unhandled
-kernel paging request oops-messages that looked a lot like
-silent data corruption.
-
-Lots of debugging and testing led to the kexec-32bit code,
-which is still allocating 4k PGDs when PTI is enabled. But
-since it uses native_set_pud() to build the page-table, it
-will inevitably call into __pti_set_user_pgtbl(), which
-writes beyond the allocated 4k page.
-
-Use PGD_ALLOCATION_ORDER to allocate PGDs in the kexec code
-to fix the issue.
-
-Signed-off-by: Joerg Roedel
-Signed-off-by: Thomas Gleixner
-Tested-by: David H. Gutteridge
-Cc: "H. Peter Anvin"
-Cc: linux-mm@kvack.org
-Cc: Linus Torvalds
-Cc: Andy Lutomirski
-Cc: Dave Hansen
-Cc: Josh Poimboeuf
-Cc: Juergen Gross
-Cc: Peter Zijlstra
-Cc: Borislav Petkov
-Cc: Jiri Kosina
-Cc: Boris Ostrovsky
-Cc: Brian Gerst
-Cc: David Laight
-Cc: Denys Vlasenko
-Cc: Eduardo Valentin
-Cc: Greg KH
-Cc: Will Deacon
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: Andrea Arcangeli
-Cc: Waiman Long
-Cc: Pavel Machek
-Cc: Arnaldo Carvalho de Melo
-Cc: Alexander Shishkin
-Cc: Jiri Olsa
-Cc: Namhyung Kim
-Cc: joro@8bytes.org
-Link: https://lkml.kernel.org/r/1532533683-5988-4-git-send-email-joro@8bytes.org
-Signed-off-by: Sasha Levin
-Signed-off-by: Greg Kroah-Hartman
----
- arch/x86/kernel/machine_kexec_32.c |    5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/arch/x86/kernel/machine_kexec_32.c
-+++ b/arch/x86/kernel/machine_kexec_32.c
-@@ -56,7 +56,7 @@ static void load_segments(void)
- 
- static void machine_kexec_free_page_tables(struct kimage *image)
- {
--	free_page((unsigned long)image->arch.pgd);
-+	free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
- 	image->arch.pgd = NULL;
- #ifdef CONFIG_X86_PAE
- 	free_page((unsigned long)image->arch.pmd0);
-@@ -72,7 +72,8 @@ static void machine_kexec_free_page_tabl
- 
- static int machine_kexec_alloc_page_tables(struct kimage *image)
- {
--	image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-+	image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-+						    PGD_ALLOCATION_ORDER);
- #ifdef CONFIG_X86_PAE
- 	image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
- 	image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
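
For context on the patch removed from the series above: with PTI enabled,
an x86 PGD is an adjacent pair of pages (the kernel half, plus a user half
one PAGE_SIZE above it), which is what PGD_ALLOCATION_ORDER (order 1,
i.e. 8k) accounts for. A plain 4k get_zeroed_page() therefore lets
__pti_set_user_pgtbl() write one page past the allocation. Below is a
rough userspace illustration of that out-of-bounds mechanism; it assumes
the paired-page layout described in the commit message, and
get_free_pages_model() and user_pgd_entry() are hypothetical stand-ins
(only PGD_ALLOCATION_ORDER mirrors a real kernel macro).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* With PTI the PGD occupies two consecutive pages (kernel half plus user
 * half), hence order 1 = 2^1 pages = 8k. */
#define PGD_ALLOCATION_ORDER 1

/* Stand-in for __get_free_pages(GFP_KERNEL | __GFP_ZERO, order). */
static void *get_free_pages_model(unsigned int order)
{
	return calloc(1UL << order, PAGE_SIZE);
}

/* Stand-in for the PTI rule the commit message describes: the user copy
 * of a PGD entry lives one page above its kernel counterpart. */
static uint64_t *user_pgd_entry(uint64_t *kernel_entry)
{
	return (uint64_t *)((char *)kernel_entry + PAGE_SIZE);
}

int main(void)
{
	/* The bug: an order-0 (4k) PGD makes user_pgd_entry() land outside
	 * the allocation. The fix: allocate order 1 (8k) up front. */
	uint64_t *pgd = get_free_pages_model(PGD_ALLOCATION_ORDER);

	if (!pgd)
		return 1;

	*user_pgd_entry(&pgd[0]) = 0x123;	/* in bounds only at order >= 1 */
	printf("user half of entry 0: %#llx\n",
	       (unsigned long long)*user_pgd_entry(&pgd[0]));
	free(pgd);
	return 0;
}
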