From 0202f8903a1f6f26d6806f64fdc375f162ae8507 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 Sep 2018 23:09:44 +0200 Subject: [PATCH] 4.9-stable patches added patches: mm-get-rid-of-vmacache_flush_all-entirely.patch --- ...t-rid-of-vmacache_flush_all-entirely.patch | 160 ++++++++++++++++++ queue-4.9/series | 2 +- .../x86-kexec-allocate-8k-pgds-for-pti.patch | 82 --------- 3 files changed, 161 insertions(+), 83 deletions(-) create mode 100644 queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch delete mode 100644 queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch diff --git a/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch b/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch new file mode 100644 index 00000000000..bc2ee45a206 --- /dev/null +++ b/queue-4.9/mm-get-rid-of-vmacache_flush_all-entirely.patch @@ -0,0 +1,160 @@ +From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Wed, 12 Sep 2018 23:57:48 -1000 +Subject: mm: get rid of vmacache_flush_all() entirely + +From: Linus Torvalds + +commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream. + +Jann Horn points out that the vmacache_flush_all() function is not only +potentially expensive, it's buggy too. It also happens to be entirely +unnecessary, because the sequence number overflow case can be avoided by +simply making the sequence number be 64-bit. That doesn't even grow the +data structures in question, because the other adjacent fields are +already 64-bit. + +So simplify the whole thing by just making the sequence number overflow +case go away entirely, which gets rid of all the complications and makes +the code faster too. Win-win. 
+ +[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics + also just goes away entirely with this ] + +Reported-by: Jann Horn +Suggested-by: Will Deacon +Acked-by: Davidlohr Bueso +Cc: Oleg Nesterov +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm_types.h | 2 +- + include/linux/sched.h | 2 +- + include/linux/vm_event_item.h | 1 - + include/linux/vmacache.h | 5 ----- + mm/debug.c | 4 ++-- + mm/vmacache.c | 38 -------------------------------------- + 6 files changed, 4 insertions(+), 48 deletions(-) + +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -396,7 +396,7 @@ struct kioctx_table; + struct mm_struct { + struct vm_area_struct *mmap; /* list of VMAs */ + struct rb_root mm_rb; +- u32 vmacache_seqnum; /* per-thread vmacache */ ++ u64 vmacache_seqnum; /* per-thread vmacache */ + #ifdef CONFIG_MMU + unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long addr, unsigned long len, +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1559,7 +1559,7 @@ struct task_struct { + + struct mm_struct *mm, *active_mm; + /* per-thread vma caching */ +- u32 vmacache_seqnum; ++ u64 vmacache_seqnum; + struct vm_area_struct *vmacache[VMACACHE_SIZE]; + #if defined(SPLIT_RSS_COUNTING) + struct task_rss_stat rss_stat; +--- a/include/linux/vm_event_item.h ++++ b/include/linux/vm_event_item.h +@@ -97,7 +97,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS + #ifdef CONFIG_DEBUG_VM_VMACACHE + VMACACHE_FIND_CALLS, + VMACACHE_FIND_HITS, +- VMACACHE_FULL_FLUSHES, + #endif + NR_VM_EVENT_ITEMS + }; +--- a/include/linux/vmacache.h ++++ b/include/linux/vmacache.h +@@ -15,7 +15,6 @@ static inline void vmacache_flush(struct + memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + } + +-extern void vmacache_flush_all(struct mm_struct *mm); + extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); + extern struct vm_area_struct *vmacache_find(struct mm_struct 
*mm, + unsigned long addr); +@@ -29,10 +28,6 @@ extern struct vm_area_struct *vmacache_f + static inline void vmacache_invalidate(struct mm_struct *mm) + { + mm->vmacache_seqnum++; +- +- /* deal with overflows */ +- if (unlikely(mm->vmacache_seqnum == 0)) +- vmacache_flush_all(mm); + } + + #endif /* __LINUX_VMACACHE_H */ +--- a/mm/debug.c ++++ b/mm/debug.c +@@ -95,7 +95,7 @@ EXPORT_SYMBOL(dump_vma); + + void dump_mm(const struct mm_struct *mm) + { +- pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" ++ pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n" + #ifdef CONFIG_MMU + "get_unmapped_area %p\n" + #endif +@@ -125,7 +125,7 @@ void dump_mm(const struct mm_struct *mm) + #endif + "def_flags: %#lx(%pGv)\n", + +- mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, ++ mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size, + #ifdef CONFIG_MMU + mm->get_unmapped_area, + #endif +--- a/mm/vmacache.c ++++ b/mm/vmacache.c +@@ -6,44 +6,6 @@ + #include <linux/vmacache.h> + + /* +- * Flush vma caches for threads that share a given mm. +- * +- * The operation is safe because the caller holds the mmap_sem +- * exclusively and other threads accessing the vma cache will +- * have mmap_sem held at least for read, so no extra locking +- * is required to maintain the vma cache. +- */ +-void vmacache_flush_all(struct mm_struct *mm) +-{ +- struct task_struct *g, *p; +- +- count_vm_vmacache_event(VMACACHE_FULL_FLUSHES); +- +- /* +- * Single threaded tasks need not iterate the entire +- * list of process. We can avoid the flushing as well +- * since the mm's seqnum was increased and don't have +- * to worry about other threads' seqnum. Current's +- * flush will occur upon the next lookup. +- */ +- if (atomic_read(&mm->mm_users) == 1) +- return; +- +- rcu_read_lock(); +- for_each_process_thread(g, p) { +- /* +- * Only flush the vmacache pointers as the +- * mm seqnum is already set and curr's will +- * be set upon invalidation when the next +- * lookup is done. 
+- */ +- if (mm == p->mm) +- vmacache_flush(p); +- } +- rcu_read_unlock(); +-} +- +-/* + * This task may be accessing a foreign mm via (for example) + * get_user_pages()->find_vma(). The vmacache is task-local and this + * task's vmacache pertains to a different mm (ie, its own). There is diff --git a/queue-4.9/series b/queue-4.9/series index 9a62c17ee01..917e7a4b491 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -41,7 +41,6 @@ scsi-3ware-fix-return-0-on-the-error-path-of-probe.patch ath10k-disable-bundle-mgmt-tx-completion-event-support.patch bluetooth-hidp-fix-handling-of-strncpy-for-hid-name-information.patch x86-mm-remove-in_nmi-warning-from-vmalloc_fault.patch -x86-kexec-allocate-8k-pgds-for-pti.patch gpio-ml-ioh-fix-buffer-underwrite-on-probe-error-path.patch net-mvneta-fix-mtu-change-on-port-without-link.patch f2fs-try-grabbing-node-page-lock-aggressively-in-sync-scenario.patch @@ -68,3 +67,4 @@ xhci-fix-use-after-free-in-xhci_free_virt_device.patch netfilter-x_tables-avoid-stack-out-of-bounds-read-in-xt_copy_counters_from_user.patch mtd-ubi-wl-fix-error-return-code-in-ubi_wl_init.patch autofs-fix-autofs_sbi-does-not-check-super-block-type.patch +mm-get-rid-of-vmacache_flush_all-entirely.patch diff --git a/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch b/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch deleted file mode 100644 index 546f513f366..00000000000 --- a/queue-4.9/x86-kexec-allocate-8k-pgds-for-pti.patch +++ /dev/null @@ -1,82 +0,0 @@ -From foo@baz Mon Sep 17 12:22:41 CEST 2018 -From: Joerg Roedel -Date: Wed, 25 Jul 2018 17:48:03 +0200 -Subject: x86/kexec: Allocate 8k PGDs for PTI - -From: Joerg Roedel - -[ Upstream commit ca38dc8f2724d101038b1205122c93a1c7f38f11 ] - -Fuzzing the PTI-x86-32 code with trinity showed unhandled -kernel paging request oops-messages that looked a lot like -silent data corruption. - -Lot's of debugging and testing lead to the kexec-32bit code, -which is still allocating 4k PGDs when PTI is enabled. 
But -since it uses native_set_pud() to build the page-table, it -will unevitably call into __pti_set_user_pgtbl(), which -writes beyond the allocated 4k page. - -Use PGD_ALLOCATION_ORDER to allocate PGDs in the kexec code -to fix the issue. - -Signed-off-by: Joerg Roedel -Signed-off-by: Thomas Gleixner -Tested-by: David H. Gutteridge -Cc: "H . Peter Anvin" -Cc: linux-mm@kvack.org -Cc: Linus Torvalds -Cc: Andy Lutomirski -Cc: Dave Hansen -Cc: Josh Poimboeuf -Cc: Juergen Gross -Cc: Peter Zijlstra -Cc: Borislav Petkov -Cc: Jiri Kosina -Cc: Boris Ostrovsky -Cc: Brian Gerst -Cc: David Laight -Cc: Denys Vlasenko -Cc: Eduardo Valentin -Cc: Greg KH -Cc: Will Deacon -Cc: aliguori@amazon.com -Cc: daniel.gruss@iaik.tugraz.at -Cc: hughd@google.com -Cc: keescook@google.com -Cc: Andrea Arcangeli -Cc: Waiman Long -Cc: Pavel Machek -Cc: Arnaldo Carvalho de Melo -Cc: Alexander Shishkin -Cc: Jiri Olsa -Cc: Namhyung Kim -Cc: joro@8bytes.org -Link: https://lkml.kernel.org/r/1532533683-5988-4-git-send-email-joro@8bytes.org -Signed-off-by: Sasha Levin -Signed-off-by: Greg Kroah-Hartman ---- - arch/x86/kernel/machine_kexec_32.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - ---- a/arch/x86/kernel/machine_kexec_32.c -+++ b/arch/x86/kernel/machine_kexec_32.c -@@ -70,7 +70,7 @@ static void load_segments(void) - - static void machine_kexec_free_page_tables(struct kimage *image) - { -- free_page((unsigned long)image->arch.pgd); -+ free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER); - image->arch.pgd = NULL; - #ifdef CONFIG_X86_PAE - free_page((unsigned long)image->arch.pmd0); -@@ -86,7 +86,8 @@ static void machine_kexec_free_page_tabl - - static int machine_kexec_alloc_page_tables(struct kimage *image) - { -- image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); -+ image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, -+ PGD_ALLOCATION_ORDER); - #ifdef CONFIG_X86_PAE - image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); - image->arch.pmd1 = 
(pmd_t *)get_zeroed_page(GFP_KERNEL); -- 2.47.3