From: Greg Kroah-Hartman
Date: Mon, 14 May 2018 06:33:35 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v3.18.109~4
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1aa26b27f7297e5bfa4c2a2495793e0146cecc1f;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
      kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
---

diff --git a/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch b/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
new file mode 100644
index 00000000000..7cded30b462
--- /dev/null
+++ b/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
@@ -0,0 +1,180 @@
+From c3856aeb29402e94ad9b3879030165cc6a4fdc56 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras
+Date: Fri, 23 Feb 2018 21:21:12 +1100
+Subject: KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler
+
+From: Paul Mackerras
+
+commit c3856aeb29402e94ad9b3879030165cc6a4fdc56 upstream.
+
+This fixes several bugs in the radix page fault handler relating to
+the way large pages in the memory backing the guest were handled.
+First, the check for large pages only checked for explicit huge pages
+and missed transparent huge pages.  Then the check that the addresses
+(host virtual vs. guest physical) had appropriate alignment was
+wrong, meaning that the code never put a large page in the partition-
+scoped radix tree; it was always demoted to a small page.
+
+Fixing this exposed bugs in kvmppc_create_pte().  We were never
+invalidating a 2MB PTE, which meant that if a page was initially
+faulted in without write permission and the guest then attempted
+to store to it, we would never update the PTE to have write permission.
+If we find a valid 2MB PTE in the PMD, we need to clear it and
+do a TLB invalidation before installing either the new 2MB PTE or
+a pointer to a page table page.
+
+This also corrects an assumption that get_user_pages_fast would set
+the _PAGE_DIRTY bit if we are writing, which is not true.  Instead we
+mark the page dirty explicitly with set_page_dirty_lock().  This
+also means we don't need the dirty bit set on the host PTE when
+providing write access on a read fault.
+
+[paulus@ozlabs.org - use mark_pages_dirty instead of
+ kvmppc_update_dirty_map]
+
+Signed-off-by: Paul Mackerras
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c |   72 +++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 26 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -19,6 +19,9 @@
+ #include
+ #include
+ 
++static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
++			     unsigned long gfn, unsigned int order);
++
+ /*
+  * Supported radix tree geometry.
+  * Like p9, we support either 5 or 9 bits at the first (lowest) level,
+@@ -195,6 +198,12 @@ static void kvmppc_pte_free(pte_t *ptep)
+ 	kmem_cache_free(kvm_pte_cache, ptep);
+ }
+ 
++/* Like pmd_huge() and pmd_large(), but works regardless of config options */
++static inline int pmd_is_leaf(pmd_t pmd)
++{
++	return !!(pmd_val(pmd) & _PAGE_PTE);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ 			     unsigned int level, unsigned long mmu_seq)
+ {
+@@ -219,7 +228,7 @@ static int kvmppc_create_pte(struct kvm
+ 	else
+ 		new_pmd = pmd_alloc_one(kvm->mm, gpa);
+ 
+-	if (level == 0 && !(pmd && pmd_present(*pmd)))
++	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+ 		new_ptep = kvmppc_pte_alloc();
+ 
+ 	/* Check if we might have been invalidated; let the guest retry if so */
+@@ -244,12 +253,30 @@ static int kvmppc_create_pte(struct kvm
+ 		new_pmd = NULL;
+ 	}
+ 	pmd = pmd_offset(pud, gpa);
+-	if (pmd_large(*pmd)) {
+-		/* Someone else has instantiated a large page here; retry */
+-		ret = -EAGAIN;
+-		goto out_unlock;
+-	}
+-	if (level == 1 && !pmd_none(*pmd)) {
++	if (pmd_is_leaf(*pmd)) {
++		unsigned long lgpa = gpa & PMD_MASK;
++
++		/*
++		 * If we raced with another CPU which has just put
++		 * a 2MB pte in after we saw a pte page, try again.
++		 */
++		if (level == 0 && !new_ptep) {
++			ret = -EAGAIN;
++			goto out_unlock;
++		}
++		/* Valid 2MB page here already, remove it */
++		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
++					      ~0UL, 0, lgpa, PMD_SHIFT);
++		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
++		if (old & _PAGE_DIRTY) {
++			unsigned long gfn = lgpa >> PAGE_SHIFT;
++			struct kvm_memory_slot *memslot;
++			memslot = gfn_to_memslot(kvm, gfn);
++			if (memslot)
++				mark_pages_dirty(kvm, memslot, gfn,
++						 PMD_SHIFT - PAGE_SHIFT);
++		}
++	} else if (level == 1 && !pmd_none(*pmd)) {
+ 		/*
+ 		 * There's a page table page here, but we wanted
+ 		 * to install a large page.  Tell the caller and let
+@@ -412,28 +439,24 @@ int kvmppc_book3s_radix_page_fault(struc
+ 	} else {
+ 		page = pages[0];
+ 		pfn = page_to_pfn(page);
+-		if (PageHuge(page)) {
+-			page = compound_head(page);
+-			pte_size <<= compound_order(page);
++		if (PageCompound(page)) {
++			pte_size <<= compound_order(compound_head(page));
+ 			/* See if we can insert a 2MB large-page PTE here */
+ 			if (pte_size >= PMD_SIZE &&
+-			    (gpa & PMD_MASK & PAGE_MASK) ==
+-			    (hva & PMD_MASK & PAGE_MASK)) {
++			    (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++			    (hva & (PMD_SIZE - PAGE_SIZE))) {
+ 				level = 1;
+ 				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+ 			}
+ 		}
+ 		/* See if we can provide write access */
+ 		if (writing) {
+-			/*
+-			 * We assume gup_fast has set dirty on the host PTE.
+-			 */
+ 			pgflags |= _PAGE_WRITE;
+ 		} else {
+ 			local_irq_save(flags);
+ 			ptep = find_current_mm_pte(current->mm->pgd,
+ 						   hva, NULL, NULL);
+-			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
++			if (ptep && pte_write(*ptep))
+ 				pgflags |= _PAGE_WRITE;
+ 			local_irq_restore(flags);
+ 		}
+@@ -459,18 +482,15 @@ int kvmppc_book3s_radix_page_fault(struc
+ 		pte = pfn_pte(pfn, __pgprot(pgflags));
+ 		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+ 	}
+-	if (ret == 0 || ret == -EAGAIN)
+-		ret = RESUME_GUEST;
+ 
+ 	if (page) {
+-		/*
+-		 * We drop pages[0] here, not page because page might
+-		 * have been set to the head page of a compound, but
+-		 * we have to drop the reference on the correct tail
+-		 * page to match the get inside gup()
+-		 */
+-		put_page(pages[0]);
++		if (!ret && (pgflags & _PAGE_WRITE))
++			set_page_dirty_lock(page);
++		put_page(page);
+ 	}
++
++	if (ret == 0 || ret == -EAGAIN)
++		ret = RESUME_GUEST;
+ 	return ret;
+ }
+ 
+@@ -676,7 +696,7 @@ void kvmppc_free_radix(struct kvm *kvm)
+ 			continue;
+ 		pmd = pmd_offset(pud, 0);
+ 		for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+-			if (pmd_huge(*pmd)) {
++			if (pmd_is_leaf(*pmd)) {
+ 				pmd_clear(pmd);
+ 				continue;
+ 			}
diff --git a/queue-4.14/series b/queue-4.14/series
index a54286925ad..0fa13bdda86 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -58,3 +58,4 @@ perf-x86-cstate-fix-possible-spectre-v1-indexing-for-pkg_msr.patch
 perf-x86-msr-fix-possible-spectre-v1-indexing-in-the-msr-driver.patch
 perf-core-fix-possible-spectre-v1-indexing-for-aux_pages.patch
 perf-x86-fix-possible-spectre-v1-indexing-for-x86_pmu-event_map.patch
+kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch