git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 May 2018 06:33:35 +0000 (08:33 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 May 2018 06:33:35 +0000 (08:33 +0200)
added patches:
kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch

queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch b/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
new file mode 100644
index 0000000..7cded30
--- /dev/null
+++ b/queue-4.14/kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
@@ -0,0 +1,180 @@
+From c3856aeb29402e94ad9b3879030165cc6a4fdc56 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 23 Feb 2018 21:21:12 +1100
+Subject: KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit c3856aeb29402e94ad9b3879030165cc6a4fdc56 upstream.
+
+This fixes several bugs in the radix page fault handler relating to
+the way large pages in the memory backing the guest were handled.
+First, the check for large pages only checked for explicit huge pages
+and missed transparent huge pages.  Then the check that the addresses
+(host virtual vs. guest physical) had appropriate alignment was
+wrong, meaning that the code never put a large page in the partition
+scoped radix tree; it was always demoted to a small page.
+
+Fixing this exposed bugs in kvmppc_create_pte().  We were never
+invalidating a 2MB PTE, which meant that if a page was initially
+faulted in without write permission and the guest then attempted
+to store to it, we would never update the PTE to have write permission.
+If we find a valid 2MB PTE in the PMD, we need to clear it and
+do a TLB invalidation before installing either the new 2MB PTE or
+a pointer to a page table page.
+
+This also corrects an assumption that get_user_pages_fast would set
+the _PAGE_DIRTY bit if we are writing, which is not true.  Instead we
+mark the page dirty explicitly with set_page_dirty_lock().  This
+also means we don't need the dirty bit set on the host PTE when
+providing write access on a read fault.
+
+[paulus@ozlabs.org - use mark_pages_dirty instead of
+ kvmppc_update_dirty_map]
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c |   72 +++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 26 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -19,6 +19,9 @@
+ #include <asm/pgalloc.h>
+ #include <asm/pte-walk.h>
++static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
++                           unsigned long gfn, unsigned int order);
++
+ /*
+  * Supported radix tree geometry.
+  * Like p9, we support either 5 or 9 bits at the first (lowest) level,
+@@ -195,6 +198,12 @@ static void kvmppc_pte_free(pte_t *ptep)
+       kmem_cache_free(kvm_pte_cache, ptep);
+ }
++/* Like pmd_huge() and pmd_large(), but works regardless of config options */
++static inline int pmd_is_leaf(pmd_t pmd)
++{
++      return !!(pmd_val(pmd) & _PAGE_PTE);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+                            unsigned int level, unsigned long mmu_seq)
+ {
+@@ -219,7 +228,7 @@ static int kvmppc_create_pte(struct kvm
+       else
+               new_pmd = pmd_alloc_one(kvm->mm, gpa);
+-      if (level == 0 && !(pmd && pmd_present(*pmd)))
++      if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+               new_ptep = kvmppc_pte_alloc();
+       /* Check if we might have been invalidated; let the guest retry if so */
+@@ -244,12 +253,30 @@ static int kvmppc_create_pte(struct kvm
+               new_pmd = NULL;
+       }
+       pmd = pmd_offset(pud, gpa);
+-      if (pmd_large(*pmd)) {
+-              /* Someone else has instantiated a large page here; retry */
+-              ret = -EAGAIN;
+-              goto out_unlock;
+-      }
+-      if (level == 1 && !pmd_none(*pmd)) {
++      if (pmd_is_leaf(*pmd)) {
++              unsigned long lgpa = gpa & PMD_MASK;
++
++              /*
++               * If we raced with another CPU which has just put
++               * a 2MB pte in after we saw a pte page, try again.
++               */
++              if (level == 0 && !new_ptep) {
++                      ret = -EAGAIN;
++                      goto out_unlock;
++              }
++              /* Valid 2MB page here already, remove it */
++              old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
++                                            ~0UL, 0, lgpa, PMD_SHIFT);
++              kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
++              if (old & _PAGE_DIRTY) {
++                      unsigned long gfn = lgpa >> PAGE_SHIFT;
++                      struct kvm_memory_slot *memslot;
++                      memslot = gfn_to_memslot(kvm, gfn);
++                      if (memslot)
++                              mark_pages_dirty(kvm, memslot, gfn,
++                                               PMD_SHIFT - PAGE_SHIFT);
++              }
++      } else if (level == 1 && !pmd_none(*pmd)) {
+               /*
+                * There's a page table page here, but we wanted
+                * to install a large page.  Tell the caller and let
+@@ -412,28 +439,24 @@ int kvmppc_book3s_radix_page_fault(struc
+       } else {
+               page = pages[0];
+               pfn = page_to_pfn(page);
+-              if (PageHuge(page)) {
+-                      page = compound_head(page);
+-                      pte_size <<= compound_order(page);
++              if (PageCompound(page)) {
++                      pte_size <<= compound_order(compound_head(page));
+                       /* See if we can insert a 2MB large-page PTE here */
+                       if (pte_size >= PMD_SIZE &&
+-                          (gpa & PMD_MASK & PAGE_MASK) ==
+-                          (hva & PMD_MASK & PAGE_MASK)) {
++                          (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++                          (hva & (PMD_SIZE - PAGE_SIZE))) {
+                               level = 1;
+                               pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+                       }
+               }
+               /* See if we can provide write access */
+               if (writing) {
+-                      /*
+-                       * We assume gup_fast has set dirty on the host PTE.
+-                       */
+                       pgflags |= _PAGE_WRITE;
+               } else {
+                       local_irq_save(flags);
+                       ptep = find_current_mm_pte(current->mm->pgd,
+                                                  hva, NULL, NULL);
+-                      if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
++                      if (ptep && pte_write(*ptep))
+                               pgflags |= _PAGE_WRITE;
+                       local_irq_restore(flags);
+               }
+@@ -459,18 +482,15 @@ int kvmppc_book3s_radix_page_fault(struc
+               pte = pfn_pte(pfn, __pgprot(pgflags));
+               ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+       }
+-      if (ret == 0 || ret == -EAGAIN)
+-              ret = RESUME_GUEST;
+       if (page) {
+-              /*
+-               * We drop pages[0] here, not page because page might
+-               * have been set to the head page of a compound, but
+-               * we have to drop the reference on the correct tail
+-               * page to match the get inside gup()
+-               */
+-              put_page(pages[0]);
++              if (!ret && (pgflags & _PAGE_WRITE))
++                      set_page_dirty_lock(page);
++              put_page(page);
+       }
++
++      if (ret == 0 || ret == -EAGAIN)
++              ret = RESUME_GUEST;
+       return ret;
+ }
+@@ -676,7 +696,7 @@ void kvmppc_free_radix(struct kvm *kvm)
+                               continue;
+                       pmd = pmd_offset(pud, 0);
+                       for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+-                              if (pmd_huge(*pmd)) {
++                              if (pmd_is_leaf(*pmd)) {
+                                       pmd_clear(pmd);
+                                       continue;
+                               }
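
A note on the 2MB alignment test changed in the hunk above: the old expression (gpa & PMD_MASK & PAGE_MASK) reduces to gpa & PMD_MASK, so it compared the 2MB-aligned bases of a guest-physical and a host-virtual address, which are essentially never equal; that is why a large page was never installed. The new expression masks with (PMD_SIZE - PAGE_SIZE), i.e. it compares only the page-number bits within the 2MB region, which is the property that actually decides whether a 2MB mapping is possible. The standalone C sketch below illustrates the arithmetic; the 4K page size, 2MB PMD span and sample addresses are assumptions made for the illustration, not values taken from the patch.

/*
 * Standalone sketch (not kernel code) of the alignment test fixed above.
 * Assumed geometry for illustration: 4K base pages, 2MB PMD span.
 */
#include <stdio.h>

#define PAGE_SHIFT 12UL
#define PAGE_SIZE  (1UL << PAGE_SHIFT)           /* 4K, assumed */
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PMD_SHIFT  21UL
#define PMD_SIZE   (1UL << PMD_SHIFT)            /* 2MB, assumed */
#define PMD_MASK   (~(PMD_SIZE - 1))

int main(void)
{
	/* Made-up guest-physical and host-virtual addresses that share the
	 * same offset inside a 2MB region, i.e. a 2MB mapping is legal. */
	unsigned long gpa = 0x40200000UL + 0x123000UL;
	unsigned long hva = 0xbf400000UL + 0x123000UL;

	/* Old test: PMD_MASK & PAGE_MASK is just PMD_MASK, so this compares
	 * the 2MB-aligned bases of two unrelated address spaces and fails,
	 * demoting everything to small pages. */
	int old_ok = (gpa & PMD_MASK & PAGE_MASK) == (hva & PMD_MASK & PAGE_MASK);

	/* New test: compare only the page bits within the 2MB region. */
	int new_ok = (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE));

	printf("old check: %d, new check: %d\n", old_ok, new_ok);   /* prints 0, 1 */
	return 0;
}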
diff --git a/queue-4.14/series b/queue-4.14/series
index a54286925ad1c08702ba0c304f010f81d4f10a0e..0fa13bdda86bf0b133130e3587221688fc6d1c37 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -58,3 +58,4 @@ perf-x86-cstate-fix-possible-spectre-v1-indexing-for-pkg_msr.patch
 perf-x86-msr-fix-possible-spectre-v1-indexing-in-the-msr-driver.patch
 perf-core-fix-possible-spectre-v1-indexing-for-aux_pages.patch
 perf-x86-fix-possible-spectre-v1-indexing-for-x86_pmu-event_map.patch
+kvm-ppc-book3s-hv-fix-handling-of-large-pages-in-radix-page-fault-handler.patch
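
A note on the kvmppc_create_pte() hunk above: when a valid 2MB leaf PMD is found where a new mapping should go, the patch first clears the old entry and issues a TLB invalidation, transfers its dirty state to the memslot dirty map, and only then installs either the new 2MB PTE or a pointer to a PTE page (backing off with -EAGAIN if another CPU raced in a 2MB page after a PTE page had been seen). The toy model below mirrors that ordering; every type and helper in it is a simplified stand-in invented for the sketch, not a kernel interface.

/*
 * Toy model (not kernel code) of the invalidate-before-install ordering
 * introduced in kvmppc_create_pte().
 */
#include <stdbool.h>
#include <stdio.h>

struct pmd_entry {
	bool valid;
	bool leaf;	/* a 2MB mapping rather than a pointer to a PTE page */
	bool dirty;
};

static bool dirty_map;	/* stand-in for the memslot dirty bitmap */

static void tlb_invalidate_2mb(void)
{
	/* stand-in for the TLB invalidation of the old 2MB entry */
	puts("tlbie 2MB entry");
}

/* Install a new 2MB leaf (level == 1) or a PTE-page pointer (level == 0). */
static int install(struct pmd_entry *pmd, int level, bool have_pte_page)
{
	if (pmd->valid && pmd->leaf) {
		/* Another CPU put a 2MB page in after we decided not to
		 * allocate a PTE page: let the caller retry. */
		if (level == 0 && !have_pte_page)
			return -1;

		/* Valid 2MB entry already here: remove it first ... */
		bool was_dirty = pmd->dirty;
		pmd->valid = false;
		tlb_invalidate_2mb();

		/* ... and carry its dirty state over instead of losing it. */
		if (was_dirty)
			dirty_map = true;
	}

	pmd->valid = true;
	pmd->leaf = (level == 1);
	pmd->dirty = false;
	return 0;
}

int main(void)
{
	/* Re-fault a range already covered by a dirty 2MB leaf. */
	struct pmd_entry pmd = { .valid = true, .leaf = true, .dirty = true };

	if (install(&pmd, 1, false) == 0)
		printf("reinstalled 2MB leaf, dirty carried over: %d\n", dirty_map);
	return 0;
}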
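
A note on the dirty-page handling changed in the fault path: the handler no longer assumes get_user_pages_fast() has set _PAGE_DIRTY on the host PTE. Instead, once the mapping has been created, it calls set_page_dirty_lock() on the page if write access was actually granted, drops the page reference with put_page(), and only then translates the result into RESUME_GUEST. The minimal standalone model below shows that ordering; the page structure and helpers are userspace stand-ins for illustration, not the kernel's.

/*
 * Minimal model (not kernel code) of the explicit dirtying done after
 * this patch: gup takes a reference but does not dirty the page; the
 * fault handler dirties it itself before dropping the reference.
 */
#include <stdbool.h>
#include <stdio.h>

struct page {
	int refcount;
	bool dirty;
};

/* stand-in for get_user_pages_fast(): takes a reference, does NOT dirty */
static void gup_fast(struct page *p)            { p->refcount++; }
/* stand-ins for set_page_dirty_lock() and put_page() */
static void set_page_dirty_lock(struct page *p) { p->dirty = true; }
static void put_page(struct page *p)            { p->refcount--; }

int main(void)
{
	struct page backing = { .refcount = 1, .dirty = false };
	bool writing = true;
	int ret = 0;	/* result of the (modelled) PTE insertion */

	gup_fast(&backing);

	/* ... the guest mapping is created here ... */

	/* New ordering: dirty the page only if a writable mapping really
	 * went in, then drop the gup reference. */
	if (ret == 0 && writing)
		set_page_dirty_lock(&backing);
	put_page(&backing);

	printf("dirty=%d refcount=%d\n", backing.dirty, backing.refcount);
	return 0;
}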