From fa41ba0d08de7c975c3e94d0067553f9b934221f Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 24 Aug 2017 12:55:08 +0200
Subject: s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs

From: Christian Borntraeger <borntraeger@de.ibm.com>

commit fa41ba0d08de7c975c3e94d0067553f9b934221f upstream.

Right now there is a potential hang situation for postcopy migrations,
if the guest is enabling storage keys on the target system during the
postcopy process.

For storage key virtualization, we have to forbid the empty zero page as
the storage key is a property of the physical page frame. As we enable
storage key handling lazily we then drop all mappings for empty zero
pages for lazy refaulting later on.

This does not work with the postcopy migration, which relies on the
empty zero page never triggering a fault again in the future. The reason
is that postcopy migration will simply read a page on the target system
if that page is a known zero page to fault in an empty zero page. At
the same time postcopy remembers that this page was already transferred
- so any future userfault on that page will NOT be retransmitted again
to avoid races.

If now the guest enters the storage key mode while in postcopy, we will
break this assumption of postcopy.

The solution is to disable the empty zero page for KVM guests early on
and not during storage key enablement. With this change, the postcopy
migration process is guaranteed to start after no zero pages are left.

As guest pages are very likely not empty zero pages anyway the memory
overhead is also pretty small.

While at it this also adds proper page table locking to the zero page
removal.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/s390/include/asm/pgtable.h |    2 +-
 arch/s390/mm/gmap.c             |   39 ++++++++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 8 deletions(-)

--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -480,7 +480,7 @@ static inline int mm_alloc_pgste(struct
  * In the case that a guest uses storage keys
  * faults should no longer be backed by zero pages
  */
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
 static inline int mm_use_skey(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2125,6 +2125,37 @@ static inline void thp_split_mm(struct m
 }
 
 /*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ *   future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+			    unsigned long end, struct mm_walk *walk)
+{
+	unsigned long addr;
+
+	for (addr = start; addr != end; addr += PAGE_SIZE) {
+		pte_t *ptep;
+		spinlock_t *ptl;
+
+		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+		if (is_zero_pfn(pte_pfn(*ptep)))
+			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+		pte_unmap_unlock(ptep, ptl);
+	}
+	return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
@@ -2141,6 +2172,7 @@ int s390_enable_sie(void)
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
+	zap_zero_pages(mm);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -2153,13 +2185,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
 static int __s390_enable_skey(pte_t *pte, unsigned long addr,
 			      unsigned long next, struct mm_walk *walk)
 {
-	/*
-	 * Remove all zero page mappings,
-	 * after establishing a policy to forbid zero page mappings
-	 * following faults for that page will get fresh anonymous pages
-	 */
-	if (is_zero_pfn(pte_pfn(*pte)))
-		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
 	/* Clear storage key */
 	ptep_zap_key(walk->mm, addr, pte);
 	return 0;