#define pr_fmt(fmt) "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

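/*
 * Per-cpu, page-sized hypercall input buffers; allocated in
 * hyper_alloc_mmu() for whichever hypercall flavor is in use.
 */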
static struct hv_flush_pcpu __percpu *pcpu_flush;

static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
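 * For example, a 2 MB range (512 4K pages) becomes a single entry: the
 * page-aligned start address with 511, the number of additional pages,
 * encoded in its low 12 bits.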
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
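/*
 * Sparse vp_set layout: each bank is a 64-bit bitmap covering 64
 * consecutive VP indices; valid_bank_mask marks which banks are present
 * in bank_contents[]. nr_bank starts at 1 so the set always has a
 * well-defined, non-zero length.
 */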
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /*
         * Some banks may end up being empty but this is acceptable.
         */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;

                /* valid_bank_mask can represent up to 64 banks */
                if (vcpu_bank >= 64)
                        return 0;

                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush);

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

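        /*
         * The non-Ex flush hypercalls take a flat 64-bit processor_mask,
         * so VP numbers above 63 cannot be represented; fall back to
         * native IPIs in that case.
         */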
        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        if (vcpu >= 64)
                                goto do_native;

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * A single hypercall can flush at most max_gvas GVA ranges. Flush
         * the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
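        /* With 4K pages: (4096 - 24) / 8 = 509 gva_list entries. */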

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

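        /*
         * The low 16 bits of the hypercall result carry the status code;
         * zero is HV_STATUS_SUCCESS.
         */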
        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush_ex);

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

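        /*
         * nr_bank == 0 means we are either flushing all present CPUs or
         * cpumask_to_vp_set() hit a VP it could not represent; in both
         * cases target every processor.
         */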
        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }

        /*
         * A single hypercall can flush at most max_gvas GVA ranges. Flush
         * the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);

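        /*
         * The variable header of the Ex hypercalls is the vp_set:
         * format + valid_bank_mask + nr_bank bank words, hence the
         * "nr_bank + 2" qword count passed below.
         */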
        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                                0, nr_bank + 2, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                                0, nr_bank + 2, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                                HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                                gva_n, nr_bank + 2, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

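        /*
         * The flush hypercalls identify the target address space by its
         * pgd/CR3 value and are not PCID-aware, so keep PCID disabled
         * while they are in use.
         */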
        setup_clear_cpu_cap(X86_FEATURE_PCID);

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

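        /*
         * Hypercall input must not cross a page boundary; a page-sized,
         * page-aligned per-cpu buffer guarantees that.
         */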
        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
        else
                pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
}