]>
Commit | Line | Data |
---|---|---|
43aa3132 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
8730046c S |
2 | /* |
3 | * X86 specific Hyper-V initialization code. | |
4 | * | |
5 | * Copyright (C) 2016, Microsoft, Inc. | |
6 | * | |
7 | * Author : K. Y. Srinivasan <kys@microsoft.com> | |
8730046c S |
8 | */ |
9 | ||
203a521b SS |
10 | #define pr_fmt(fmt) "Hyper-V: " fmt |
11 | ||
2f285f46 | 12 | #include <linux/efi.h> |
8730046c | 13 | #include <linux/types.h> |
a6c76bb0 | 14 | #include <linux/bitfield.h> |
6dc77fa5 | 15 | #include <linux/io.h> |
93286261 VK |
16 | #include <asm/apic.h> |
17 | #include <asm/desc.h> | |
49d6a3c0 | 18 | #include <asm/sev.h> |
d5ace2a7 | 19 | #include <asm/ibt.h> |
8730046c | 20 | #include <asm/hypervisor.h> |
5a485803 | 21 | #include <asm/hyperv-tlfs.h> |
8730046c | 22 | #include <asm/mshyperv.h> |
a16be368 | 23 | #include <asm/idtentry.h> |
b1310355 | 24 | #include <asm/set_memory.h> |
dfe94d40 | 25 | #include <linux/kexec.h> |
8730046c S |
26 | #include <linux/version.h> |
27 | #include <linux/vmalloc.h> | |
28 | #include <linux/mm.h> | |
67071816 | 29 | #include <linux/hyperv.h> |
7415aea6 | 30 | #include <linux/slab.h> |
f3a99e76 | 31 | #include <linux/kernel.h> |
7415aea6 | 32 | #include <linux/cpuhotplug.h> |
05bd330a | 33 | #include <linux/syscore_ops.h> |
dd2cb348 | 34 | #include <clocksource/hyperv_timer.h> |
80f73c9f | 35 | #include <linux/highmem.h> |
8730046c | 36 | |
dfe94d40 | 37 | int hyperv_init_cpuhp; |
99a0f46a WL |
38 | u64 hv_current_partition_id = ~0ull; |
39 | EXPORT_SYMBOL_GPL(hv_current_partition_id); | |
dfe94d40 | 40 | |
fc53662f VK |
41 | void *hv_hypercall_pg; |
42 | EXPORT_SYMBOL_GPL(hv_hypercall_pg); | |
dee863b5 | 43 | |
e1878402 | 44 | union hv_ghcb * __percpu *hv_ghcb_pg; |
0cc4f6d9 | 45 | |
05bd330a DC |
46 | /* Storage to save the hypercall page temporarily for hibernation */ |
47 | static void *hv_hypercall_pg_saved; | |
48 | ||
a46d15cc VK |
49 | struct hv_vp_assist_page **hv_vp_assist_page; |
50 | EXPORT_SYMBOL_GPL(hv_vp_assist_page); | |
51 | ||
0cc4f6d9 TL |
52 | static int hyperv_init_ghcb(void) |
53 | { | |
54 | u64 ghcb_gpa; | |
55 | void *ghcb_va; | |
56 | void **ghcb_base; | |
57 | ||
e3131f1c | 58 | if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) |
0cc4f6d9 TL |
59 | return 0; |
60 | ||
61 | if (!hv_ghcb_pg) | |
62 | return -EINVAL; | |
63 | ||
64 | /* | |
65 | * GHCB page is allocated by paravisor. The address | |
66 | * returned by MSR_AMD64_SEV_ES_GHCB is above shared | |
67 | * memory boundary and map it here. | |
68 | */ | |
69 | rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); | |
6afd9dc1 MK |
70 | |
71 | /* Mask out vTOM bit. ioremap_cache() maps decrypted */ | |
72 | ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary; | |
73 | ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE); | |
0cc4f6d9 TL |
74 | if (!ghcb_va) |
75 | return -ENOMEM; | |
76 | ||
77 | ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); | |
78 | *ghcb_base = ghcb_va; | |
79 | ||
80 | return 0; | |
81 | } | |
82 | ||
7415aea6 VK |
83 | static int hv_cpu_init(unsigned int cpu) |
84 | { | |
e5d9b714 | 85 | union hv_vp_assist_msr_contents msr = { 0 }; |
68f2f2bc | 86 | struct hv_vp_assist_page **hvp; |
afca4d95 | 87 | int ret; |
7415aea6 | 88 | |
afca4d95 MK |
89 | ret = hv_common_cpu_init(cpu); |
90 | if (ret) | |
91 | return ret; | |
a3b74243 | 92 | |
a46d15cc VK |
93 | if (!hv_vp_assist_page) |
94 | return 0; | |
95 | ||
68f2f2bc | 96 | hvp = &hv_vp_assist_page[cpu]; |
ee681541 VK |
97 | if (hv_root_partition) { |
98 | /* | |
99 | * For root partition we get the hypervisor provided VP assist | |
100 | * page, instead of allocating a new page. | |
101 | */ | |
102 | rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); | |
103 | *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, | |
104 | PAGE_SIZE, MEMREMAP_WB); | |
105 | } else { | |
106 | /* | |
107 | * The VP assist page is an "overlay" page (see Hyper-V TLFS's | |
108 | * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed | |
109 | * out to make sure we always write the EOI MSR in | |
110 | * hv_apic_eoi_write() *after* the EOI optimization is disabled | |
111 | * in hv_cpu_die(), otherwise a CPU may not be stopped in the | |
112 | * case of CPU offlining and the VM will hang. | |
113 | */ | |
b1310355 | 114 | if (!*hvp) { |
e5d9b714 | 115 | *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); |
b1310355 TL |
116 | |
117 | /* | |
118 | * Hyper-V should never specify a VM that is a Confidential | |
119 | * VM and also running in the root partition. Root partition | |
120 | * is blocked to run in Confidential VM. So only decrypt assist | |
121 | * page in non-root partition here. | |
122 | */ | |
e3131f1c | 123 | if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) { |
b1310355 TL |
124 | WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1)); |
125 | memset(*hvp, 0, PAGE_SIZE); | |
126 | } | |
127 | } | |
128 | ||
ee681541 VK |
129 | if (*hvp) |
130 | msr.pfn = vmalloc_to_pfn(*hvp); | |
131 | ||
132 | } | |
133 | if (!WARN_ON(!(*hvp))) { | |
134 | msr.enable = 1; | |
135 | wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); | |
a46d15cc VK |
136 | } |
137 | ||
0cc4f6d9 | 138 | return hyperv_init_ghcb(); |
7415aea6 VK |
139 | } |
140 | ||
93286261 VK |
141 | static void (*hv_reenlightenment_cb)(void); |
142 | ||
143 | static void hv_reenlightenment_notify(struct work_struct *dummy) | |
144 | { | |
145 | struct hv_tsc_emulation_status emu_status; | |
146 | ||
147 | rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); | |
148 | ||
149 | /* Don't issue the callback if TSC accesses are not emulated */ | |
150 | if (hv_reenlightenment_cb && emu_status.inprogress) | |
151 | hv_reenlightenment_cb(); | |
152 | } | |
153 | static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); | |
154 | ||
155 | void hyperv_stop_tsc_emulation(void) | |
156 | { | |
157 | u64 freq; | |
158 | struct hv_tsc_emulation_status emu_status; | |
159 | ||
160 | rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); | |
161 | emu_status.inprogress = 0; | |
162 | wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); | |
163 | ||
164 | rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); | |
165 | tsc_khz = div64_u64(freq, 1000); | |
166 | } | |
167 | EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); | |
168 | ||
169 | static inline bool hv_reenlightenment_available(void) | |
170 | { | |
171 | /* | |
d9f6e12f | 172 | * Check for required features and privileges to make TSC frequency |
93286261 VK |
173 | * change notifications work. |
174 | */ | |
dfc53baa | 175 | return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && |
93286261 | 176 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && |
dfc53baa | 177 | ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT; |
93286261 VK |
178 | } |
179 | ||
a16be368 | 180 | DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment) |
93286261 | 181 | { |
670c04ad | 182 | apic_eoi(); |
51d4e5da | 183 | inc_irq_stat(irq_hv_reenlightenment_count); |
93286261 | 184 | schedule_delayed_work(&hv_reenlightenment_work, HZ/10); |
93286261 VK |
185 | } |
186 | ||
187 | void set_hv_tscchange_cb(void (*cb)(void)) | |
188 | { | |
189 | struct hv_reenlightenment_control re_ctrl = { | |
190 | .vector = HYPERV_REENLIGHTENMENT_VECTOR, | |
191 | .enabled = 1, | |
93286261 VK |
192 | }; |
193 | struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; | |
194 | ||
195 | if (!hv_reenlightenment_available()) { | |
203a521b | 196 | pr_warn("reenlightenment support is unavailable\n"); |
93286261 VK |
197 | return; |
198 | } | |
199 | ||
daf97211 SC |
200 | if (!hv_vp_index) |
201 | return; | |
202 | ||
93286261 VK |
203 | hv_reenlightenment_cb = cb; |
204 | ||
205 | /* Make sure callback is registered before we write to MSRs */ | |
206 | wmb(); | |
207 | ||
285f68af VK |
208 | re_ctrl.target_vp = hv_vp_index[get_cpu()]; |
209 | ||
93286261 VK |
210 | wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); |
211 | wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); | |
285f68af VK |
212 | |
213 | put_cpu(); | |
93286261 VK |
214 | } |
215 | EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); | |
216 | ||
217 | void clear_hv_tscchange_cb(void) | |
218 | { | |
219 | struct hv_reenlightenment_control re_ctrl; | |
220 | ||
221 | if (!hv_reenlightenment_available()) | |
222 | return; | |
223 | ||
224 | rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); | |
225 | re_ctrl.enabled = 0; | |
226 | wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); | |
227 | ||
228 | hv_reenlightenment_cb = NULL; | |
229 | } | |
230 | EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); | |
231 | ||
e7c4e36c VK |
232 | static int hv_cpu_die(unsigned int cpu) |
233 | { | |
234 | struct hv_reenlightenment_control re_ctrl; | |
235 | unsigned int new_cpu; | |
0cc4f6d9 TL |
236 | void **ghcb_va; |
237 | ||
238 | if (hv_ghcb_pg) { | |
239 | ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg); | |
240 | if (*ghcb_va) | |
6afd9dc1 | 241 | iounmap(*ghcb_va); |
0cc4f6d9 TL |
242 | *ghcb_va = NULL; |
243 | } | |
68bb7bfb | 244 | |
afca4d95 | 245 | hv_common_cpu_die(cpu); |
e7c4e36c | 246 | |
e5d9b714 PK |
247 | if (hv_vp_assist_page && hv_vp_assist_page[cpu]) { |
248 | union hv_vp_assist_msr_contents msr = { 0 }; | |
249 | if (hv_root_partition) { | |
250 | /* | |
251 | * For root partition the VP assist page is mapped to | |
252 | * hypervisor provided page, and thus we unmap the | |
253 | * page here and nullify it, so that in future we have | |
254 | * correct page address mapped in hv_cpu_init. | |
255 | */ | |
256 | memunmap(hv_vp_assist_page[cpu]); | |
257 | hv_vp_assist_page[cpu] = NULL; | |
258 | rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); | |
259 | msr.enable = 0; | |
260 | } | |
261 | wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); | |
262 | } | |
a46d15cc | 263 | |
e7c4e36c VK |
264 | if (hv_reenlightenment_cb == NULL) |
265 | return 0; | |
266 | ||
267 | rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); | |
268 | if (re_ctrl.target_vp == hv_vp_index[cpu]) { | |
38dce419 VK |
269 | /* |
270 | * Reassign reenlightenment notifications to some other online | |
271 | * CPU or just disable the feature if there are no online CPUs | |
272 | * left (happens on hibernation). | |
273 | */ | |
e7c4e36c VK |
274 | new_cpu = cpumask_any_but(cpu_online_mask, cpu); |
275 | ||
38dce419 VK |
276 | if (new_cpu < nr_cpu_ids) |
277 | re_ctrl.target_vp = hv_vp_index[new_cpu]; | |
278 | else | |
279 | re_ctrl.enabled = 0; | |
280 | ||
e7c4e36c VK |
281 | wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); |
282 | } | |
283 | ||
284 | return 0; | |
285 | } | |
286 | ||
2f285f46 DC |
287 | static int __init hv_pci_init(void) |
288 | { | |
289 | int gen2vm = efi_enabled(EFI_BOOT); | |
290 | ||
291 | /* | |
292 | * For Generation-2 VM, we exit from pci_arch_init() by returning 0. | |
293 | * The purpose is to suppress the harmless warning: | |
294 | * "PCI: Fatal: No config space access function found" | |
295 | */ | |
296 | if (gen2vm) | |
297 | return 0; | |
298 | ||
299 | /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ | |
300 | return 1; | |
301 | } | |
302 | ||
05bd330a DC |
303 | static int hv_suspend(void) |
304 | { | |
305 | union hv_x64_msr_hypercall_contents hypercall_msr; | |
421f090c | 306 | int ret; |
05bd330a | 307 | |
80f73c9f WL |
308 | if (hv_root_partition) |
309 | return -EPERM; | |
310 | ||
05bd330a DC |
311 | /* |
312 | * Reset the hypercall page as it is going to be invalidated | |
d9f6e12f | 313 | * across hibernation. Setting hv_hypercall_pg to NULL ensures |
05bd330a DC |
314 | * that any subsequent hypercall operation fails safely instead of |
315 | * crashing due to an access of an invalid page. The hypercall page | |
316 | * pointer is restored on resume. | |
317 | */ | |
318 | hv_hypercall_pg_saved = hv_hypercall_pg; | |
319 | hv_hypercall_pg = NULL; | |
320 | ||
321 | /* Disable the hypercall page in the hypervisor */ | |
322 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
323 | hypercall_msr.enable = 0; | |
324 | wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
325 | ||
421f090c DC |
326 | ret = hv_cpu_die(0); |
327 | return ret; | |
05bd330a DC |
328 | } |
329 | ||
330 | static void hv_resume(void) | |
331 | { | |
332 | union hv_x64_msr_hypercall_contents hypercall_msr; | |
421f090c DC |
333 | int ret; |
334 | ||
335 | ret = hv_cpu_init(0); | |
336 | WARN_ON(ret); | |
05bd330a DC |
337 | |
338 | /* Re-enable the hypercall page */ | |
339 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
340 | hypercall_msr.enable = 1; | |
341 | hypercall_msr.guest_physical_address = | |
342 | vmalloc_to_pfn(hv_hypercall_pg_saved); | |
343 | wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
344 | ||
345 | hv_hypercall_pg = hv_hypercall_pg_saved; | |
346 | hv_hypercall_pg_saved = NULL; | |
38dce419 VK |
347 | |
348 | /* | |
349 | * Reenlightenment notifications are disabled by hv_cpu_die(0), | |
350 | * reenable them here if hv_reenlightenment_cb was previously set. | |
351 | */ | |
352 | if (hv_reenlightenment_cb) | |
353 | set_hv_tscchange_cb(hv_reenlightenment_cb); | |
05bd330a DC |
354 | } |
355 | ||
421f090c | 356 | /* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */ |
05bd330a DC |
357 | static struct syscore_ops hv_syscore_ops = { |
358 | .suspend = hv_suspend, | |
359 | .resume = hv_resume, | |
360 | }; | |
361 | ||
fff7b5e6 DC |
362 | static void (* __initdata old_setup_percpu_clockev)(void); |
363 | ||
364 | static void __init hv_stimer_setup_percpu_clockev(void) | |
365 | { | |
366 | /* | |
367 | * Ignore any errors in setting up stimer clockevents | |
368 | * as we can run with the LAPIC timer as a fallback. | |
369 | */ | |
ec866be6 | 370 | (void)hv_stimer_alloc(false); |
fff7b5e6 DC |
371 | |
372 | /* | |
373 | * Still register the LAPIC timer, because the direct-mode STIMER is | |
374 | * not supported by old versions of Hyper-V. This also allows users | |
375 | * to switch to LAPIC timer via /sys, if they want to. | |
376 | */ | |
377 | if (old_setup_percpu_clockev) | |
378 | old_setup_percpu_clockev(); | |
379 | } | |
380 | ||
99a0f46a WL |
381 | static void __init hv_get_partition_id(void) |
382 | { | |
383 | struct hv_get_partition_id *output_page; | |
384 | u64 status; | |
385 | unsigned long flags; | |
386 | ||
387 | local_irq_save(flags); | |
388 | output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); | |
389 | status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); | |
753ed9c9 | 390 | if (!hv_result_success(status)) { |
99a0f46a WL |
391 | /* No point in proceeding if this failed */ |
392 | pr_err("Failed to get partition ID: %lld\n", status); | |
393 | BUG(); | |
394 | } | |
395 | hv_current_partition_id = output_page->partition_id; | |
396 | local_irq_restore(flags); | |
397 | } | |
398 | ||
f2a55d08 | 399 | #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) |
8387ce06 TL |
400 | static u8 __init get_vtl(void) |
401 | { | |
402 | u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; | |
403 | struct hv_get_vp_registers_input *input; | |
404 | struct hv_get_vp_registers_output *output; | |
405 | unsigned long flags; | |
406 | u64 ret; | |
407 | ||
408 | local_irq_save(flags); | |
409 | input = *this_cpu_ptr(hyperv_pcpu_input_arg); | |
410 | output = (struct hv_get_vp_registers_output *)input; | |
411 | ||
412 | memset(input, 0, struct_size(input, element, 1)); | |
413 | input->header.partitionid = HV_PARTITION_ID_SELF; | |
414 | input->header.vpindex = HV_VP_INDEX_SELF; | |
415 | input->header.inputvtl = 0; | |
416 | input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; | |
417 | ||
418 | ret = hv_do_hypercall(control, input, output); | |
419 | if (hv_result_success(ret)) { | |
420 | ret = output->as64.low & HV_X64_VTL_MASK; | |
421 | } else { | |
f2a55d08 SS |
422 | pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); |
423 | BUG(); | |
8387ce06 TL |
424 | } |
425 | ||
426 | local_irq_restore(flags); | |
427 | return ret; | |
428 | } | |
f2a55d08 SS |
429 | #else |
430 | static inline u8 get_vtl(void) { return 0; } | |
431 | #endif | |
8387ce06 | 432 | |
8730046c S |
433 | /* |
434 | * This function is to be invoked early in the boot sequence after the | |
435 | * hypervisor has been detected. | |
436 | * | |
437 | * 1. Setup the hypercall page. | |
63ed4e0c | 438 | * 2. Register Hyper-V specific clocksource. |
6b48cb5f | 439 | * 3. Setup Hyper-V specific APIC entry points. |
8730046c | 440 | */ |
6b48cb5f | 441 | void __init hyperv_init(void) |
8730046c | 442 | { |
f3e613e7 | 443 | u64 guest_id; |
8730046c | 444 | union hv_x64_msr_hypercall_contents hypercall_msr; |
afca4d95 | 445 | int cpuhp; |
8730046c | 446 | |
03b2a320 | 447 | if (x86_hyper_type != X86_HYPER_MS_HYPERV) |
8730046c S |
448 | return; |
449 | ||
afca4d95 | 450 | if (hv_common_init()) |
7415aea6 VK |
451 | return; |
452 | ||
68f2f2bc DC |
453 | /* |
454 | * The VP assist page is useless to a TDX guest: the only use we | |
455 | * would have for it is lazy EOI, which can not be used with TDX. | |
456 | */ | |
457 | if (hv_isolation_type_tdx()) | |
458 | hv_vp_assist_page = NULL; | |
459 | else | |
460 | hv_vp_assist_page = kcalloc(num_possible_cpus(), | |
461 | sizeof(*hv_vp_assist_page), | |
462 | GFP_KERNEL); | |
a46d15cc VK |
463 | if (!hv_vp_assist_page) { |
464 | ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; | |
68f2f2bc DC |
465 | |
466 | if (!hv_isolation_type_tdx()) | |
467 | goto common_free; | |
a46d15cc VK |
468 | } |
469 | ||
e3131f1c | 470 | if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { |
49d6a3c0 TL |
471 | /* Negotiate GHCB Version. */ |
472 | if (!hv_ghcb_negotiate_protocol()) | |
473 | hv_ghcb_terminate(SEV_TERM_SET_GEN, | |
474 | GHCB_SEV_ES_PROT_UNSUPPORTED); | |
475 | ||
faff4406 | 476 | hv_ghcb_pg = alloc_percpu(union hv_ghcb *); |
0cc4f6d9 TL |
477 | if (!hv_ghcb_pg) |
478 | goto free_vp_assist_page; | |
479 | } | |
480 | ||
9636be85 | 481 | cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", |
a46d15cc VK |
482 | hv_cpu_init, hv_cpu_die); |
483 | if (cpuhp < 0) | |
0cc4f6d9 | 484 | goto free_ghcb_page; |
7415aea6 | 485 | |
8730046c S |
486 | /* |
487 | * Setup the hypercall page and enable hypercalls. | |
488 | * 1. Register the guest ID | |
489 | * 2. Enable the hypercall and register the hypercall page | |
23378295 DC |
490 | * |
491 | * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: | |
492 | * when the hypercall input is a page, such a VM must pass a decrypted | |
493 | * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page | |
494 | * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. | |
495 | * | |
496 | * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, | |
497 | * which are handled by the paravisor and the VM must use an encrypted | |
498 | * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and | |
499 | * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and | |
500 | * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: | |
501 | * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). | |
502 | * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. | |
503 | * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; | |
504 | * instead, hv_post_message() uses the post_msg_page, which is decrypted | |
505 | * in such a VM and is only used in such a VM. | |
8730046c | 506 | */ |
d5ebde1e | 507 | guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); |
8730046c S |
508 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); |
509 | ||
b9b4fe3a DC |
510 | /* With the paravisor, the VM must also write the ID via GHCB/GHCI */ |
511 | hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); | |
faff4406 | 512 | |
23378295 DC |
513 | /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ |
514 | if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) | |
d6e0228d | 515 | goto skip_hypercall_pg_init; |
faff4406 | 516 | |
800e26b8 CH |
517 | hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, |
518 | VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, | |
a3a66c38 CH |
519 | VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, |
520 | __builtin_return_address(0)); | |
0cc4f6d9 TL |
521 | if (hv_hypercall_pg == NULL) |
522 | goto clean_guest_os_id; | |
8730046c S |
523 | |
524 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
525 | hypercall_msr.enable = 1; | |
80f73c9f WL |
526 | |
527 | if (hv_root_partition) { | |
528 | struct page *pg; | |
03b9a6e1 | 529 | void *src; |
80f73c9f WL |
530 | |
531 | /* | |
532 | * For the root partition, the hypervisor will set up its | |
533 | * hypercall page. The hypervisor guarantees it will not show | |
534 | * up in the root's address space. The root can't change the | |
535 | * location of the hypercall page. | |
536 | * | |
537 | * Order is important here. We must enable the hypercall page | |
538 | * so it is populated with code, then copy the code to an | |
539 | * executable page. | |
540 | */ | |
541 | wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
542 | ||
543 | pg = vmalloc_to_page(hv_hypercall_pg); | |
80f73c9f WL |
544 | src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, |
545 | MEMREMAP_WB); | |
03b9a6e1 ZL |
546 | BUG_ON(!src); |
547 | memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); | |
80f73c9f | 548 | memunmap(src); |
0408f16b SK |
549 | |
550 | hv_remap_tsc_clocksource(); | |
80f73c9f WL |
551 | } else { |
552 | hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); | |
553 | wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
554 | } | |
63ed4e0c | 555 | |
d6e0228d | 556 | skip_hypercall_pg_init: |
d5ace2a7 MK |
557 | /* |
558 | * Some versions of Hyper-V that provide IBT in guest VMs have a bug | |
559 | * in that there's no ENDBR64 instruction at the entry to the | |
560 | * hypercall page. Because hypercalls are invoked via an indirect call | |
561 | * to the hypercall page, all hypercall attempts fail when IBT is | |
562 | * enabled, and Linux panics. For such buggy versions, disable IBT. | |
563 | * | |
564 | * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall | |
565 | * page, so if future Linux kernel versions enable IBT for 32-bit | |
566 | * builds, additional hypercall page hackery will be required here | |
567 | * to provide an ENDBR32. | |
568 | */ | |
569 | #ifdef CONFIG_X86_KERNEL_IBT | |
570 | if (cpu_feature_enabled(X86_FEATURE_IBT) && | |
571 | *(u32 *)hv_hypercall_pg != gen_endbr()) { | |
572 | setup_clear_cpu_cap(X86_FEATURE_IBT); | |
203a521b | 573 | pr_warn("Disabling IBT because of Hyper-V bug\n"); |
d5ace2a7 MK |
574 | } |
575 | #endif | |
576 | ||
4df4cb9e | 577 | /* |
fff7b5e6 DC |
578 | * hyperv_init() is called before LAPIC is initialized: see |
579 | * apic_intr_mode_init() -> x86_platform.apic_post_init() and | |
580 | * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER | |
581 | * depends on LAPIC, so hv_stimer_alloc() should be called from | |
582 | * x86_init.timers.setup_percpu_clockev. | |
4df4cb9e | 583 | */ |
fff7b5e6 DC |
584 | old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev; |
585 | x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev; | |
4df4cb9e | 586 | |
6b48cb5f S |
587 | hv_apic_init(); |
588 | ||
2f285f46 DC |
589 | x86_init.pci.arch_init = hv_pci_init; |
590 | ||
05bd330a DC |
591 | register_syscore_ops(&hv_syscore_ops); |
592 | ||
dfe94d40 | 593 | hyperv_init_cpuhp = cpuhp; |
99a0f46a WL |
594 | |
595 | if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) | |
596 | hv_get_partition_id(); | |
597 | ||
598 | BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); | |
599 | ||
e39397d1 WL |
600 | #ifdef CONFIG_PCI_MSI |
601 | /* | |
602 | * If we're running as root, we want to create our own PCI MSI domain. | |
603 | * We can't set this in hv_pci_init because that would be too late. | |
604 | */ | |
605 | if (hv_root_partition) | |
606 | x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; | |
607 | #endif | |
608 | ||
6dc2a774 SM |
609 | /* Query the VMs extended capability once, so that it can be cached. */ |
610 | hv_query_ext_cap(0); | |
062a5c42 | 611 | |
8387ce06 | 612 | /* Find the VTL */ |
f2a55d08 | 613 | ms_hyperv.vtl = get_vtl(); |
8387ce06 | 614 | |
14058f72 SS |
615 | if (ms_hyperv.vtl > 0) /* non default VTL */ |
616 | hv_vtl_early_init(); | |
617 | ||
7415aea6 VK |
618 | return; |
619 | ||
0cc4f6d9 TL |
620 | clean_guest_os_id: |
621 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); | |
b9b4fe3a | 622 | hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); |
a46d15cc | 623 | cpuhp_remove_state(cpuhp); |
0cc4f6d9 TL |
624 | free_ghcb_page: |
625 | free_percpu(hv_ghcb_pg); | |
a46d15cc VK |
626 | free_vp_assist_page: |
627 | kfree(hv_vp_assist_page); | |
628 | hv_vp_assist_page = NULL; | |
afca4d95 MK |
629 | common_free: |
630 | hv_common_free(); | |
8730046c | 631 | } |
6ab42a66 | 632 | |
d6f3609d VK |
633 | /* |
634 | * This routine is called before kexec/kdump, it does the required cleanup. | |
635 | */ | |
636 | void hyperv_cleanup(void) | |
637 | { | |
638 | union hv_x64_msr_hypercall_contents hypercall_msr; | |
2982635a | 639 | union hv_reference_tsc_msr tsc_msr; |
d6f3609d VK |
640 | |
641 | /* Reset our OS id */ | |
642 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); | |
b9b4fe3a | 643 | hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); |
d6f3609d | 644 | |
179fb36a KS |
645 | /* |
646 | * Reset hypercall page reference before reset the page, | |
647 | * let hypercall operations fail safely rather than | |
648 | * panic the kernel for using invalid hypercall page | |
649 | */ | |
650 | hv_hypercall_pg = NULL; | |
651 | ||
d6f3609d | 652 | /* Reset the hypercall page */ |
2982635a AR |
653 | hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); |
654 | hypercall_msr.enable = 0; | |
655 | hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
5647dbf8 VK |
656 | |
657 | /* Reset the TSC page */ | |
2982635a AR |
658 | tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); |
659 | tsc_msr.enable = 0; | |
660 | hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); | |
d6f3609d | 661 | } |
d6f3609d | 662 | |
f3a99e76 | 663 | void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) |
d058fa7e S |
664 | { |
665 | static bool panic_reported; | |
7ed4325a | 666 | u64 guest_id; |
d058fa7e | 667 | |
f3a99e76 TL |
668 | if (in_die && !panic_on_oops) |
669 | return; | |
670 | ||
d058fa7e S |
671 | /* |
672 | * We prefer to report panic on 'die' chain as we have proper | |
673 | * registers to report, but if we miss it (e.g. on BUG()) we need | |
674 | * to report it on 'panic'. | |
675 | */ | |
676 | if (panic_reported) | |
677 | return; | |
678 | panic_reported = true; | |
679 | ||
7ed4325a S |
680 | rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); |
681 | ||
682 | wrmsrl(HV_X64_MSR_CRASH_P0, err); | |
683 | wrmsrl(HV_X64_MSR_CRASH_P1, guest_id); | |
684 | wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip); | |
685 | wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax); | |
686 | wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp); | |
d058fa7e S |
687 | |
688 | /* | |
689 | * Let Hyper-V know there is crash data available | |
690 | */ | |
691 | wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); | |
692 | } | |
693 | EXPORT_SYMBOL_GPL(hyperv_report_panic); | |
73638cdd | 694 | |
4a5f3cde | 695 | bool hv_is_hyperv_initialized(void) |
73638cdd S |
696 | { |
697 | union hv_x64_msr_hypercall_contents hypercall_msr; | |
698 | ||
4a5f3cde MK |
699 | /* |
700 | * Ensure that we're really on Hyper-V, and not a KVM or Xen | |
701 | * emulation of Hyper-V | |
702 | */ | |
703 | if (x86_hyper_type != X86_HYPER_MS_HYPERV) | |
704 | return false; | |
705 | ||
d3a9d7e4 DC |
706 | /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */ |
707 | if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) | |
d6e0228d | 708 | return true; |
4a5f3cde MK |
709 | /* |
710 | * Verify that earlier initialization succeeded by checking | |
711 | * that the hypercall page is setup | |
712 | */ | |
73638cdd S |
713 | hypercall_msr.as_uint64 = 0; |
714 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | |
715 | ||
4a5f3cde | 716 | return hypercall_msr.enable; |
73638cdd | 717 | } |
4a5f3cde | 718 | EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); |