From: Greg Kroah-Hartman Date: Mon, 19 Jul 2021 08:56:42 +0000 (+0200) Subject: 5.13-stable patches X-Git-Tag: v5.13.4~49 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c1694bfee1680d9a8d7fcf7634013177844ba531;p=thirdparty%2Fkernel%2Fstable-queue.git 5.13-stable patches added patches: cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch kvm-svm-remove-init-intercept-handler.patch kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch kvm-svm-smi-interception-must-not-skip-the-instruction.patch kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch --- diff --git a/queue-5.13/cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch b/queue-5.13/cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch new file mode 100644 index 00000000000..072188557cb --- /dev/null +++ b/queue-5.13/cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch @@ -0,0 +1,40 @@ +From 50630b3f1ada0bf412d3f28e73bac310448d9d6f Mon Sep 17 00:00:00 2001 +From: Ronnie Sahlberg +Date: Tue, 13 Jul 2021 12:22:59 +1000 +Subject: cifs: Do not use the original cruid when following DFS links for multiuser mounts + +From: Ronnie Sahlberg + +commit 50630b3f1ada0bf412d3f28e73bac310448d9d6f upstream. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213565 + +cruid should only be used for the initial mount and after this we should use the current +user's credentials. +Ignore the original cruid mount argument when creating a new context for a multiuser mount +following a DFS link. + +Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") +Cc: stable@vger.kernel.org # 5.11+ +Reported-by: Xiaoli Feng +Signed-off-by: Ronnie Sahlberg +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/cifs_dfs_ref.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/cifs/cifs_dfs_ref.c ++++ b/fs/cifs/cifs_dfs_ref.c +@@ -208,6 +208,10 @@ char *cifs_compose_mount_options(const c + else + noff = tkn_e - (sb_mountdata + off) + 1; + ++ if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) { ++ off += noff; ++ continue; ++ } + if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) { + off += noff; + continue; diff --git a/queue-5.13/cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch b/queue-5.13/cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch new file mode 100644 index 00000000000..2c5661861b8 --- /dev/null +++ b/queue-5.13/cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch @@ -0,0 +1,42 @@ +From 507345b5ae6a57b7ecd7550ff39282ed20de7b8d Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Mon, 12 Jul 2021 12:38:24 -0300 +Subject: cifs: handle reconnect of tcon when there is no cached dfs referral + +From: Paulo Alcantara + +commit 507345b5ae6a57b7ecd7550ff39282ed20de7b8d upstream. + +When there is no cached DFS referral of tcon->dfs_path, then reconnect +to same share. 
+ +Signed-off-by: Paulo Alcantara (SUSE) +Cc: +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/connect.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -4155,7 +4155,8 @@ int cifs_tree_connect(const unsigned int + if (!tree) + return -ENOMEM; + +- if (!tcon->dfs_path) { ++ /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ ++ if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) { + if (tcon->ipc) { + scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); +@@ -4165,9 +4166,6 @@ int cifs_tree_connect(const unsigned int + goto out; + } + +- rc = dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl); +- if (rc) +- goto out; + isroot = ref.server_type == DFS_TYPE_ROOT; + free_dfs_info_param(&ref); + diff --git a/queue-5.13/cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch b/queue-5.13/cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch new file mode 100644 index 00000000000..7b045f2ff29 --- /dev/null +++ b/queue-5.13/cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch @@ -0,0 +1,232 @@ +From 506c1da44fee32ba1d3a70413289ad58c772bba6 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N +Date: Tue, 18 May 2021 15:05:50 +0000 +Subject: cifs: use the expiry output of dns_query to schedule next resolution + +From: Shyam Prasad N + +commit 506c1da44fee32ba1d3a70413289ad58c772bba6 upstream. + +We recently fixed DNS resolution of the server hostname during reconnect. +However, server IP address may change, even when the old one continues +to serve (although sub-optimally). + +We should schedule the next DNS resolution based on the TTL of +the DNS record used for the last resolution. This way, we resolve the +server hostname again when a DNS record expires. 
+ +Signed-off-by: Shyam Prasad N +Reviewed-by: Paulo Alcantara (SUSE) +Cc: # v5.11+ +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/cifs_dfs_ref.c | 2 - + fs/cifs/cifsglob.h | 4 +++ + fs/cifs/connect.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++--- + fs/cifs/dns_resolve.c | 10 +++++--- + fs/cifs/dns_resolve.h | 2 - + fs/cifs/misc.c | 2 - + 6 files changed, 65 insertions(+), 10 deletions(-) + +--- a/fs/cifs/cifs_dfs_ref.c ++++ b/fs/cifs/cifs_dfs_ref.c +@@ -173,7 +173,7 @@ char *cifs_compose_mount_options(const c + } + } + +- rc = dns_resolve_server_name_to_ip(name, &srvIP); ++ rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL); + if (rc < 0) { + cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", + __func__, name, rc); +--- a/fs/cifs/cifsglob.h ++++ b/fs/cifs/cifsglob.h +@@ -84,6 +84,9 @@ + #define SMB_ECHO_INTERVAL_MAX 600 + #define SMB_ECHO_INTERVAL_DEFAULT 60 + ++/* dns resolution interval in seconds */ ++#define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600 ++ + /* maximum number of PDUs in one compound */ + #define MAX_COMPOUND 5 + +@@ -654,6 +657,7 @@ struct TCP_Server_Info { + /* point to the SMBD connection if RDMA is used instead of socket */ + struct smbd_connection *smbd_conn; + struct delayed_work echo; /* echo ping workqueue job */ ++ struct delayed_work resolve; /* dns resolution workqueue job */ + char *smallbuf; /* pointer to current "small" buffer */ + char *bigbuf; /* pointer to current "big" buffer */ + /* Total size of this PDU. 
Only valid from cifs_demultiplex_thread */ +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -90,6 +90,8 @@ static int reconn_set_ipaddr_from_hostna + int rc; + int len; + char *unc, *ipaddr = NULL; ++ time64_t expiry, now; ++ unsigned long ttl = SMB_DNS_RESOLVE_INTERVAL_DEFAULT; + + if (!server->hostname) + return -EINVAL; +@@ -103,13 +105,13 @@ static int reconn_set_ipaddr_from_hostna + } + scnprintf(unc, len, "\\\\%s", server->hostname); + +- rc = dns_resolve_server_name_to_ip(unc, &ipaddr); ++ rc = dns_resolve_server_name_to_ip(unc, &ipaddr, &expiry); + kfree(unc); + + if (rc < 0) { + cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", + __func__, server->hostname, rc); +- return rc; ++ goto requeue_resolve; + } + + spin_lock(&cifs_tcp_ses_lock); +@@ -118,7 +120,45 @@ static int reconn_set_ipaddr_from_hostna + spin_unlock(&cifs_tcp_ses_lock); + kfree(ipaddr); + +- return !rc ? -1 : 0; ++ /* rc == 1 means success here */ ++ if (rc) { ++ now = ktime_get_real_seconds(); ++ if (expiry && expiry > now) ++ /* ++ * To make sure we don't use the cached entry, retry 1s ++ * after expiry. ++ */ ++ ttl = (expiry - now + 1); ++ } ++ rc = !rc ? -1 : 0; ++ ++requeue_resolve: ++ cifs_dbg(FYI, "%s: next dns resolution scheduled for %lu seconds in the future\n", ++ __func__, ttl); ++ mod_delayed_work(cifsiod_wq, &server->resolve, (ttl * HZ)); ++ ++ return rc; ++} ++ ++ ++static void cifs_resolve_server(struct work_struct *work) ++{ ++ int rc; ++ struct TCP_Server_Info *server = container_of(work, ++ struct TCP_Server_Info, resolve.work); ++ ++ mutex_lock(&server->srv_mutex); ++ ++ /* ++ * Resolve the hostname again to make sure that IP address is up-to-date. 
++ */ ++ rc = reconn_set_ipaddr_from_hostname(server); ++ if (rc) { ++ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", ++ __func__, rc); ++ } ++ ++ mutex_unlock(&server->srv_mutex); + } + + #ifdef CONFIG_CIFS_DFS_UPCALL +@@ -698,6 +738,7 @@ static void clean_demultiplex_info(struc + spin_unlock(&cifs_tcp_ses_lock); + + cancel_delayed_work_sync(&server->echo); ++ cancel_delayed_work_sync(&server->resolve); + + spin_lock(&GlobalMid_Lock); + server->tcpStatus = CifsExiting; +@@ -1278,6 +1319,7 @@ cifs_put_tcp_session(struct TCP_Server_I + spin_unlock(&cifs_tcp_ses_lock); + + cancel_delayed_work_sync(&server->echo); ++ cancel_delayed_work_sync(&server->resolve); + + if (from_reconnect) + /* +@@ -1360,6 +1402,7 @@ cifs_get_tcp_session(struct smb3_fs_cont + INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); + INIT_LIST_HEAD(&tcp_ses->smb_ses_list); + INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); ++ INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server); + INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); + mutex_init(&tcp_ses->reconnect_mutex); + memcpy(&tcp_ses->srcaddr, &ctx->srcaddr, +@@ -1440,6 +1483,12 @@ smbd_connected: + /* queue echo request delayed work */ + queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval); + ++ /* queue dns resolution delayed work */ ++ cifs_dbg(FYI, "%s: next dns resolution scheduled for %d seconds in the future\n", ++ __func__, SMB_DNS_RESOLVE_INTERVAL_DEFAULT); ++ ++ queue_delayed_work(cifsiod_wq, &tcp_ses->resolve, (SMB_DNS_RESOLVE_INTERVAL_DEFAULT * HZ)); ++ + return tcp_ses; + + out_err_crypto_release: +--- a/fs/cifs/dns_resolve.c ++++ b/fs/cifs/dns_resolve.c +@@ -36,6 +36,7 @@ + * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. + * @unc: UNC path specifying the server (with '/' as delimiter) + * @ip_addr: Where to return the IP address. ++ * @expiry: Where to return the expiry time for the dns record. 
+ * + * The IP address will be returned in string form, and the caller is + * responsible for freeing it. +@@ -43,7 +44,7 @@ + * Returns length of result on success, -ve on error. + */ + int +-dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) ++dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry) + { + struct sockaddr_storage ss; + const char *hostname, *sep; +@@ -78,13 +79,14 @@ dns_resolve_server_name_to_ip(const char + + /* Perform the upcall */ + rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, +- NULL, ip_addr, NULL, false); ++ NULL, ip_addr, expiry, false); + if (rc < 0) + cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", + __func__, len, len, hostname); + else +- cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n", +- __func__, len, len, hostname, *ip_addr); ++ cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n", ++ __func__, len, len, hostname, *ip_addr, ++ expiry ? (*expiry) : 0); + return rc; + + name_is_IP_address: +--- a/fs/cifs/dns_resolve.h ++++ b/fs/cifs/dns_resolve.h +@@ -24,7 +24,7 @@ + #define _DNS_RESOLVE_H + + #ifdef __KERNEL__ +-extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); ++extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry); + #endif /* KERNEL */ + + #endif /* _DNS_RESOLVE_H */ +--- a/fs/cifs/misc.c ++++ b/fs/cifs/misc.c +@@ -1199,7 +1199,7 @@ int match_target_ip(struct TCP_Server_In + + cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2); + +- rc = dns_resolve_server_name_to_ip(target, &tip); ++ rc = dns_resolve_server_name_to_ip(target, &tip, NULL); + if (rc < 0) + goto out; + diff --git a/queue-5.13/kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch b/queue-5.13/kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch new file mode 100644 index 00000000000..2bceab2d765 --- /dev/null +++ 
b/queue-5.13/kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch @@ -0,0 +1,128 @@ +From 23fa2e46a5556f787ce2ea1a315d3ab93cced204 Mon Sep 17 00:00:00 2001 +From: Kefeng Wang +Date: Sat, 26 Jun 2021 15:03:04 +0800 +Subject: KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio + +From: Kefeng Wang + +commit 23fa2e46a5556f787ce2ea1a315d3ab93cced204 upstream. + +BUG: KASAN: use-after-free in kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183 +Read of size 8 at addr ffff0000c03a2500 by task syz-executor083/4269 + +CPU: 5 PID: 4269 Comm: syz-executor083 Not tainted 5.10.0 #7 +Hardware name: linux,dummy-virt (DT) +Call trace: + dump_backtrace+0x0/0x2d0 arch/arm64/kernel/stacktrace.c:132 + show_stack+0x28/0x34 arch/arm64/kernel/stacktrace.c:196 + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x110/0x164 lib/dump_stack.c:118 + print_address_description+0x78/0x5c8 mm/kasan/report.c:385 + __kasan_report mm/kasan/report.c:545 [inline] + kasan_report+0x148/0x1e4 mm/kasan/report.c:562 + check_memory_region_inline mm/kasan/generic.c:183 [inline] + __asan_load8+0xb4/0xbc mm/kasan/generic.c:252 + kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183 + kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755 + vfs_ioctl fs/ioctl.c:48 [inline] + __do_sys_ioctl fs/ioctl.c:753 [inline] + __se_sys_ioctl fs/ioctl.c:739 [inline] + __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 + __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] + el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] + do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 + el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 + el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 + el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 + +Allocated by task 4269: + 
stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 + kasan_save_stack mm/kasan/common.c:48 [inline] + kasan_set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc+0xdc/0x120 mm/kasan/common.c:461 + kasan_kmalloc+0xc/0x14 mm/kasan/common.c:475 + kmem_cache_alloc_trace include/linux/slab.h:450 [inline] + kmalloc include/linux/slab.h:552 [inline] + kzalloc include/linux/slab.h:664 [inline] + kvm_vm_ioctl_register_coalesced_mmio+0x78/0x1cc arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:146 + kvm_vm_ioctl+0x7e8/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3746 + vfs_ioctl fs/ioctl.c:48 [inline] + __do_sys_ioctl fs/ioctl.c:753 [inline] + __se_sys_ioctl fs/ioctl.c:739 [inline] + __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 + __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] + el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] + do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 + el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 + el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 + el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 + +Freed by task 4269: + stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 + kasan_save_stack mm/kasan/common.c:48 [inline] + kasan_set_track+0x38/0x6c mm/kasan/common.c:56 + kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:355 + __kasan_slab_free+0x124/0x150 mm/kasan/common.c:422 + kasan_slab_free+0x10/0x1c mm/kasan/common.c:431 + slab_free_hook mm/slub.c:1544 [inline] + slab_free_freelist_hook mm/slub.c:1577 [inline] + slab_free mm/slub.c:3142 [inline] + kfree+0x104/0x38c mm/slub.c:4124 + coalesced_mmio_destructor+0x94/0xa4 arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:102 + kvm_iodevice_destructor include/kvm/iodev.h:61 [inline] + kvm_io_bus_unregister_dev+0x248/0x280 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:4374 + kvm_vm_ioctl_unregister_coalesced_mmio+0x158/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:186 + 
kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755 + vfs_ioctl fs/ioctl.c:48 [inline] + __do_sys_ioctl fs/ioctl.c:753 [inline] + __se_sys_ioctl fs/ioctl.c:739 [inline] + __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 + __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] + el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] + do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 + el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 + el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 + el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 + +If kvm_io_bus_unregister_dev() returns -ENOMEM, we already call kvm_iodevice_destructor() +inside this function to delete 'struct kvm_coalesced_mmio_dev *dev' from list +and free the dev, but kvm_iodevice_destructor() is called again, it will lead +to the above issue. + +Let's check the return value of kvm_io_bus_unregister_dev(), only call +kvm_iodevice_destructor() if the return value is 0. + +Cc: Paolo Bonzini +Cc: kvm@vger.kernel.org +Reported-by: Hulk Robot +Signed-off-by: Kefeng Wang +Message-Id: <20210626070304.143456-1-wangkefeng.wang@huawei.com> +Cc: stable@vger.kernel.org +Fixes: 5d3c4c79384a ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed", 2021-04-20) +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/coalesced_mmio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/virt/kvm/coalesced_mmio.c ++++ b/virt/kvm/coalesced_mmio.c +@@ -186,7 +186,6 @@ int kvm_vm_ioctl_unregister_coalesced_mm + coalesced_mmio_in_range(dev, zone->addr, zone->size)) { + r = kvm_io_bus_unregister_dev(kvm, + zone->pio ? 
KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev); +- kvm_iodevice_destructor(&dev->dev); + + /* + * On failure, unregister destroys all devices on the +@@ -196,6 +195,7 @@ int kvm_vm_ioctl_unregister_coalesced_mm + */ + if (r) + break; ++ kvm_iodevice_destructor(&dev->dev); + } + } + diff --git a/queue-5.13/kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch b/queue-5.13/kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch new file mode 100644 index 00000000000..20f3d622752 --- /dev/null +++ b/queue-5.13/kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch @@ -0,0 +1,46 @@ +From fce7e152ffc8f89d02a80617b16c7aa1527847c8 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Mon, 28 Jun 2021 12:44:20 +0200 +Subject: KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA + +From: Vitaly Kuznetsov + +commit fce7e152ffc8f89d02a80617b16c7aa1527847c8 upstream. + +APM states that #GP is raised upon write to MSR_VM_HSAVE_PA when +the supplied address is not page-aligned or is outside of "maximum +supported physical address for this implementation". +page_address_valid() check seems suitable. Also, forcefully page-align +the address when it's written from VMM. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20210628104425.391276-2-vkuznets@redhat.com> +Cc: stable@vger.kernel.org +Reviewed-by: Maxim Levitsky +[Add comment about behavior for host-provided values. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2915,7 +2915,16 @@ static int svm_set_msr(struct kvm_vcpu * + svm_disable_lbrv(vcpu); + break; + case MSR_VM_HSAVE_PA: +- svm->nested.hsave_msr = data; ++ /* ++ * Old kernels did not validate the value written to ++ * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid ++ * value to allow live migrating buggy or malicious guests ++ * originating from those kernels. 
++ */ ++ if (!msr->host_initiated && !page_address_valid(vcpu, data)) ++ return 1; ++ ++ svm->nested.hsave_msr = data & PAGE_MASK; + break; + case MSR_VM_CR: + return svm_set_vm_cr(vcpu, data); diff --git a/queue-5.13/kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch b/queue-5.13/kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch new file mode 100644 index 00000000000..e937dfb7800 --- /dev/null +++ b/queue-5.13/kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch @@ -0,0 +1,44 @@ +From cd4220d23bf3f43cf720e82bdee681f383433ae2 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Thu, 1 Jul 2021 17:42:24 +0200 +Subject: KVM: selftests: do not require 64GB in set_memory_region_test + +From: Christian Borntraeger + +commit cd4220d23bf3f43cf720e82bdee681f383433ae2 upstream. + +Unless the user sets overcommit_memory or has plenty of swap, the latest +changes to the testcase will result in ENOMEM failures for hosts with +less than 64GB RAM. As we do not use much of the allocated memory, we +can use MAP_NORESERVE to avoid this error. 
+ +Cc: Zenghui Yu +Cc: vkuznets@redhat.com +Cc: wanghaibin.wang@huawei.com +Cc: stable@vger.kernel.org +Fixes: 309505dd5685 ("KVM: selftests: Fix mapping length truncation in m{,un}map()") +Tested-by: Zenghui Yu +Link: https://lore.kernel.org/kvm/20210701160425.33666-1-borntraeger@de.ibm.com/ +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/kvm/set_memory_region_test.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c +index d8812f27648c..d31f54ac4e98 100644 +--- a/tools/testing/selftests/kvm/set_memory_region_test.c ++++ b/tools/testing/selftests/kvm/set_memory_region_test.c +@@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void) + (max_mem_slots - 1), MEM_REGION_SIZE >> 10); + + mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, +- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); + +-- +2.32.0 + diff --git a/queue-5.13/kvm-svm-remove-init-intercept-handler.patch b/queue-5.13/kvm-svm-remove-init-intercept-handler.patch new file mode 100644 index 00000000000..bf8e6c27bd0 --- /dev/null +++ b/queue-5.13/kvm-svm-remove-init-intercept-handler.patch @@ -0,0 +1,39 @@ +From 896707c212d440a6863ce0a3930c8a609e24497d Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 7 Jul 2021 15:50:59 +0300 +Subject: KVM: SVM: remove INIT intercept handler + +From: Maxim Levitsky + +commit 896707c212d440a6863ce0a3930c8a609e24497d upstream. + +Kernel never sends real INIT even to CPUs, other than on boot. + +Thus INIT interception is an error which should be caught +by a check for an unknown VMexit reason. 
+ +On top of that, the current INIT VM exit handler skips +the current instruction which is wrong. +That was added in commit 5ff3a351f687 ("KVM: x86: Move trivial +instruction-based exit handlers to common code"). + +Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code") +Signed-off-by: Maxim Levitsky +Message-Id: <20210707125100.677203-3-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3069,7 +3069,6 @@ static int (*const svm_exit_handlers[])( + [SVM_EXIT_INTR] = intr_interception, + [SVM_EXIT_NMI] = nmi_interception, + [SVM_EXIT_SMI] = smi_interception, +- [SVM_EXIT_INIT] = kvm_emulate_as_nop, + [SVM_EXIT_VINTR] = interrupt_window_interception, + [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc, + [SVM_EXIT_CPUID] = kvm_emulate_cpuid, diff --git a/queue-5.13/kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch b/queue-5.13/kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch new file mode 100644 index 00000000000..947a633800a --- /dev/null +++ b/queue-5.13/kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch @@ -0,0 +1,60 @@ +From 76ff371b67cb12fb635396234468abcf6a466f16 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 24 Jun 2021 19:03:54 -0700 +Subject: KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler + +From: Sean Christopherson + +commit 76ff371b67cb12fb635396234468abcf6a466f16 upstream. + +Don't clear the C-bit in the #NPF handler, as it is a legal GPA bit for +non-SEV guests, and for SEV guests the C-bit is dropped before the GPA +hits the NPT in hardware. Clearing the bit for non-SEV guests causes KVM +to mishandle #NPFs with GPAs that collide with the host's C-bit. 
+ +Although the APM doesn't explicitly state that the C-bit is not reserved +for non-SEV, Tom Lendacky confirmed that the following snippet about the +effective reduction due to the C-bit does indeed apply only to SEV guests. + + Note that because guest physical addresses are always translated + through the nested page tables, the size of the guest physical address + space is not impacted by any physical address space reduction indicated + in CPUID 8000_001F[EBX]. If the C-bit is a physical address bit however, + the guest physical address space is effectively reduced by 1 bit. + +And for SEV guests, the APM clearly states that the bit is dropped before +walking the nested page tables. + + If the C-bit is an address bit, this bit is masked from the guest + physical address when it is translated through the nested page tables. + Consequently, the hypervisor does not need to be aware of which pages + the guest has chosen to mark private. + +Note, the bogus C-bit clearing was removed from legacy #PF handler in +commit 6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF +interception"). 
+ +Fixes: 0ede79e13224 ("KVM: SVM: Clear C-bit from the page fault address") +Cc: Peter Gonda +Cc: Brijesh Singh +Cc: Tom Lendacky +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20210625020354.431829-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -1897,7 +1897,7 @@ static int npf_interception(struct kvm_v + { + struct vcpu_svm *svm = to_svm(vcpu); + +- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); ++ u64 fault_address = svm->vmcb->control.exit_info_2; + u64 error_code = svm->vmcb->control.exit_info_1; + + trace_kvm_page_fault(fault_address, error_code); diff --git a/queue-5.13/kvm-svm-smi-interception-must-not-skip-the-instruction.patch b/queue-5.13/kvm-svm-smi-interception-must-not-skip-the-instruction.patch new file mode 100644 index 00000000000..1965c93b99b --- /dev/null +++ b/queue-5.13/kvm-svm-smi-interception-must-not-skip-the-instruction.patch @@ -0,0 +1,51 @@ +From 991afbbee8ac93b055a27477278a5fb556af1ff4 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 7 Jul 2021 15:50:58 +0300 +Subject: KVM: SVM: #SMI interception must not skip the instruction + +From: Maxim Levitsky + +commit 991afbbee8ac93b055a27477278a5fb556af1ff4 upstream. + +Commit 5ff3a351f687 ("KVM: x86: Move trivial instruction-based +exit handlers to common code"), unfortunately made a mistake of +treating nop_on_interception and nop_interception in the same way. + +Former does truly nothing while the latter skips the instruction. + +SMI VM exit handler should do nothing. 
+(SMI itself is handled by the host when we do STGI) + +Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code") +Signed-off-by: Maxim Levitsky +Message-Id: <20210707125100.677203-2-mlevitsk@redhat.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2080,6 +2080,11 @@ static int nmi_interception(struct kvm_v + return 1; + } + ++static int smi_interception(struct kvm_vcpu *vcpu) ++{ ++ return 1; ++} ++ + static int intr_interception(struct kvm_vcpu *vcpu) + { + ++vcpu->stat.irq_exits; +@@ -3063,7 +3068,7 @@ static int (*const svm_exit_handlers[])( + [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, + [SVM_EXIT_INTR] = intr_interception, + [SVM_EXIT_NMI] = nmi_interception, +- [SVM_EXIT_SMI] = kvm_emulate_as_nop, ++ [SVM_EXIT_SMI] = smi_interception, + [SVM_EXIT_INIT] = kvm_emulate_as_nop, + [SVM_EXIT_VINTR] = interrupt_window_interception, + [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc, diff --git a/queue-5.13/kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch b/queue-5.13/kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch new file mode 100644 index 00000000000..9c34907e8a8 --- /dev/null +++ b/queue-5.13/kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch @@ -0,0 +1,49 @@ +From f85d40160691881a17a397c448d799dfc90987ba Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Tue, 29 Jun 2021 01:26:32 +0800 +Subject: KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run() + +From: Lai Jiangshan + +commit f85d40160691881a17a397c448d799dfc90987ba upstream. + +When the host is using debug registers but the guest is not using them +nor is the guest in guest-debug state, the kvm code does not reset +the host debug registers before kvm_x86->run(). 
Rather, it relies on +the hardware vmentry instruction to automatically reset the dr7 registers +which ensures that the host breakpoints do not affect the guest. + +This however violates the non-instrumentable nature around VM entry +and exit; for example, when a host breakpoint is set on vcpu->arch.cr2, + +Another issue is consistency. When the guest debug registers are active, +the host breakpoints are reset before kvm_x86->run(). But when the +guest debug registers are inactive, the host breakpoints are delayed to +be disabled. The host tracing tools may see different results depending +on what the guest is doing. + +To fix the problems, we clear %db7 unconditionally before kvm_x86->run() +if the host has set any breakpoints, no matter if the guest is using +them or not. + +Signed-off-by: Lai Jiangshan +Message-Id: <20210628172632.81029-1-jiangshanlai@gmail.com> +Cc: stable@vger.kernel.org +[Only clear %db7 instead of reloading all debug registers. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9347,6 +9347,8 @@ static int vcpu_enter_guest(struct kvm_v + set_debugreg(vcpu->arch.eff_db[3], 3); + set_debugreg(vcpu->arch.dr6, 6); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; ++ } else if (unlikely(hw_breakpoint_active())) { ++ set_debugreg(0, 7); + } + + for (;;) { diff --git a/queue-5.13/kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch b/queue-5.13/kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch new file mode 100644 index 00000000000..a1277b90dc9 --- /dev/null +++ b/queue-5.13/kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch @@ -0,0 +1,111 @@ +From fc9bf2e087efcd81bda2e52d09616d2a1bf982a8 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 23 Jun 2021 16:05:49 -0700 +Subject: KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs + 
+From: Sean Christopherson + +commit fc9bf2e087efcd81bda2e52d09616d2a1bf982a8 upstream. + +Ignore "dynamic" host adjustments to the physical address mask when +generating the masks for guest PTEs, i.e. the guest PA masks. The host +physical address space and guest physical address space are two different +beasts, e.g. even though SEV's C-bit is the same bit location for both +host and guest, disabling SME in the host (which clears shadow_me_mask) +does not affect the guest PTE->GPA "translation". + +For non-SEV guests, not dropping bits is the correct behavior. Assuming +KVM and userspace correctly enumerate/configure guest MAXPHYADDR, bits +that are lost as collateral damage from memory encryption are treated as +reserved bits, i.e. KVM will never get to the point where it attempts to +generate a gfn using the affected bits. And if userspace wants to create +a bogus vCPU, then userspace gets to deal with the fallout of hardware +doing odd things with bad GPAs. + +For SEV guests, not dropping the C-bit is technically wrong, but it's a +moot point because KVM can't read SEV guest's page tables in any case +since they're always encrypted. Not to mention that the current KVM code +is also broken since sme_me_mask does not have to be non-zero for SEV to +be supported by KVM. The proper fix would be to teach all of KVM to +correctly handle guest private memory, but that's a task for the future. + +Fixes: d0ec49d4de90 ("kvm/x86/svm: Support Secure Memory Encryption within KVM") +Cc: stable@vger.kernel.org +Cc: Brijesh Singh +Cc: Tom Lendacky +Signed-off-by: Sean Christopherson +Message-Id: <20210623230552.4027702-5-seanjc@google.com> +[Use a new header instead of adding header guards to paging_tmpl.h. 
- Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 2 ++ + arch/x86/kvm/mmu/paging.h | 14 ++++++++++++++ + arch/x86/kvm/mmu/paging_tmpl.h | 4 ++-- + arch/x86/kvm/mmu/spte.h | 6 ------ + 4 files changed, 18 insertions(+), 8 deletions(-) + create mode 100644 arch/x86/kvm/mmu/paging.h + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -53,6 +53,8 @@ + #include + #include "trace.h" + ++#include "paging.h" ++ + extern bool itlb_multihit_kvm_mitigation; + + static int __read_mostly nx_huge_pages = -1; +--- /dev/null ++++ b/arch/x86/kvm/mmu/paging.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Shadow paging constants/helpers that don't need to be #undef'd. */ ++#ifndef __KVM_X86_PAGING_H ++#define __KVM_X86_PAGING_H ++ ++#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) ++#define PT64_LVL_ADDR_MASK(level) \ ++ (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ ++ * PT64_LEVEL_BITS))) - 1)) ++#define PT64_LVL_OFFSET_MASK(level) \ ++ (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ ++ * PT64_LEVEL_BITS))) - 1)) ++#endif /* __KVM_X86_PAGING_H */ ++ +--- a/arch/x86/kvm/mmu/paging_tmpl.h ++++ b/arch/x86/kvm/mmu/paging_tmpl.h +@@ -24,7 +24,7 @@ + #define pt_element_t u64 + #define guest_walker guest_walker64 + #define FNAME(name) paging##64_##name +- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK ++ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) +@@ -57,7 +57,7 @@ + #define pt_element_t u64 + #define guest_walker guest_walkerEPT + #define FNAME(name) ept_##name +- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK ++ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define 
PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) +--- a/arch/x86/kvm/mmu/spte.h ++++ b/arch/x86/kvm/mmu/spte.h +@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK = + #else + #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) + #endif +-#define PT64_LVL_ADDR_MASK(level) \ +- (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ +- * PT64_LEVEL_BITS))) - 1)) +-#define PT64_LVL_OFFSET_MASK(level) \ +- (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ +- * PT64_LEVEL_BITS))) - 1)) + + #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ + | shadow_x_mask | shadow_nx_mask | shadow_me_mask) diff --git a/queue-5.13/kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch b/queue-5.13/kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch new file mode 100644 index 00000000000..74315d242e5 --- /dev/null +++ b/queue-5.13/kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch @@ -0,0 +1,44 @@ +From 4bf48e3c0aafd32b960d341c4925b48f416f14a5 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 23 Jun 2021 16:05:46 -0700 +Subject: KVM: x86: Use guest MAXPHYADDR from CPUID.0x8000_0008 iff TDP is enabled + +From: Sean Christopherson + +commit 4bf48e3c0aafd32b960d341c4925b48f416f14a5 upstream. + +Ignore the guest MAXPHYADDR reported by CPUID.0x8000_0008 if TDP, i.e. +NPT, is disabled, and instead use the host's MAXPHYADDR. Per AMD's APM: + + Maximum guest physical address size in bits. This number applies only + to guests using nested paging. When this field is zero, refer to the + PhysAddrSize field for the maximum guest physical address size.
+ +Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20210623230552.4027702-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -940,8 +940,14 @@ static inline int __do_cpuid_func(struct + unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); + unsigned phys_as = entry->eax & 0xff; + +- if (!g_phys_as) ++ /* ++ * Use bare metal's MAXPHADDR if the CPU doesn't report guest ++ * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the ++ * guest version "applies only to guests using nested paging". ++ */ ++ if (!g_phys_as || !tdp_enabled) + g_phys_as = phys_as; ++ + entry->eax = g_phys_as | (virt_as << 8); + entry->edx = 0; + cpuid_entry_override(entry, CPUID_8000_0008_EBX); diff --git a/queue-5.13/kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch b/queue-5.13/kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch new file mode 100644 index 00000000000..51faf2e6766 --- /dev/null +++ b/queue-5.13/kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch @@ -0,0 +1,78 @@ +From e39f00f60ebd2e7b295c37a05e6349df656d3eb8 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 23 Jun 2021 16:05:47 -0700 +Subject: KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR + +From: Sean Christopherson + +commit e39f00f60ebd2e7b295c37a05e6349df656d3eb8 upstream. + +Use boot_cpu_data.x86_phys_bits instead of the raw CPUID information to +enumerate the MAXPHYADDR for KVM guests when TDP is disabled (the guest +version is only relevant to NPT/TDP). + +When using shadow paging, any reductions to the host's MAXPHYADDR apply +to KVM and its guests as well, i.e. using the raw CPUID info will cause +KVM to misreport the number of PA bits available to the guest. 
+ +Unconditionally zero out the "Physical Address bit reduction" entry. +For !TDP, the adjustment is already done, and for TDP enumerating the +host's reduction is wrong as the reduction does not apply to GPAs. + +Fixes: 9af9b94068fb ("x86/cpu/AMD: Handle SME reduction in physical address size") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20210623230552.4027702-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 27 ++++++++++++++++++++------- + 1 file changed, 20 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -941,11 +941,18 @@ static inline int __do_cpuid_func(struct + unsigned phys_as = entry->eax & 0xff; + + /* +- * Use bare metal's MAXPHADDR if the CPU doesn't report guest +- * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the +- * guest version "applies only to guests using nested paging". ++ * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as ++ * the guest operates in the same PA space as the host, i.e. ++ * reductions in MAXPHYADDR for memory encryption affect shadow ++ * paging, too. ++ * ++ * If TDP is enabled but an explicit guest MAXPHYADDR is not ++ * provided, use the raw bare metal MAXPHYADDR as reductions to ++ * the HPAs do not affect GPAs. 
+ */ +- if (!g_phys_as || !tdp_enabled) ++ if (!tdp_enabled) ++ g_phys_as = boot_cpu_data.x86_phys_bits; ++ else if (!g_phys_as) + g_phys_as = phys_as; + + entry->eax = g_phys_as | (virt_as << 8); +@@ -970,12 +977,18 @@ static inline int __do_cpuid_func(struct + case 0x8000001a: + case 0x8000001e: + break; +- /* Support memory encryption cpuid if host supports it */ + case 0x8000001F: +- if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) ++ if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; +- else ++ } else { + cpuid_entry_override(entry, CPUID_8000_001F_EAX); ++ ++ /* ++ * Enumerate '0' for "PA bits reduction", the adjusted ++ * MAXPHYADDR is enumerated directly (see 0x80000008). ++ */ ++ entry->ebx &= ~GENMASK(11, 6); ++ } + break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: diff --git a/queue-5.13/series b/queue-5.13/series new file mode 100644 index 00000000000..2799920be80 --- /dev/null +++ b/queue-5.13/series @@ -0,0 +1,13 @@ +cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch +cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch +cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch +kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch +kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch +kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch +kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch +kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch +kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch +kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch +kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch +kvm-svm-smi-interception-must-not-skip-the-instruction.patch +kvm-svm-remove-init-intercept-handler.patch