--- /dev/null
+From 50630b3f1ada0bf412d3f28e73bac310448d9d6f Mon Sep 17 00:00:00 2001
+From: Ronnie Sahlberg <lsahlber@redhat.com>
+Date: Tue, 13 Jul 2021 12:22:59 +1000
+Subject: cifs: Do not use the original cruid when following DFS links for multiuser mounts
+
+From: Ronnie Sahlberg <lsahlber@redhat.com>
+
+commit 50630b3f1ada0bf412d3f28e73bac310448d9d6f upstream.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213565
+
+cruid should only be used for the initial mount; after that, we should use
+the current user's credentials.
+Ignore the original cruid mount argument when creating a new context for a
+multiuser mount following a DFS link.
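+
+As a minimal userspace sketch of the same option-skipping idea (illustrative
+only, not the kernel parser; the option string is made up):
+
+  #include <stdio.h>
+  #include <string.h>
+  #include <strings.h>
+
+  int main(void)
+  {
+  	const char *opts = "multiuser,cruid=1000,vers=3.0";
+  	const char *p = opts;
+
+  	while (*p) {
+  		size_t n = strcspn(p, ",");
+
+  		/* drop cruid=..., keep everything else */
+  		if (strncasecmp(p, "cruid=", 6) != 0)
+  			printf("keep: %.*s\n", (int)n, p);
+  		p += n + (p[n] == ',');
+  	}
+  	return 0;
+  }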
+
+Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api")
+Cc: stable@vger.kernel.org # 5.11+
+Reported-by: Xiaoli Feng <xifeng@redhat.com>
+Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/cifs_dfs_ref.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/cifs/cifs_dfs_ref.c
++++ b/fs/cifs/cifs_dfs_ref.c
+@@ -208,6 +208,10 @@ char *cifs_compose_mount_options(const c
+ else
+ noff = tkn_e - (sb_mountdata + off) + 1;
+
++ if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) {
++ off += noff;
++ continue;
++ }
+ if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) {
+ off += noff;
+ continue;
--- /dev/null
+From 507345b5ae6a57b7ecd7550ff39282ed20de7b8d Mon Sep 17 00:00:00 2001
+From: Paulo Alcantara <pc@cjr.nz>
+Date: Mon, 12 Jul 2021 12:38:24 -0300
+Subject: cifs: handle reconnect of tcon when there is no cached dfs referral
+
+From: Paulo Alcantara <pc@cjr.nz>
+
+commit 507345b5ae6a57b7ecd7550ff39282ed20de7b8d upstream.
+
+When there is no cached DFS referral for tcon->dfs_path, reconnect
+to the same share.
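+
+In outline, a toy model of the fallback decision (hypothetical stub; in the
+kernel, dfs_cache_noreq_find() returns nonzero when no referral is cached):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /* stand-in for the referral-cache lookup */
+  static bool cached_referral_found(const char *path)
+  {
+  	(void)path;
+  	return false;	/* pretend the cache is empty */
+  }
+
+  int main(void)
+  {
+  	const char *dfs_path = "\\dfsroot\\share";
+
+  	if (!dfs_path || !cached_referral_found(dfs_path))
+  		puts("reconnect to the same share");
+  	else
+  		puts("chase the cached DFS referral");
+  	return 0;
+  }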
+
+Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/connect.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -4155,7 +4155,8 @@ int cifs_tree_connect(const unsigned int
+ if (!tree)
+ return -ENOMEM;
+
+- if (!tcon->dfs_path) {
++ /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
++ if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) {
+ if (tcon->ipc) {
+ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+@@ -4165,9 +4166,6 @@ int cifs_tree_connect(const unsigned int
+ goto out;
+ }
+
+- rc = dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl);
+- if (rc)
+- goto out;
+ isroot = ref.server_type == DFS_TYPE_ROOT;
+ free_dfs_info_param(&ref);
+
--- /dev/null
+From 506c1da44fee32ba1d3a70413289ad58c772bba6 Mon Sep 17 00:00:00 2001
+From: Shyam Prasad N <sprasad@microsoft.com>
+Date: Tue, 18 May 2021 15:05:50 +0000
+Subject: cifs: use the expiry output of dns_query to schedule next resolution
+
+From: Shyam Prasad N <sprasad@microsoft.com>
+
+commit 506c1da44fee32ba1d3a70413289ad58c772bba6 upstream.
+
+We recently fixed DNS resolution of the server hostname during reconnect.
+However, the server IP address may change even while the old one continues
+to serve (although sub-optimally).
+
+We should schedule the next DNS resolution based on the TTL of
+the DNS record used for the last resolution. This way, we resolve the
+server hostname again when a DNS record expires.
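+
+A minimal sketch of the rescheduling arithmetic (userspace; the helper name
+and default are made up, and the kernel uses ktime_get_real_seconds() plus a
+delayed workqueue):
+
+  #include <stdio.h>
+  #include <time.h>
+
+  #define DNS_RESOLVE_INTERVAL_DEFAULT 600	/* fallback TTL, seconds */
+
+  /* seconds until the next resolution, given the record's expiry */
+  static unsigned long next_resolve_delay(time_t expiry)
+  {
+  	time_t now = time(NULL);
+
+  	/* retry 1s after expiry so the cached entry is not reused */
+  	if (expiry > now)
+  		return (unsigned long)(expiry - now) + 1;
+  	return DNS_RESOLVE_INTERVAL_DEFAULT;
+  }
+
+  int main(void)
+  {
+  	printf("%lu\n", next_resolve_delay(time(NULL) + 300));	/* ~301 */
+  	return 0;
+  }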
+
+Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Cc: <stable@vger.kernel.org> # v5.11+
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/cifs_dfs_ref.c | 2 -
+ fs/cifs/cifsglob.h | 4 +++
+ fs/cifs/connect.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++---
+ fs/cifs/dns_resolve.c | 10 +++++---
+ fs/cifs/dns_resolve.h | 2 -
+ fs/cifs/misc.c | 2 -
+ 6 files changed, 65 insertions(+), 10 deletions(-)
+
+--- a/fs/cifs/cifs_dfs_ref.c
++++ b/fs/cifs/cifs_dfs_ref.c
+@@ -173,7 +173,7 @@ char *cifs_compose_mount_options(const c
+ }
+ }
+
+- rc = dns_resolve_server_name_to_ip(name, &srvIP);
++ rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL);
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n",
+ __func__, name, rc);
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -84,6 +84,9 @@
+ #define SMB_ECHO_INTERVAL_MAX 600
+ #define SMB_ECHO_INTERVAL_DEFAULT 60
+
++/* dns resolution interval in seconds */
++#define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
++
+ /* maximum number of PDUs in one compound */
+ #define MAX_COMPOUND 5
+
+@@ -654,6 +657,7 @@ struct TCP_Server_Info {
+ /* point to the SMBD connection if RDMA is used instead of socket */
+ struct smbd_connection *smbd_conn;
+ struct delayed_work echo; /* echo ping workqueue job */
++ struct delayed_work resolve; /* dns resolution workqueue job */
+ char *smallbuf; /* pointer to current "small" buffer */
+ char *bigbuf; /* pointer to current "big" buffer */
+ /* Total size of this PDU. Only valid from cifs_demultiplex_thread */
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -90,6 +90,8 @@ static int reconn_set_ipaddr_from_hostna
+ int rc;
+ int len;
+ char *unc, *ipaddr = NULL;
++ time64_t expiry, now;
++ unsigned long ttl = SMB_DNS_RESOLVE_INTERVAL_DEFAULT;
+
+ if (!server->hostname)
+ return -EINVAL;
+@@ -103,13 +105,13 @@ static int reconn_set_ipaddr_from_hostna
+ }
+ scnprintf(unc, len, "\\\\%s", server->hostname);
+
+- rc = dns_resolve_server_name_to_ip(unc, &ipaddr);
++ rc = dns_resolve_server_name_to_ip(unc, &ipaddr, &expiry);
+ kfree(unc);
+
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n",
+ __func__, server->hostname, rc);
+- return rc;
++ goto requeue_resolve;
+ }
+
+ spin_lock(&cifs_tcp_ses_lock);
+@@ -118,7 +120,45 @@ static int reconn_set_ipaddr_from_hostna
+ spin_unlock(&cifs_tcp_ses_lock);
+ kfree(ipaddr);
+
+- return !rc ? -1 : 0;
++ /* rc == 1 means success here */
++ if (rc) {
++ now = ktime_get_real_seconds();
++ if (expiry && expiry > now)
++ /*
++ * To make sure we don't use the cached entry, retry 1s
++ * after expiry.
++ */
++ ttl = (expiry - now + 1);
++ }
++ rc = !rc ? -1 : 0;
++
++requeue_resolve:
++ cifs_dbg(FYI, "%s: next dns resolution scheduled for %lu seconds in the future\n",
++ __func__, ttl);
++ mod_delayed_work(cifsiod_wq, &server->resolve, (ttl * HZ));
++
++ return rc;
++}
++
++
++static void cifs_resolve_server(struct work_struct *work)
++{
++ int rc;
++ struct TCP_Server_Info *server = container_of(work,
++ struct TCP_Server_Info, resolve.work);
++
++ mutex_lock(&server->srv_mutex);
++
++ /*
++ * Resolve the hostname again to make sure that IP address is up-to-date.
++ */
++ rc = reconn_set_ipaddr_from_hostname(server);
++ if (rc) {
++ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
++ __func__, rc);
++ }
++
++ mutex_unlock(&server->srv_mutex);
+ }
+
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+@@ -698,6 +738,7 @@ static void clean_demultiplex_info(struc
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ cancel_delayed_work_sync(&server->echo);
++ cancel_delayed_work_sync(&server->resolve);
+
+ spin_lock(&GlobalMid_Lock);
+ server->tcpStatus = CifsExiting;
+@@ -1278,6 +1319,7 @@ cifs_put_tcp_session(struct TCP_Server_I
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ cancel_delayed_work_sync(&server->echo);
++ cancel_delayed_work_sync(&server->resolve);
+
+ if (from_reconnect)
+ /*
+@@ -1360,6 +1402,7 @@ cifs_get_tcp_session(struct smb3_fs_cont
+ INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
+ INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
+ INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
++ INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server);
+ INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
+ mutex_init(&tcp_ses->reconnect_mutex);
+ memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
+@@ -1440,6 +1483,12 @@ smbd_connected:
+ /* queue echo request delayed work */
+ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
+
++ /* queue dns resolution delayed work */
++ cifs_dbg(FYI, "%s: next dns resolution scheduled for %d seconds in the future\n",
++ __func__, SMB_DNS_RESOLVE_INTERVAL_DEFAULT);
++
++ queue_delayed_work(cifsiod_wq, &tcp_ses->resolve, (SMB_DNS_RESOLVE_INTERVAL_DEFAULT * HZ));
++
+ return tcp_ses;
+
+ out_err_crypto_release:
+--- a/fs/cifs/dns_resolve.c
++++ b/fs/cifs/dns_resolve.c
+@@ -36,6 +36,7 @@
+ * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
+ * @unc: UNC path specifying the server (with '/' as delimiter)
+ * @ip_addr: Where to return the IP address.
++ * @expiry: Where to return the expiry time for the dns record.
+ *
+ * The IP address will be returned in string form, and the caller is
+ * responsible for freeing it.
+@@ -43,7 +44,7 @@
+ * Returns length of result on success, -ve on error.
+ */
+ int
+-dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
++dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry)
+ {
+ struct sockaddr_storage ss;
+ const char *hostname, *sep;
+@@ -78,13 +79,14 @@ dns_resolve_server_name_to_ip(const char
+
+ /* Perform the upcall */
+ rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len,
+- NULL, ip_addr, NULL, false);
++ NULL, ip_addr, expiry, false);
+ if (rc < 0)
+ cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
+ __func__, len, len, hostname);
+ else
+- cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n",
+- __func__, len, len, hostname, *ip_addr);
++ cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n",
++ __func__, len, len, hostname, *ip_addr,
++ expiry ? (*expiry) : 0);
+ return rc;
+
+ name_is_IP_address:
+--- a/fs/cifs/dns_resolve.h
++++ b/fs/cifs/dns_resolve.h
+@@ -24,7 +24,7 @@
+ #define _DNS_RESOLVE_H
+
+ #ifdef __KERNEL__
+-extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
++extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry);
+ #endif /* KERNEL */
+
+ #endif /* _DNS_RESOLVE_H */
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -1199,7 +1199,7 @@ int match_target_ip(struct TCP_Server_In
+
+ cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2);
+
+- rc = dns_resolve_server_name_to_ip(target, &tip);
++ rc = dns_resolve_server_name_to_ip(target, &tip, NULL);
+ if (rc < 0)
+ goto out;
+
--- /dev/null
+From 23fa2e46a5556f787ce2ea1a315d3ab93cced204 Mon Sep 17 00:00:00 2001
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+Date: Sat, 26 Jun 2021 15:03:04 +0800
+Subject: KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio
+
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+
+commit 23fa2e46a5556f787ce2ea1a315d3ab93cced204 upstream.
+
+BUG: KASAN: use-after-free in kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183
+Read of size 8 at addr ffff0000c03a2500 by task syz-executor083/4269
+
+CPU: 5 PID: 4269 Comm: syz-executor083 Not tainted 5.10.0 #7
+Hardware name: linux,dummy-virt (DT)
+Call trace:
+ dump_backtrace+0x0/0x2d0 arch/arm64/kernel/stacktrace.c:132
+ show_stack+0x28/0x34 arch/arm64/kernel/stacktrace.c:196
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x110/0x164 lib/dump_stack.c:118
+ print_address_description+0x78/0x5c8 mm/kasan/report.c:385
+ __kasan_report mm/kasan/report.c:545 [inline]
+ kasan_report+0x148/0x1e4 mm/kasan/report.c:562
+ check_memory_region_inline mm/kasan/generic.c:183 [inline]
+ __asan_load8+0xb4/0xbc mm/kasan/generic.c:252
+ kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183
+ kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755
+ vfs_ioctl fs/ioctl.c:48 [inline]
+ __do_sys_ioctl fs/ioctl.c:753 [inline]
+ __se_sys_ioctl fs/ioctl.c:739 [inline]
+ __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739
+ __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline]
+ invoke_syscall arch/arm64/kernel/syscall.c:48 [inline]
+ el0_svc_common arch/arm64/kernel/syscall.c:158 [inline]
+ do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220
+ el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367
+ el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383
+ el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670
+
+Allocated by task 4269:
+ stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track mm/kasan/common.c:56 [inline]
+ __kasan_kmalloc+0xdc/0x120 mm/kasan/common.c:461
+ kasan_kmalloc+0xc/0x14 mm/kasan/common.c:475
+ kmem_cache_alloc_trace include/linux/slab.h:450 [inline]
+ kmalloc include/linux/slab.h:552 [inline]
+ kzalloc include/linux/slab.h:664 [inline]
+ kvm_vm_ioctl_register_coalesced_mmio+0x78/0x1cc arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:146
+ kvm_vm_ioctl+0x7e8/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3746
+ vfs_ioctl fs/ioctl.c:48 [inline]
+ __do_sys_ioctl fs/ioctl.c:753 [inline]
+ __se_sys_ioctl fs/ioctl.c:739 [inline]
+ __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739
+ __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline]
+ invoke_syscall arch/arm64/kernel/syscall.c:48 [inline]
+ el0_svc_common arch/arm64/kernel/syscall.c:158 [inline]
+ do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220
+ el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367
+ el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383
+ el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670
+
+Freed by task 4269:
+ stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track+0x38/0x6c mm/kasan/common.c:56
+ kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:355
+ __kasan_slab_free+0x124/0x150 mm/kasan/common.c:422
+ kasan_slab_free+0x10/0x1c mm/kasan/common.c:431
+ slab_free_hook mm/slub.c:1544 [inline]
+ slab_free_freelist_hook mm/slub.c:1577 [inline]
+ slab_free mm/slub.c:3142 [inline]
+ kfree+0x104/0x38c mm/slub.c:4124
+ coalesced_mmio_destructor+0x94/0xa4 arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:102
+ kvm_iodevice_destructor include/kvm/iodev.h:61 [inline]
+ kvm_io_bus_unregister_dev+0x248/0x280 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:4374
+ kvm_vm_ioctl_unregister_coalesced_mmio+0x158/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:186
+ kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755
+ vfs_ioctl fs/ioctl.c:48 [inline]
+ __do_sys_ioctl fs/ioctl.c:753 [inline]
+ __se_sys_ioctl fs/ioctl.c:739 [inline]
+ __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739
+ __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline]
+ invoke_syscall arch/arm64/kernel/syscall.c:48 [inline]
+ el0_svc_common arch/arm64/kernel/syscall.c:158 [inline]
+ do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220
+ el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367
+ el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383
+ el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670
+
+If kvm_io_bus_unregister_dev() returns -ENOMEM, kvm_iodevice_destructor()
+has already been called inside that function to delete 'struct
+kvm_coalesced_mmio_dev *dev' from the list and free the dev; calling
+kvm_iodevice_destructor() again then leads to the use-after-free above.
+
+Let's check the return value of kvm_io_bus_unregister_dev(), and only call
+kvm_iodevice_destructor() if the return value is 0.
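+
+The shape of the bug, reduced to a userspace sketch (hypothetical helper;
+the point is that the callee frees on failure, so the caller must only free
+on success):
+
+  #include <errno.h>
+  #include <stdlib.h>
+
+  struct dev { int id; };
+
+  /* models kvm_io_bus_unregister_dev(): frees the device itself on error */
+  static int unregister_dev(struct dev *d, int fail)
+  {
+  	if (fail) {
+  		free(d);		/* callee-side destruction */
+  		return -ENOMEM;
+  	}
+  	return 0;
+  }
+
+  int main(void)
+  {
+  	struct dev *d = malloc(sizeof(*d));
+
+  	if (unregister_dev(d, 1))	/* the buggy code also freed d here */
+  		return 1;
+  	free(d);	/* destroy only on success, as in the fix */
+  	return 0;
+  }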
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: kvm@vger.kernel.org
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Message-Id: <20210626070304.143456-1-wangkefeng.wang@huawei.com>
+Cc: stable@vger.kernel.org
+Fixes: 5d3c4c79384a ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed", 2021-04-20)
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/coalesced_mmio.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/virt/kvm/coalesced_mmio.c
++++ b/virt/kvm/coalesced_mmio.c
+@@ -186,7 +186,6 @@ int kvm_vm_ioctl_unregister_coalesced_mm
+ coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
+ r = kvm_io_bus_unregister_dev(kvm,
+ zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
+- kvm_iodevice_destructor(&dev->dev);
+
+ /*
+ * On failure, unregister destroys all devices on the
+@@ -196,6 +195,7 @@ int kvm_vm_ioctl_unregister_coalesced_mm
+ */
+ if (r)
+ break;
++ kvm_iodevice_destructor(&dev->dev);
+ }
+ }
+
--- /dev/null
+From fce7e152ffc8f89d02a80617b16c7aa1527847c8 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 28 Jun 2021 12:44:20 +0200
+Subject: KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit fce7e152ffc8f89d02a80617b16c7aa1527847c8 upstream.
+
+The APM states that #GP is raised upon a write to MSR_VM_HSAVE_PA when
+the supplied address is not page-aligned or is outside of the "maximum
+supported physical address for this implementation".
+The page_address_valid() check seems suitable. Also, forcefully page-align
+the address when it is written from the VMM.
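+
+A userspace sketch of that validity check (page_address_valid() in KVM
+compares against the guest's MAXPHYADDR; a caller-supplied width stands in
+for it here):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define PAGE_SIZE 4096ULL
+  #define PAGE_MASK (~(PAGE_SIZE - 1))
+
+  static bool hsave_pa_valid(uint64_t data, unsigned int maxphyaddr)
+  {
+  	if (data & ~PAGE_MASK)		/* not page-aligned -> #GP */
+  		return false;
+  	if (data >> maxphyaddr)		/* beyond MAXPHYADDR -> #GP */
+  		return false;
+  	return true;
+  }
+
+  int main(void)
+  {
+  	printf("%d %d\n", hsave_pa_valid(0x1000, 48),	/* 1: valid */
+  			hsave_pa_valid(0x1234, 48));	/* 0: unaligned */
+  	return 0;
+  }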
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20210628104425.391276-2-vkuznets@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+[Add comment about behavior for host-provided values. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2915,7 +2915,16 @@ static int svm_set_msr(struct kvm_vcpu *
+ svm_disable_lbrv(vcpu);
+ break;
+ case MSR_VM_HSAVE_PA:
+- svm->nested.hsave_msr = data;
++ /*
++ * Old kernels did not validate the value written to
++ * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
++ * value to allow live migrating buggy or malicious guests
++ * originating from those kernels.
++ */
++ if (!msr->host_initiated && !page_address_valid(vcpu, data))
++ return 1;
++
++ svm->nested.hsave_msr = data & PAGE_MASK;
+ break;
+ case MSR_VM_CR:
+ return svm_set_vm_cr(vcpu, data);
--- /dev/null
+From cd4220d23bf3f43cf720e82bdee681f383433ae2 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 1 Jul 2021 17:42:24 +0200
+Subject: KVM: selftests: do not require 64GB in set_memory_region_test
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit cd4220d23bf3f43cf720e82bdee681f383433ae2 upstream.
+
+Unless the user sets overcommit_memory or has plenty of swap, the latest
+changes to the testcase will result in ENOMEM failures for hosts with
+less than 64GB RAM. As we do not use much of the allocated memory, we
+can use MAP_NORESERVE to avoid this error.
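+
+For reference, a standalone use of the same flag; MAP_NORESERVE reserves
+address space without committing memory or swap to it:
+
+  #include <stdio.h>
+  #include <sys/mman.h>
+
+  int main(void)
+  {
+  	size_t len = 64UL << 30;	/* 64 GiB of address space, not RAM */
+  	void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
+  			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+
+  	if (mem == MAP_FAILED) {
+  		perror("mmap");
+  		return 1;
+  	}
+  	munmap(mem, len);
+  	return 0;
+  }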
+
+Cc: Zenghui Yu <yuzenghui@huawei.com>
+Cc: vkuznets@redhat.com
+Cc: wanghaibin.wang@huawei.com
+Cc: stable@vger.kernel.org
+Fixes: 309505dd5685 ("KVM: selftests: Fix mapping length truncation in m{,un}map()")
+Tested-by: Zenghui Yu <yuzenghui@huawei.com>
+Link: https://lore.kernel.org/kvm/20210701160425.33666-1-borntraeger@de.ibm.com/
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/kvm/set_memory_region_test.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
+index d8812f27648c..d31f54ac4e98 100644
+--- a/tools/testing/selftests/kvm/set_memory_region_test.c
++++ b/tools/testing/selftests/kvm/set_memory_region_test.c
+@@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void)
+ (max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+
+ mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
++ PROT_READ | PROT_WRITE,
++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+ mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
+
+--
+2.32.0
+
--- /dev/null
+From 896707c212d440a6863ce0a3930c8a609e24497d Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 7 Jul 2021 15:50:59 +0300
+Subject: KVM: SVM: remove INIT intercept handler
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 896707c212d440a6863ce0a3930c8a609e24497d upstream.
+
+The kernel never sends a real INIT event to CPUs, other than on boot.
+
+Thus INIT interception is an error which should be caught
+by a check for an unknown VMexit reason.
+
+On top of that, the current INIT VM exit handler skips
+the current instruction, which is wrong.
+That was added in commit 5ff3a351f687 ("KVM: x86: Move trivial
+instruction-based exit handlers to common code").
+
+Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code")
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210707125100.677203-3-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3069,7 +3069,6 @@ static int (*const svm_exit_handlers[])(
+ [SVM_EXIT_INTR] = intr_interception,
+ [SVM_EXIT_NMI] = nmi_interception,
+ [SVM_EXIT_SMI] = smi_interception,
+- [SVM_EXIT_INIT] = kvm_emulate_as_nop,
+ [SVM_EXIT_VINTR] = interrupt_window_interception,
+ [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
+ [SVM_EXIT_CPUID] = kvm_emulate_cpuid,
--- /dev/null
+From 76ff371b67cb12fb635396234468abcf6a466f16 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 24 Jun 2021 19:03:54 -0700
+Subject: KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 76ff371b67cb12fb635396234468abcf6a466f16 upstream.
+
+Don't clear the C-bit in the #NPF handler, as it is a legal GPA bit for
+non-SEV guests, and for SEV guests the C-bit is dropped before the GPA
+hits the NPT in hardware. Clearing the bit for non-SEV guests causes KVM
+to mishandle #NPFs with GPAs that collide with the host's C-bit.
+
+Although the APM doesn't explicitly state that the C-bit is not reserved
+for non-SEV, Tom Lendacky confirmed that the following snippet about the
+effective reduction due to the C-bit does indeed apply only to SEV guests.
+
+ Note that because guest physical addresses are always translated
+ through the nested page tables, the size of the guest physical address
+ space is not impacted by any physical address space reduction indicated
+ in CPUID 8000_001F[EBX]. If the C-bit is a physical address bit however,
+ the guest physical address space is effectively reduced by 1 bit.
+
+And for SEV guests, the APM clearly states that the bit is dropped before
+walking the nested page tables.
+
+ If the C-bit is an address bit, this bit is masked from the guest
+ physical address when it is translated through the nested page tables.
+ Consequently, the hypervisor does not need to be aware of which pages
+ the guest has chosen to mark private.
+
+Note, the bogus C-bit clearing was removed from legacy #PF handler in
+commit 6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF
+interception").
+
+Fixes: 0ede79e13224 ("KVM: SVM: Clear C-bit from the page fault address")
+Cc: Peter Gonda <pgonda@google.com>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210625020354.431829-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1897,7 +1897,7 @@ static int npf_interception(struct kvm_v
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
++ u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 error_code = svm->vmcb->control.exit_info_1;
+
+ trace_kvm_page_fault(fault_address, error_code);
--- /dev/null
+From 991afbbee8ac93b055a27477278a5fb556af1ff4 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 7 Jul 2021 15:50:58 +0300
+Subject: KVM: SVM: #SMI interception must not skip the instruction
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 991afbbee8ac93b055a27477278a5fb556af1ff4 upstream.
+
+Commit 5ff3a351f687 ("KVM: x86: Move trivial instruction-based
+exit handlers to common code") unfortunately made the mistake of
+treating nop_on_interception and nop_interception in the same way.
+
+The former truly does nothing, while the latter skips the instruction.
+
+The SMI VM exit handler should do nothing
+(SMI itself is handled by the host when we do STGI).
+
+Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code")
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20210707125100.677203-2-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2080,6 +2080,11 @@ static int nmi_interception(struct kvm_v
+ return 1;
+ }
+
++static int smi_interception(struct kvm_vcpu *vcpu)
++{
++ return 1;
++}
++
+ static int intr_interception(struct kvm_vcpu *vcpu)
+ {
+ ++vcpu->stat.irq_exits;
+@@ -3063,7 +3068,7 @@ static int (*const svm_exit_handlers[])(
+ [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
+ [SVM_EXIT_INTR] = intr_interception,
+ [SVM_EXIT_NMI] = nmi_interception,
+- [SVM_EXIT_SMI] = kvm_emulate_as_nop,
++ [SVM_EXIT_SMI] = smi_interception,
+ [SVM_EXIT_INIT] = kvm_emulate_as_nop,
+ [SVM_EXIT_VINTR] = interrupt_window_interception,
+ [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
--- /dev/null
+From f85d40160691881a17a397c448d799dfc90987ba Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+Date: Tue, 29 Jun 2021 01:26:32 +0800
+Subject: KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run()
+
+From: Lai Jiangshan <laijs@linux.alibaba.com>
+
+commit f85d40160691881a17a397c448d799dfc90987ba upstream.
+
+When the host is using debug registers but the guest is not using them
+nor is the guest in guest-debug state, the kvm code does not reset
+the host debug registers before kvm_x86->run(). Rather, it relies on
+the hardware vmentry instruction to automatically reset the dr7 registers
+which ensures that the host breakpoints do not affect the guest.
+
+This however violates the non-instrumentable nature around VM entry
+and exit; for example, a host breakpoint set on vcpu->arch.cr2 can
+fire inside that non-instrumentable window.
+
+Another issue is consistency. When the guest debug registers are active,
+the host breakpoints are reset before kvm_x86->run(). But when the
+guest debug registers are inactive, the host breakpoints are delayed to
+be disabled. The host tracing tools may see different results depending
+on what the guest is doing.
+
+To fix the problems, we clear %db7 unconditionally before kvm_x86->run()
+if the host has set any breakpoints, whether or not the guest is using
+them.
+
+Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
+Message-Id: <20210628172632.81029-1-jiangshanlai@gmail.com>
+Cc: stable@vger.kernel.org
+[Only clear %db7 instead of reloading all debug registers. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9347,6 +9347,8 @@ static int vcpu_enter_guest(struct kvm_v
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ set_debugreg(vcpu->arch.dr6, 6);
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
++ } else if (unlikely(hw_breakpoint_active())) {
++ set_debugreg(0, 7);
+ }
+
+ for (;;) {
--- /dev/null
+From fc9bf2e087efcd81bda2e52d09616d2a1bf982a8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 23 Jun 2021 16:05:49 -0700
+Subject: KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit fc9bf2e087efcd81bda2e52d09616d2a1bf982a8 upstream.
+
+Ignore "dynamic" host adjustments to the physical address mask when
+generating the masks for guest PTEs, i.e. the guest PA masks. The host
+physical address space and guest physical address space are two different
+beasts, e.g. even though SEV's C-bit is the same bit location for both
+host and guest, disabling SME in the host (which clears shadow_me_mask)
+does not affect the guest PTE->GPA "translation".
+
+For non-SEV guests, not dropping bits is the correct behavior. Assuming
+KVM and userspace correctly enumerate/configure guest MAXPHYADDR, bits
+that are lost as collateral damage from memory encryption are treated as
+reserved bits, i.e. KVM will never get to the point where it attempts to
+generate a gfn using the affected bits. And if userspace wants to create
+a bogus vCPU, then userspace gets to deal with the fallout of hardware
+doing odd things with bad GPAs.
+
+For SEV guests, not dropping the C-bit is technically wrong, but it's a
+moot point because KVM can't read SEV guest's page tables in any case
+since they're always encrypted. Not to mention that the current KVM code
+is also broken since sme_me_mask does not have to be non-zero for SEV to
+be supported by KVM. The proper fix would be to teach all of KVM to
+correctly handle guest private memory, but that's a task for the future.
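+
+A sketch of the guest-side address mask this patch introduces: the 52-bit
+architectural PTE limit with no host memory-encryption bits folded in:
+
+  #include <inttypes.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define PAGE_SIZE 4096ULL
+  #define GUEST_PT64_BASE_ADDR_MASK \
+  	(((1ULL << 52) - 1) & ~(PAGE_SIZE - 1))
+
+  int main(void)
+  {
+  	uint64_t pte = 0x800f0000deadb067ULL;	/* example PTE, flags set */
+
+  	/* bits 51:12 are the GPA; flag and ignored bits drop out */
+  	printf("GPA: 0x%" PRIx64 "\n", pte & GUEST_PT64_BASE_ADDR_MASK);
+  	return 0;
+  }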
+
+Fixes: d0ec49d4de90 ("kvm/x86/svm: Support Secure Memory Encryption within KVM")
+Cc: stable@vger.kernel.org
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210623230552.4027702-5-seanjc@google.com>
+[Use a new header instead of adding header guards to paging_tmpl.h. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 2 ++
+ arch/x86/kvm/mmu/paging.h | 14 ++++++++++++++
+ arch/x86/kvm/mmu/paging_tmpl.h | 4 ++--
+ arch/x86/kvm/mmu/spte.h | 6 ------
+ 4 files changed, 18 insertions(+), 8 deletions(-)
+ create mode 100644 arch/x86/kvm/mmu/paging.h
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -53,6 +53,8 @@
+ #include <asm/kvm_page_track.h>
+ #include "trace.h"
+
++#include "paging.h"
++
+ extern bool itlb_multihit_kvm_mitigation;
+
+ static int __read_mostly nx_huge_pages = -1;
+--- /dev/null
++++ b/arch/x86/kvm/mmu/paging.h
+@@ -0,0 +1,14 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/* Shadow paging constants/helpers that don't need to be #undef'd. */
++#ifndef __KVM_X86_PAGING_H
++#define __KVM_X86_PAGING_H
++
++#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
++#define PT64_LVL_ADDR_MASK(level) \
++ (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
++ * PT64_LEVEL_BITS))) - 1))
++#define PT64_LVL_OFFSET_MASK(level) \
++ (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
++ * PT64_LEVEL_BITS))) - 1))
++#endif /* __KVM_X86_PAGING_H */
++
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -24,7 +24,7 @@
+ #define pt_element_t u64
+ #define guest_walker guest_walker64
+ #define FNAME(name) paging##64_##name
+- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
++ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
+ #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
+ #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
+ #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
+@@ -57,7 +57,7 @@
+ #define pt_element_t u64
+ #define guest_walker guest_walkerEPT
+ #define FNAME(name) ept_##name
+- #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
++ #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
+ #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
+ #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
+ #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
+--- a/arch/x86/kvm/mmu/spte.h
++++ b/arch/x86/kvm/mmu/spte.h
+@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK =
+ #else
+ #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+ #endif
+-#define PT64_LVL_ADDR_MASK(level) \
+- (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+- * PT64_LEVEL_BITS))) - 1))
+-#define PT64_LVL_OFFSET_MASK(level) \
+- (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+- * PT64_LEVEL_BITS))) - 1))
+
+ #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
+ | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
--- /dev/null
+From 4bf48e3c0aafd32b960d341c4925b48f416f14a5 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 23 Jun 2021 16:05:46 -0700
+Subject: KVM: x86: Use guest MAXPHYADDR from CPUID.0x8000_0008 iff TDP is enabled
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 4bf48e3c0aafd32b960d341c4925b48f416f14a5 upstream.
+
+Ignore the guest MAXPHYADDR reported by CPUID.0x8000_0008 if TDP, i.e.
+NPT, is disabled, and instead use the host's MAXPHYADDR. Per AMD's APM:
+
+ Maximum guest physical address size in bits. This number applies only
+ to guests using nested paging. When this field is zero, refer to the
+ PhysAddrSize field for the maximum guest physical address size.
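+
+For orientation, a decoding sketch of the relevant CPUID 0x8000_0008 EAX
+fields (per the APM: bits 7:0 host PA size, 15:8 virtual address size,
+23:16 guest PA size, with 0 meaning "use the host PA size"):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  static void decode(uint32_t eax, int tdp_enabled)
+  {
+  	unsigned int phys_as   = eax & 0xff;
+  	unsigned int virt_as   = (eax >> 8) & 0xff;
+  	unsigned int g_phys_as = (eax >> 16) & 0xff;
+
+  	if (!g_phys_as || !tdp_enabled)	/* the fix adds the TDP check */
+  		g_phys_as = phys_as;
+  	printf("phys=%u virt=%u guest=%u\n", phys_as, virt_as, g_phys_as);
+  }
+
+  int main(void)
+  {
+  	decode(0x00003030, 0);	/* 48/48, no separate guest size */
+  	return 0;
+  }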
+
+Fixes: 24c82e576b78 ("KVM: Sanitize cpuid")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210623230552.4027702-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -940,8 +940,14 @@ static inline int __do_cpuid_func(struct
+ unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+ unsigned phys_as = entry->eax & 0xff;
+
+- if (!g_phys_as)
++ /*
++	 * Use bare metal's MAXPHYADDR if the CPU doesn't report guest
++ * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the
++ * guest version "applies only to guests using nested paging".
++ */
++ if (!g_phys_as || !tdp_enabled)
+ g_phys_as = phys_as;
++
+ entry->eax = g_phys_as | (virt_as << 8);
+ entry->edx = 0;
+ cpuid_entry_override(entry, CPUID_8000_0008_EBX);
--- /dev/null
+From e39f00f60ebd2e7b295c37a05e6349df656d3eb8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 23 Jun 2021 16:05:47 -0700
+Subject: KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit e39f00f60ebd2e7b295c37a05e6349df656d3eb8 upstream.
+
+Use boot_cpu_data.x86_phys_bits instead of the raw CPUID information to
+enumerate the MAXPHYADDR for KVM guests when TDP is disabled (the guest
+version is only relevant to NPT/TDP).
+
+When using shadow paging, any reductions to the host's MAXPHYADDR apply
+to KVM and its guests as well, i.e. using the raw CPUID info will cause
+KVM to misreport the number of PA bits available to the guest.
+
+Unconditionally zero out the "Physical Address bit reduction" entry.
+For !TDP, the adjustment is already done, and for TDP enumerating the
+host's reduction is wrong as the reduction does not apply to GPAs.
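+
+The clearing itself is a six-bit field wipe; a standalone equivalent of the
+GENMASK(11, 6) use in the diff below:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* bits h..l set, like the kernel's GENMASK() for 32-bit values */
+  #define GENMASK32(h, l) ((~0U << (l)) & (~0U >> (31 - (h))))
+
+  int main(void)
+  {
+  	uint32_t ebx = 0x000001c2;	/* example 0x8000_001F EBX value */
+
+  	ebx &= ~GENMASK32(11, 6);	/* zero the "PA bit reduction" field */
+  	printf("ebx: 0x%08x\n", ebx);	/* -> 0x00000002 */
+  	return 0;
+  }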
+
+Fixes: 9af9b94068fb ("x86/cpu/AMD: Handle SME reduction in physical address size")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210623230552.4027702-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 27 ++++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -941,11 +941,18 @@ static inline int __do_cpuid_func(struct
+ unsigned phys_as = entry->eax & 0xff;
+
+ /*
+-	 * Use bare metal's MAXPHYADDR if the CPU doesn't report guest
+- * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the
+- * guest version "applies only to guests using nested paging".
++ * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
++ * the guest operates in the same PA space as the host, i.e.
++ * reductions in MAXPHYADDR for memory encryption affect shadow
++ * paging, too.
++ *
++ * If TDP is enabled but an explicit guest MAXPHYADDR is not
++ * provided, use the raw bare metal MAXPHYADDR as reductions to
++ * the HPAs do not affect GPAs.
+ */
+- if (!g_phys_as || !tdp_enabled)
++ if (!tdp_enabled)
++ g_phys_as = boot_cpu_data.x86_phys_bits;
++ else if (!g_phys_as)
+ g_phys_as = phys_as;
+
+ entry->eax = g_phys_as | (virt_as << 8);
+@@ -970,12 +977,18 @@ static inline int __do_cpuid_func(struct
+ case 0x8000001a:
+ case 0x8000001e:
+ break;
+- /* Support memory encryption cpuid if host supports it */
+ case 0x8000001F:
+- if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
++ if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+- else
++ } else {
+ cpuid_entry_override(entry, CPUID_8000_001F_EAX);
++
++ /*
++ * Enumerate '0' for "PA bits reduction", the adjusted
++ * MAXPHYADDR is enumerated directly (see 0x80000008).
++ */
++ entry->ebx &= ~GENMASK(11, 6);
++ }
+ break;
+ /*Add support for Centaur's CPUID instruction*/
+ case 0xC0000000:
--- /dev/null
+cifs-use-the-expiry-output-of-dns_query-to-schedule-next-resolution.patch
+cifs-handle-reconnect-of-tcon-when-there-is-no-cached-dfs-referral.patch
+cifs-do-not-use-the-original-cruid-when-following-dfs-links-for-multiuser-mounts.patch
+kvm-mmio-fix-use-after-free-read-in-kvm_vm_ioctl_unregister_coalesced_mmio.patch
+kvm-selftests-do-not-require-64gb-in-set_memory_region_test.patch
+kvm-x86-use-guest-maxphyaddr-from-cpuid.0x8000_0008-iff-tdp-is-enabled.patch
+kvm-x86-use-kernel-s-x86_phys_bits-to-handle-reduced-maxphyaddr.patch
+kvm-x86-mmu-do-not-apply-hpa-memory-encryption-mask-to-gpas.patch
+kvm-svm-revert-clearing-of-c-bit-on-gpa-in-npf-handler.patch
+kvm-nsvm-check-the-value-written-to-msr_vm_hsave_pa.patch
+kvm-x86-disable-hardware-breakpoints-unconditionally-before-kvm_x86-run.patch
+kvm-svm-smi-interception-must-not-skip-the-instruction.patch
+kvm-svm-remove-init-intercept-handler.patch