--- /dev/null
+From 934002cd660b035b926438244b4294e647507e13 Mon Sep 17 00:00:00 2001
+From: Alper Gun <alpergun@google.com>
+Date: Thu, 10 Jun 2021 17:46:04 +0000
+Subject: KVM: SVM: Call SEV Guest Decommission if ASID binding fails
+
+From: Alper Gun <alpergun@google.com>
+
+commit 934002cd660b035b926438244b4294e647507e13 upstream.
+
+Send SEV_CMD_DECOMMISSION command to PSP firmware if ASID binding
+fails. If a failure happens after a successful LAUNCH_START command,
+a decommission command should be executed. Otherwise, guest context
+will remain allocated inside the AMD SP. Once the firmware no longer
+has memory to allocate more SEV guest contexts, the LAUNCH_START
+command will begin to fail with the SEV_RET_RESOURCE_LIMIT error.
+
+The existing code calls decommission inside sev_unbind_asid, but it is
+not called if a failure happens before guest activation succeeds. If
+sev_bind_asid fails, decommission is never called. PSP firmware has a
+limit for the number of guests. If sev_asid_binding fails many times,
+PSP firmware will not have resources to create another guest context.
+
+Cc: stable@vger.kernel.org
+Fixes: 59414c989220 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START command")
+Reported-by: Peter Gonda <pgonda@google.com>
+Signed-off-by: Alper Gun <alpergun@google.com>
+Reviewed-by: Marc Orr <marcorr@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Message-Id: <20210610174604.2554090-1-alpergun@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 32 +++++++++++++++++++++-----------
+ 1 file changed, 21 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -130,9 +130,25 @@ static void sev_asid_free(int asid)
+ mutex_unlock(&sev_bitmap_lock);
+ }
+
+-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
++static void sev_decommission(unsigned int handle)
+ {
+ struct sev_data_decommission *decommission;
++
++ if (!handle)
++ return;
++
++ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
++ if (!decommission)
++ return;
++
++ decommission->handle = handle;
++ sev_guest_decommission(decommission, NULL);
++
++ kfree(decommission);
++}
++
++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
++{
+ struct sev_data_deactivate *data;
+
+ if (!handle)
+@@ -152,15 +168,7 @@ static void sev_unbind_asid(struct kvm *
+
+ kfree(data);
+
+- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+- if (!decommission)
+- return;
+-
+- /* decommission handle */
+- decommission->handle = handle;
+- sev_guest_decommission(decommission, NULL);
+-
+- kfree(decommission);
++ sev_decommission(handle);
+ }
+
+ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+@@ -288,8 +296,10 @@ static int sev_launch_start(struct kvm *
+
+ /* Bind ASID to this guest */
+ ret = sev_bind_asid(kvm, start->handle, error);
+- if (ret)
++ if (ret) {
++ sev_decommission(start->handle);
+ goto e_free_session;
++ }
+
+ /* return handle to userspace */
+ params.handle = start->handle;
--- /dev/null
+From 827a746f405d25f79560c7868474aec5aee174e1 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Sun, 13 Jun 2021 19:33:45 -0400
+Subject: netfs: fix test for whether we can skip read when writing beyond EOF
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 827a746f405d25f79560c7868474aec5aee174e1 upstream.
+
+It's not sufficient to skip reading when the pos is beyond the EOF.
+There may be data at the head of the page that we need to fill in
+before the write.
+
+Add a new helper function that corrects and clarifies the logic of
+when we can skip reads, and have it only zero out the part of the page
+that won't have data copied in for the write.
+
+Finally, don't set the page Uptodate after zeroing. It's not up to date
+since the write data won't have been copied in yet.
+
+[DH made the following changes:
+
+ - Prefixed the new function with "netfs_".
+
+ - Don't call zero_user_segments() for a full-page write.
+
+ - Altered the beyond-last-page check to avoid a DIV instruction and got
+ rid of then-redundant zero-length file check.
+]
+
+[ Note: this fix is commit 827a746f405d in mainline kernels. The
+ original bug was in ceph, but got lifted into the fs/netfs
+ library for v5.13. This backport should apply to stable
+  kernels v5.10 through v5.12. ]
+
+Fixes: e1b1240c1ff5f ("netfs: Add write_begin helper")
+Reported-by: Andrew W Elble <aweits@rit.edu>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+cc: ceph-devel@vger.kernel.org
+Link: https://lore.kernel.org/r/20210613233345.113565-1-jlayton@kernel.org/
+Link: https://lore.kernel.org/r/162367683365.460125.4467036947364047314.stgit@warthog.procyon.org.uk/ # v1
+Link: https://lore.kernel.org/r/162391826758.1173366.11794946719301590013.stgit@warthog.procyon.org.uk/ # v2
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/addr.c | 54 +++++++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 41 insertions(+), 13 deletions(-)
+
+--- a/fs/ceph/addr.c
++++ b/fs/ceph/addr.c
+@@ -1302,6 +1302,45 @@ ceph_find_incompatible(struct page *page
+ return NULL;
+ }
+
++/**
++ * prep_noread_page - prep a page for writing without reading first
++ * @page: page being prepared
++ * @pos: starting position for the write
++ * @len: length of write
++ *
++ * In some cases, write_begin doesn't need to read at all:
++ * - full page write
++ * - file is currently zero-length
++ * - write that lies in a page that is completely beyond EOF
++ * - write that covers the the page from start to EOF or beyond it
++ *
++ * If any of these criteria are met, then zero out the unwritten parts
++ * of the page and return true. Otherwise, return false.
++ */
++static bool skip_page_read(struct page *page, loff_t pos, size_t len)
++{
++ struct inode *inode = page->mapping->host;
++ loff_t i_size = i_size_read(inode);
++ size_t offset = offset_in_page(pos);
++
++ /* Full page write */
++ if (offset == 0 && len >= PAGE_SIZE)
++ return true;
++
++ /* pos beyond last page in the file */
++ if (pos - offset >= i_size)
++ goto zero_out;
++
++ /* write that covers the whole page from start to EOF or beyond it */
++ if (offset == 0 && (pos + len) >= i_size)
++ goto zero_out;
++
++ return false;
++zero_out:
++ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
++ return true;
++}
++
+ /*
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+@@ -1315,7 +1354,6 @@ static int ceph_write_begin(struct file
+ struct ceph_snap_context *snapc;
+ struct page *page = NULL;
+ pgoff_t index = pos >> PAGE_SHIFT;
+- int pos_in_page = pos & ~PAGE_MASK;
+ int r = 0;
+
+ dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
+@@ -1350,19 +1388,9 @@ static int ceph_write_begin(struct file
+ break;
+ }
+
+- /*
+- * In some cases we don't need to read at all:
+- * - full page write
+- * - write that lies completely beyond EOF
+- * - write that covers the the page from start to EOF or beyond it
+- */
+- if ((pos_in_page == 0 && len == PAGE_SIZE) ||
+- (pos >= i_size_read(inode)) ||
+- (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
+- zero_user_segments(page, 0, pos_in_page,
+- pos_in_page + len, PAGE_SIZE);
++ /* No need to read in some cases */
++ if (skip_page_read(page, pos, len))
+ break;
+- }
+
+ /*
+ * We need to read it. If we get back -EINPROGRESS, then the page was
mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch
mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
+kvm-svm-call-sev-guest-decommission-if-asid-binding-fails.patch
+swiotlb-manipulate-orig_addr-when-tlb_addr-has-offset.patch
+netfs-fix-test-for-whether-we-can-skip-read-when-writing-beyond-eof.patch
--- /dev/null
+From 5f89468e2f060031cd89fd4287298e0eaf246bf6 Mon Sep 17 00:00:00 2001
+From: Bumyong Lee <bumyong.lee@samsung.com>
+Date: Mon, 10 May 2021 18:10:04 +0900
+Subject: swiotlb: manipulate orig_addr when tlb_addr has offset
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Bumyong Lee <bumyong.lee@samsung.com>
+
+commit 5f89468e2f060031cd89fd4287298e0eaf246bf6 upstream.
+
+In case a driver wants to sync part of a range with an offset,
+swiotlb_tbl_sync_single() copies from orig_addr base to tlb_addr with
+offset and ends up with data mismatch.
+
+It was removed from
+"swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single",
+but said logic has to be added back in.
+
+From Linus's email:
+"That commit removed the offset calculation entirely, because the old
+
+ (unsigned long)tlb_addr & (IO_TLB_SIZE - 1)
+
+was wrong, but instead of removing it, I think it should have just
+fixed it to be
+
+ (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);
+
+instead. That way the slot offset always matches the slot index calculation."
+
+(Unfortunately that broke NVMe).
+
+The use-case that drivers are hitting is as follow:
+
+1. Get dma_addr_t from dma_map_single()
+
+dma_addr_t tlb_addr = dma_map_single(dev, vaddr, vsize, DMA_TO_DEVICE);
+
+ |<---------------vsize------------->|
+ +-----------------------------------+
+ | | original buffer
+ +-----------------------------------+
+ vaddr
+
+ swiotlb_align_offset
+ |<----->|<---------------vsize------------->|
+ +-------+-----------------------------------+
+ | | | swiotlb buffer
+ +-------+-----------------------------------+
+ tlb_addr
+
+2. Do something
+3. Sync dma_addr_t through dma_sync_single_for_device(..)
+
+dma_sync_single_for_device(dev, tlb_addr + offset, size, DMA_TO_DEVICE);
+
+ Error case.
+ Copy data to original buffer but it is from base addr (instead of
+ base addr + offset) in original buffer:
+
+ swiotlb_align_offset
+ |<----->|<- offset ->|<- size ->|
+ +-------+-----------------------------------+
+ | | |##########| | swiotlb buffer
+ +-------+-----------------------------------+
+ tlb_addr
+
+ |<- size ->|
+ +-----------------------------------+
+ |##########| | original buffer
+ +-----------------------------------+
+ vaddr
+
+The fix is to copy the data to the original buffer and take into
+account the offset, like so:
+
+ swiotlb_align_offset
+ |<----->|<- offset ->|<- size ->|
+ +-------+-----------------------------------+
+ | | |##########| | swiotlb buffer
+ +-------+-----------------------------------+
+ tlb_addr
+
+ |<- offset ->|<- size ->|
+ +-----------------------------------+
+ | |##########| | original buffer
+ +-----------------------------------+
+ vaddr
+
+[One fix, which was Linus's and made more sense as it created a
+symmetry, would break NVMe. The reason for that is the:
+ unsigned int offset = (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);
+
+would come up with the proper offset, but it would lose the
+alignment (which this patch contains).]
+
+Fixes: 16fc3cef33a0 ("swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single")
+Signed-off-by: Bumyong Lee <bumyong.lee@samsung.com>
+Signed-off-by: Chanho Park <chanho61.park@samsung.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reported-by: Dominique MARTINET <dominique.martinet@atmark-techno.com>
+Reported-by: Horia Geantă <horia.geanta@nxp.com>
+Tested-by: Horia Geantă <horia.geanta@nxp.com>
+CC: stable@vger.kernel.org
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -667,6 +667,9 @@ void swiotlb_tbl_sync_single(struct devi
+ if (orig_addr == INVALID_PHYS_ADDR)
+ return;
+
++ orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) -
++ swiotlb_align_offset(hwdev, orig_addr);
++
+ switch (target) {
+ case SYNC_FOR_CPU:
+ if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))