From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 5 May 2021 07:27:31 +0000 (+0200)
Subject: 5.11-stable patches
X-Git-Tag: v4.19.190~20
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=24d36bc1cef314a6b4f35de402987718cf797058;p=thirdparty%2Fkernel%2Fstable-queue.git

5.11-stable patches

added patches:
	driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch
	nvme-pci-set-min_align_mask.patch
	swiotlb-add-a-io_tlb_size-define.patch
	swiotlb-clean-up-swiotlb_tbl_unmap_single.patch
	swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch
	swiotlb-factor-out-a-nr_slots-helper.patch
	swiotlb-factor-out-an-io_tlb_offset-helper.patch
	swiotlb-refactor-swiotlb_tbl_map_single.patch
	swiotlb-respect-min_align_mask.patch
---

diff --git a/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch b/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch
new file mode 100644
index 00000000000..00fa2e23467
--- /dev/null
+++ b/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch
@@ -0,0 +1,63 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:07 +0000
+Subject: driver core: add a min_align_mask field to struct device_dma_parameters
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-2-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 36950f2da1ea4cb683be174f6f581e25b2d33e71
+
+Some devices rely on the address offset in a page to function
+correctly (NVMe driver as an example). These devices may use
+a different page size than the Linux kernel. The address offset
+has to be preserved upon mapping, and in order to do so, we
+need to record the page offset mask (min_align_mask) first.
+
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/device.h      |    1 +
+ include/linux/dma-mapping.h |   16 ++++++++++++++++
+ 2 files changed, 17 insertions(+)
+
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -291,6 +291,7 @@ struct device_dma_parameters {
+ 	 * sg limitations.
+ 	 */
+ 	unsigned int max_segment_size;
++	unsigned int min_align_mask;
+ 	unsigned long segment_boundary_mask;
+ };
+ 
+--- a/include/linux/dma-mapping.h
++++ b/include/linux/dma-mapping.h
+@@ -500,6 +500,22 @@ static inline int dma_set_seg_boundary(s
+ 	return -EIO;
+ }
+ 
++static inline unsigned int dma_get_min_align_mask(struct device *dev)
++{
++	if (dev->dma_parms)
++		return dev->dma_parms->min_align_mask;
++	return 0;
++}
++
++static inline int dma_set_min_align_mask(struct device *dev,
++		unsigned int min_align_mask)
++{
++	if (WARN_ON_ONCE(!dev->dma_parms))
++		return -EIO;
++	dev->dma_parms->min_align_mask = min_align_mask;
++	return 0;
++}
++
+ static inline int dma_get_cache_alignment(void)
+ {
+ #ifdef ARCH_DMA_MINALIGN
diff --git a/queue-5.11/nvme-pci-set-min_align_mask.patch b/queue-5.11/nvme-pci-set-min_align_mask.patch
new file mode 100644
index 00000000000..fb7a6744197
--- /dev/null
+++ b/queue-5.11/nvme-pci-set-min_align_mask.patch
@@ -0,0 +1,38 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:15 +0000
+Subject: nvme-pci: set min_align_mask
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-10-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 3d2d861eb03e8ee96dc430a54361c900cbe28afd
+
+The PRP addressing scheme requires all PRP entries except for the
+first one to have a zero offset into the NVMe controller pages (which
+can be different from the Linux PAGE_SIZE).  Use the min_align_mask
+device parameter to ensure that swiotlb does not change the address
+of the buffer modulo the device page size, so that the PRPs
+won't be malformed.
+
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/host/pci.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -2629,6 +2629,7 @@ static void nvme_reset_work(struct work_
+ 	 * Don't limit the IOMMU merged segment size.
+ 	 */
+ 	dma_set_max_seg_size(dev->dev, 0xffffffff);
++	dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
+ 
+ 	mutex_unlock(&dev->shutdown_lock);
+ 
diff --git a/queue-5.11/series b/queue-5.11/series
index 3262abdcd91..5f7caba8cd0 100644
--- a/queue-5.11/series
+++ b/queue-5.11/series
@@ -10,3 +10,12 @@ perf-data-fix-error-return-code-in-perf_data__create.patch
 capabilities-require-cap_setfcap-to-map-uid-0.patch
 perf-ftrace-fix-access-to-pid-in-array-when-setting-.patch
 tools-cgroup-slabinfo.py-updated-to-work-on-current-.patch
+driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch
+swiotlb-add-a-io_tlb_size-define.patch
+swiotlb-factor-out-an-io_tlb_offset-helper.patch
+swiotlb-factor-out-a-nr_slots-helper.patch
+swiotlb-clean-up-swiotlb_tbl_unmap_single.patch
+swiotlb-refactor-swiotlb_tbl_map_single.patch
+swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch
+swiotlb-respect-min_align_mask.patch
+nvme-pci-set-min_align_mask.patch
diff --git a/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch b/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch
new file mode 100644
index 00000000000..c415b655ac8
--- /dev/null
+++ b/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch
@@ -0,0 +1,89 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:08 +0000
+Subject: swiotlb: add a IO_TLB_SIZE define
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-3-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: b5d7ccb7aac3895c2138fe0980a109116ce15eff
+
+Add a new IO_TLB_SIZE define instead of open coding it using
+IO_TLB_SHIFT all over.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swiotlb.h |    1 +
+ kernel/dma/swiotlb.c    |   12 ++++++------
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/include/linux/swiotlb.h
++++ b/include/linux/swiotlb.h
+@@ -29,6 +29,7 @@ enum swiotlb_force {
+  * controllable.
+  */
+ #define IO_TLB_SHIFT 11
++#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
+ 
+ /* default to 64MB */
+ #define IO_TLB_DEFAULT_SIZE (64UL<<20)
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -491,20 +491,20 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 
+ 	tbl_dma_addr &= mask;
+ 
+-	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++	offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+ 
+ 	/*
+ 	 * Carefully handle integer overflow which can occur when mask == ~0UL.
+ 	 */
+ 	max_slots = mask + 1
+-		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
++		    ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT
+ 		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ 
+ 	/*
+ 	 * For mappings greater than or equal to a page, we limit the stride
+ 	 * (and hence alignment) to a page size.
+ 	 */
+-	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++	nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+ 	if (alloc_size >= PAGE_SIZE)
+ 		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ 	else
+@@ -598,7 +598,7 @@ void swiotlb_tbl_unmap_single(struct dev
+ 			      enum dma_data_direction dir, unsigned long attrs)
+ {
+ 	unsigned long flags;
+-	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++	int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+ 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ 
+@@ -649,7 +649,7 @@ void swiotlb_tbl_sync_single(struct devi
+ 
+ 	if (orig_addr == INVALID_PHYS_ADDR)
+ 		return;
+-	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
++	orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1);
+ 
+ 	switch (target) {
+ 	case SYNC_FOR_CPU:
+@@ -707,7 +707,7 @@ dma_addr_t swiotlb_map(struct device *de
+ 
+ size_t swiotlb_max_mapping_size(struct device *dev)
+ {
+-	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
++	return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
+ }
+ 
+ bool is_swiotlb_active(void)
diff --git a/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch b/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch
new file mode 100644
index 00000000000..5a1e2376b17
--- /dev/null
+++ b/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:11 +0000
+Subject: swiotlb: clean up swiotlb_tbl_unmap_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-6-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: ca10d0f8e530600ec63c603dbace2c30927d70b7
+
+swiotlb: clean up swiotlb_tbl_unmap_single
+
+Remove a layer of pointless indentation, replace a hard to follow
+ternary expression with a plain if/else.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |   41 +++++++++++++++++++++--------------------
+ 1 file changed, 21 insertions(+), 20 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -626,28 +626,29 @@ void swiotlb_tbl_unmap_single(struct dev
+ 	 * with slots below and above the pool being returned.
+ 	 */
+ 	spin_lock_irqsave(&io_tlb_lock, flags);
+-	{
+-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+-			 io_tlb_list[index + nslots] : 0);
+-		/*
+-		 * Step 1: return the slots to the free list, merging the
+-		 * slots with superceeding slots
+-		 */
+-		for (i = index + nslots - 1; i >= index; i--) {
+-			io_tlb_list[i] = ++count;
+-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+-		}
+-		/*
+-		 * Step 2: merge the returned slots with the preceding slots,
+-		 * if available (non zero)
+-		 */
+-		for (i = index - 1;
+-		     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+-		     io_tlb_list[i]; i--)
+-			io_tlb_list[i] = ++count;
++	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
++		count = io_tlb_list[index + nslots];
++	else
++		count = 0;
+ 
+-		io_tlb_used -= nslots;
++	/*
++	 * Step 1: return the slots to the free list, merging the slots with
++	 * superceeding slots
++	 */
++	for (i = index + nslots - 1; i >= index; i--) {
++		io_tlb_list[i] = ++count;
++		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ 	}
++
++	/*
++	 * Step 2: merge the returned slots with the preceding slots, if
++	 * available (non zero)
++	 */
++	for (i = index - 1;
++	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
++	     i--)
++		io_tlb_list[i] = ++count;
++	io_tlb_used -= nslots;
+ 	spin_unlock_irqrestore(&io_tlb_lock, flags);
+ }
+ 
diff --git a/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch b/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch
new file mode 100644
index 00000000000..bffe59b179e
--- /dev/null
+++ b/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch
@@ -0,0 +1,36 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:13 +0000
+Subject: swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-8-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 16fc3cef33a04632ab6b31758abdd77563a20759
+
+swiotlb_tbl_map_single currently never sets a tlb_addr that is not
+aligned to the tlb bucket size.  But we're going to add such a case
+soon, for which this adjustment would be bogus.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -660,7 +660,6 @@ void swiotlb_tbl_sync_single(struct devi
+ 
+ 	if (orig_addr == INVALID_PHYS_ADDR)
+ 		return;
+-	orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1);
+ 
+ 	switch (target) {
+ 	case SYNC_FOR_CPU:
diff --git a/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch b/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch
new file mode 100644
index 00000000000..e5b3988f7af
--- /dev/null
+++ b/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch
@@ -0,0 +1,71 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:10 +0000
+Subject: swiotlb: factor out a nr_slots helper
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-5-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: c32a77fd18780a5192dfb6eec69f239faebf28fd
+
+Factor out a helper to find the number of slots for a given size.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -194,6 +194,11 @@ static inline unsigned long io_tlb_offse
+ 	return val & (IO_TLB_SEGSIZE - 1);
+ }
+ 
++static inline unsigned long nr_slots(u64 val)
++{
++	return DIV_ROUND_UP(val, IO_TLB_SIZE);
++}
++
+ /*
+  * Early SWIOTLB allocation may be too early to allow an architecture to
+  * perform the desired operations.  This function allows the architecture to
+@@ -493,20 +498,20 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 
+ 	tbl_dma_addr &= mask;
+ 
+-	offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++	offset_slots = nr_slots(tbl_dma_addr);
+ 
+ 	/*
+ 	 * Carefully handle integer overflow which can occur when mask == ~0UL.
+ 	 */
+ 	max_slots = mask + 1
+-		    ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT
++		    ? nr_slots(mask + 1)
+ 		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ 
+ 	/*
+ 	 * For mappings greater than or equal to a page, we limit the stride
+ 	 * (and hence alignment) to a page size.
+ 	 */
+-	nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++	nslots = nr_slots(alloc_size);
+ 	if (alloc_size >= PAGE_SIZE)
+ 		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ 	else
+@@ -602,7 +607,7 @@ void swiotlb_tbl_unmap_single(struct dev
+ 			      enum dma_data_direction dir, unsigned long attrs)
+ {
+ 	unsigned long flags;
+-	int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++	int i, count, nslots = nr_slots(alloc_size);
+ 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ 
diff --git a/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch b/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch
new file mode 100644
index 00000000000..afb19f7a4c6
--- /dev/null
+++ b/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch
@@ -0,0 +1,89 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:09 +0000
+Subject: swiotlb: factor out an io_tlb_offset helper
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-4-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: c7fbeca757fe74135d8b6a4c8ddaef76f5775d68
+
+Replace the very generically named OFFSET macro with a little inline
+helper that hardcodes the alignment to the only value ever passed.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -50,9 +50,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/swiotlb.h>
+ 
+-#define OFFSET(val,align) ((unsigned long)	\
+-	                   ( (val) & ( (align) - 1)))
+-
+ #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+ 
+ /*
+@@ -192,6 +189,11 @@ void swiotlb_print_info(void)
+ 	       bytes >> 20);
+ }
+ 
++static inline unsigned long io_tlb_offset(unsigned long val)
++{
++	return val & (IO_TLB_SEGSIZE - 1);
++}
++
+ /*
+  * Early SWIOTLB allocation may be too early to allow an architecture to
+  * perform the desired operations.  This function allows the architecture to
+@@ -241,7 +243,7 @@ int __init swiotlb_init_with_tbl(char *t
+ 		      __func__, alloc_size, PAGE_SIZE);
+ 
+ 	for (i = 0; i < io_tlb_nslabs; i++) {
+-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
++		io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ 	}
+ 	io_tlb_index = 0;
+@@ -375,7 +377,7 @@ swiotlb_late_init_with_tbl(char *tlb, un
+ 		goto cleanup4;
+ 
+ 	for (i = 0; i < io_tlb_nslabs; i++) {
+-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
++		io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ 	}
+ 	io_tlb_index = 0;
+@@ -546,7 +548,9 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 
+ 			for (i = index; i < (int) (index + nslots); i++)
+ 				io_tlb_list[i] = 0;
+-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
++			for (i = index - 1;
++			     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++			     io_tlb_list[i]; i--)
+ 				io_tlb_list[i] = ++count;
+ 			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+ 
+@@ -632,7 +636,9 @@ void swiotlb_tbl_unmap_single(struct dev
+ 		 * Step 2: merge the returned slots with the preceding slots,
+ 		 * if available (non zero)
+ 		 */
+-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
++		for (i = index - 1;
++		     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++		     io_tlb_list[i]; i--)
+ 			io_tlb_list[i] = ++count;
+ 
+ 		io_tlb_used -= nslots;
diff --git a/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch b/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch
new file mode 100644
index 00000000000..ea43786b346
--- /dev/null
+++ b/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch
@@ -0,0 +1,251 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:12 +0000
+Subject: swiotlb: refactor swiotlb_tbl_map_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-7-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 26a7e094783d482f3e125f09945a5bb1d867b2e6
+
+Split out a bunch of self-contained helpers to make the function easier
+to follow.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |  179 +++++++++++++++++++++++++--------------------------
+ 1 file changed, 89 insertions(+), 90 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -468,134 +468,133 @@ static void swiotlb_bounce(phys_addr_t o
+ 	}
+ }
+ 
+-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+-		size_t mapping_size, size_t alloc_size,
+-		enum dma_data_direction dir, unsigned long attrs)
+-{
+-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+-	unsigned long flags;
+-	phys_addr_t tlb_addr;
+-	unsigned int nslots, stride, index, wrap;
+-	int i;
+-	unsigned long mask;
+-	unsigned long offset_slots;
+-	unsigned long max_slots;
+-	unsigned long tmp_io_tlb_used;
++#define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
+ 
+-	if (no_iotlb_memory)
+-		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+-
+-	if (mem_encrypt_active())
+-		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+-
+-	if (mapping_size > alloc_size) {
+-		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+-			      mapping_size, alloc_size);
+-		return (phys_addr_t)DMA_MAPPING_ERROR;
+-	}
+-
+-	mask = dma_get_seg_boundary(hwdev);
++/*
++ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
++ */
++static inline unsigned long get_max_slots(unsigned long boundary_mask)
++{
++	if (boundary_mask == ~0UL)
++		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
++	return nr_slots(boundary_mask + 1);
++}
+ 
+-	tbl_dma_addr &= mask;
++static unsigned int wrap_index(unsigned int index)
++{
++	if (index >= io_tlb_nslabs)
++		return 0;
++	return index;
++}
+ 
+-	offset_slots = nr_slots(tbl_dma_addr);
++/*
++ * Find a suitable number of IO TLB entries size that will fit this request and
++ * allocate a buffer from that IO TLB pool.
++ */
++static int find_slots(struct device *dev, size_t alloc_size)
++{
++	unsigned long boundary_mask = dma_get_seg_boundary(dev);
++	dma_addr_t tbl_dma_addr =
++		phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
++	unsigned long max_slots = get_max_slots(boundary_mask);
++	unsigned int nslots = nr_slots(alloc_size), stride = 1;
++	unsigned int index, wrap, count = 0, i;
++	unsigned long flags;
+ 
+-	/*
+-	 * Carefully handle integer overflow which can occur when mask == ~0UL.
+-	 */
+-	max_slots = mask + 1
+-		    ? nr_slots(mask + 1)
+-		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
++	BUG_ON(!nslots);
+ 
+ 	/*
+ 	 * For mappings greater than or equal to a page, we limit the stride
+ 	 * (and hence alignment) to a page size.
+ 	 */
+-	nslots = nr_slots(alloc_size);
+ 	if (alloc_size >= PAGE_SIZE)
+-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+-	else
+-		stride = 1;
+-
+-	BUG_ON(!nslots);
++		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
+ 
+-	/*
+-	 * Find suitable number of IO TLB entries size that will fit this
+-	 * request and allocate a buffer from that IO TLB pool.
+-	 */
+ 	spin_lock_irqsave(&io_tlb_lock, flags);
+-
+ 	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ 		goto not_found;
+ 
+-	index = ALIGN(io_tlb_index, stride);
+-	if (index >= io_tlb_nslabs)
+-		index = 0;
+-	wrap = index;
+-
++	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ 	do {
+-		while (iommu_is_span_boundary(index, nslots, offset_slots,
+-					      max_slots)) {
+-			index += stride;
+-			if (index >= io_tlb_nslabs)
+-				index = 0;
+-			if (index == wrap)
+-				goto not_found;
+-		}
+-
+ 		/*
+ 		 * If we find a slot that indicates we have 'nslots' number of
+ 		 * contiguous buffers, we allocate the buffers from that slot
+ 		 * and mark the entries as '0' indicating unavailable.
+ 		 */
+-		if (io_tlb_list[index] >= nslots) {
+-			int count = 0;
+-
+-			for (i = index; i < (int) (index + nslots); i++)
+-				io_tlb_list[i] = 0;
+-			for (i = index - 1;
+-			     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+-			     io_tlb_list[i]; i--)
+-				io_tlb_list[i] = ++count;
+-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+-
+-			/*
+-			 * Update the indices to avoid searching in the next
+-			 * round.
+-			 */
+-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
+-					? (index + nslots) : 0);
+-
+-			goto found;
++		if (!iommu_is_span_boundary(index, nslots,
++					    nr_slots(tbl_dma_addr),
++					    max_slots)) {
++			if (io_tlb_list[index] >= nslots)
++				goto found;
+ 		}
+-		index += stride;
+-		if (index >= io_tlb_nslabs)
+-			index = 0;
++		index = wrap_index(index + stride);
+ 	} while (index != wrap);
+ 
+ not_found:
+-	tmp_io_tlb_used = io_tlb_used;
+-
+ 	spin_unlock_irqrestore(&io_tlb_lock, flags);
+-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
+-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+-	return (phys_addr_t)DMA_MAPPING_ERROR;
++	return -1;
++
+ found:
++	for (i = index; i < index + nslots; i++)
++		io_tlb_list[i] = 0;
++	for (i = index - 1;
++	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++	     io_tlb_list[i]; i--)
++		io_tlb_list[i] = ++count;
++
++	/*
++	 * Update the indices to avoid searching in the next round.
++	 */
++	if (index + nslots < io_tlb_nslabs)
++		io_tlb_index = index + nslots;
++	else
++		io_tlb_index = 0;
+ 	io_tlb_used += nslots;
++
+ 	spin_unlock_irqrestore(&io_tlb_lock, flags);
++	return index;
++}
++
++phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
++		size_t mapping_size, size_t alloc_size,
++		enum dma_data_direction dir, unsigned long attrs)
++{
++	unsigned int index, i;
++	phys_addr_t tlb_addr;
++
++	if (no_iotlb_memory)
++		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
++
++	if (mem_encrypt_active())
++		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
++
++	if (mapping_size > alloc_size) {
++		dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
++			      mapping_size, alloc_size);
++		return (phys_addr_t)DMA_MAPPING_ERROR;
++	}
++
++	index = find_slots(dev, alloc_size);
++	if (index == -1) {
++		if (!(attrs & DMA_ATTR_NO_WARN))
++			dev_warn_ratelimited(dev,
++	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
++				 alloc_size, io_tlb_nslabs, io_tlb_used);
++		return (phys_addr_t)DMA_MAPPING_ERROR;
++	}
+ 
+ 	/*
+ 	 * Save away the mapping from the original address to the DMA address.
+ 	 * This is needed when we sync the memory.  Then we sync the buffer if
+ 	 * needed.
+ 	 */
+-	for (i = 0; i < nslots; i++)
+-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
++	for (i = 0; i < nr_slots(alloc_size); i++)
++		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
++
++	tlb_addr = slot_addr(io_tlb_start, index);
+ 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+-
+ 	return tlb_addr;
+ }
+ 
diff --git a/queue-5.11/swiotlb-respect-min_align_mask.patch b/queue-5.11/swiotlb-respect-min_align_mask.patch
new file mode 100644
index 00000000000..ec64c610aab
--- /dev/null
+++ b/queue-5.11/swiotlb-respect-min_align_mask.patch
@@ -0,0 +1,142 @@
+From foo@baz Wed May  5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:14 +0000
+Subject: swiotlb: respect min_align_mask
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-9-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 1f221a0d0dbf0e48ef3a9c62871281d6a7819f05
+
+swiotlb: respect min_align_mask
+
+Respect the min_align_mask in struct device_dma_parameters in swiotlb.
+
+There are two parts to it:
+ 1) for the lower bits of the alignment inside the io tlb slot, just
+    extend the size of the allocation and leave the start of the slot
+    empty
+ 2) for the high bits ensure we find a slot that matches the high bits
+    of the alignment to avoid wasting too much memory
+
+Based on an earlier patch from Jianxiong Gao <jxgao@google.com>.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c |   41 +++++++++++++++++++++++++++++++----------
+ 1 file changed, 31 insertions(+), 10 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -471,6 +471,14 @@ static void swiotlb_bounce(phys_addr_t o
+ #define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
+ 
+ /*
++ * Return the offset into a iotlb slot required to keep the device happy.
++ */
++static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
++{
++	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
++}
++
++/*
+  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+  */
+ static inline unsigned long get_max_slots(unsigned long boundary_mask)
+@@ -491,24 +499,29 @@ static unsigned int wrap_index(unsigned
+  * Find a suitable number of IO TLB entries size that will fit this request and
+  * allocate a buffer from that IO TLB pool.
+  */
+-static int find_slots(struct device *dev, size_t alloc_size)
++static int find_slots(struct device *dev, phys_addr_t orig_addr,
++		size_t alloc_size)
+ {
+ 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ 	dma_addr_t tbl_dma_addr =
+ 		phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ 	unsigned long max_slots = get_max_slots(boundary_mask);
+-	unsigned int nslots = nr_slots(alloc_size), stride = 1;
++	unsigned int iotlb_align_mask =
++		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
++	unsigned int nslots = nr_slots(alloc_size), stride;
+ 	unsigned int index, wrap, count = 0, i;
+ 	unsigned long flags;
+ 
+ 	BUG_ON(!nslots);
+ 
+ 	/*
+-	 * For mappings greater than or equal to a page, we limit the stride
+-	 * (and hence alignment) to a page size.
++	 * For mappings with an alignment requirement don't bother looping to
++	 * unaligned slots once we found an aligned one.  For allocations of
++	 * PAGE_SIZE or larger only look for page aligned allocations.
+ 	 */
++	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ 	if (alloc_size >= PAGE_SIZE)
+-		stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
++		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+ 
+ 	spin_lock_irqsave(&io_tlb_lock, flags);
+ 	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+@@ -516,6 +529,12 @@ static int find_slots(struct device *dev
+ 
+ 	index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ 	do {
++		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
++		    (orig_addr & iotlb_align_mask)) {
++			index = wrap_index(index + 1);
++			continue;
++		}
++
+ 		/*
+ 		 * If we find a slot that indicates we have 'nslots' number of
+ 		 * contiguous buffers, we allocate the buffers from that slot
+@@ -559,6 +578,7 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 		size_t mapping_size, size_t alloc_size,
+ 		enum dma_data_direction dir, unsigned long attrs)
+ {
++	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ 	unsigned int index, i;
+ 	phys_addr_t tlb_addr;
+ 
+@@ -574,7 +594,7 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 		return (phys_addr_t)DMA_MAPPING_ERROR;
+ 	}
+ 
+-	index = find_slots(dev, alloc_size);
++	index = find_slots(dev, orig_addr, alloc_size + offset);
+ 	if (index == -1) {
+ 		if (!(attrs & DMA_ATTR_NO_WARN))
+ 			dev_warn_ratelimited(dev,
+@@ -588,10 +608,10 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ 	 * This is needed when we sync the memory.  Then we sync the buffer if
+ 	 * needed.
+ 	 */
+-	for (i = 0; i < nr_slots(alloc_size); i++)
++	for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ 		io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+ 
+-	tlb_addr = slot_addr(io_tlb_start, index);
++	tlb_addr = slot_addr(io_tlb_start, index) + offset;
+ 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+@@ -606,8 +626,9 @@ void swiotlb_tbl_unmap_single(struct dev
+ 			      enum dma_data_direction dir, unsigned long attrs)
+ {
+ 	unsigned long flags;
+-	int i, count, nslots = nr_slots(alloc_size);
+-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
++	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
++	int i, count, nslots = nr_slots(alloc_size + offset);
++	int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
+ 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ 
+ 	/*