--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:07 +0000
+Subject: driver core: add a min_align_mask field to struct device_dma_parameters
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-2-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 36950f2da1ea4cb683be174f6f581e25b2d33e71
+
+Some devices rely on the address offset in a page to function
+correctly (NVMe driver as an example). These devices may use
+a different page size than the Linux kernel. The address offset
+has to be preserved upon mapping, and in order to do so, we
+need to record the page_offset_mask first.
+
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/device.h | 1 +
+ include/linux/dma-mapping.h | 16 ++++++++++++++++
+ 2 files changed, 17 insertions(+)
+
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -291,6 +291,7 @@ struct device_dma_parameters {
+ * sg limitations.
+ */
+ unsigned int max_segment_size;
++ unsigned int min_align_mask;
+ unsigned long segment_boundary_mask;
+ };
+
+--- a/include/linux/dma-mapping.h
++++ b/include/linux/dma-mapping.h
+@@ -500,6 +500,22 @@ static inline int dma_set_seg_boundary(s
+ return -EIO;
+ }
+
++static inline unsigned int dma_get_min_align_mask(struct device *dev)
++{
++ if (dev->dma_parms)
++ return dev->dma_parms->min_align_mask;
++ return 0;
++}
++
++static inline int dma_set_min_align_mask(struct device *dev,
++ unsigned int min_align_mask)
++{
++ if (WARN_ON_ONCE(!dev->dma_parms))
++ return -EIO;
++ dev->dma_parms->min_align_mask = min_align_mask;
++ return 0;
++}
++
+ static inline int dma_get_cache_alignment(void)
+ {
+ #ifdef ARCH_DMA_MINALIGN
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:15 +0000
+Subject: nvme-pci: set min_align_mask
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-10-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 3d2d861eb03e8ee96dc430a54361c900cbe28afd
+
+The PRP addressing scheme requires all PRP entries except for the
+first one to have a zero offset into the NVMe controller pages (which
+can be different from the Linux PAGE_SIZE). Use the min_align_mask
+device parameter to ensure that swiotlb does not change the address
+of the buffer modulo the device page size to ensure that the PRPs
+won't be malformed.
+
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/host/pci.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -2629,6 +2629,7 @@ static void nvme_reset_work(struct work_
+ * Don't limit the IOMMU merged segment size.
+ */
+ dma_set_max_seg_size(dev->dev, 0xffffffff);
++ dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
+
+ mutex_unlock(&dev->shutdown_lock);
+
capabilities-require-cap_setfcap-to-map-uid-0.patch
perf-ftrace-fix-access-to-pid-in-array-when-setting-.patch
tools-cgroup-slabinfo.py-updated-to-work-on-current-.patch
+driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch
+swiotlb-add-a-io_tlb_size-define.patch
+swiotlb-factor-out-an-io_tlb_offset-helper.patch
+swiotlb-factor-out-a-nr_slots-helper.patch
+swiotlb-clean-up-swiotlb_tbl_unmap_single.patch
+swiotlb-refactor-swiotlb_tbl_map_single.patch
+swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch
+swiotlb-respect-min_align_mask.patch
+nvme-pci-set-min_align_mask.patch
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:08 +0000
+Subject: swiotlb: add a IO_TLB_SIZE define
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-3-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: b5d7ccb7aac3895c2138fe0980a109116ce15eff
+
+Add a new IO_TLB_SIZE define instead of open coding it using
+IO_TLB_SHIFT all over.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swiotlb.h | 1 +
+ kernel/dma/swiotlb.c | 12 ++++++------
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/include/linux/swiotlb.h
++++ b/include/linux/swiotlb.h
+@@ -29,6 +29,7 @@ enum swiotlb_force {
+ * controllable.
+ */
+ #define IO_TLB_SHIFT 11
++#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
+
+ /* default to 64MB */
+ #define IO_TLB_DEFAULT_SIZE (64UL<<20)
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -491,20 +491,20 @@ phys_addr_t swiotlb_tbl_map_single(struc
+
+ tbl_dma_addr &= mask;
+
+- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++ offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+
+ /*
+ * Carefully handle integer overflow which can occur when mask == ~0UL.
+ */
+ max_slots = mask + 1
+- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
++ ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT
+ : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+
+ /*
+ * For mappings greater than or equal to a page, we limit the stride
+ * (and hence alignment) to a page size.
+ */
+- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++ nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+ if (alloc_size >= PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+@@ -598,7 +598,7 @@ void swiotlb_tbl_unmap_single(struct dev
+ enum dma_data_direction dir, unsigned long attrs)
+ {
+ unsigned long flags;
+- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
++ int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
+ int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
+
+@@ -649,7 +649,7 @@ void swiotlb_tbl_sync_single(struct devi
+
+ if (orig_addr == INVALID_PHYS_ADDR)
+ return;
+- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
++ orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1);
+
+ switch (target) {
+ case SYNC_FOR_CPU:
+@@ -707,7 +707,7 @@ dma_addr_t swiotlb_map(struct device *de
+
+ size_t swiotlb_max_mapping_size(struct device *dev)
+ {
+- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
++ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
+ }
+
+ bool is_swiotlb_active(void)
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:11 +0000
+Subject: swiotlb: clean up swiotlb_tbl_unmap_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-6-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: ca10d0f8e530600ec63c603dbace2c30927d70b7
+
+swiotlb: clean up swiotlb_tbl_unmap_single
+
+Remove a layer of pointless indentation, replace a hard to follow
+ternary expression with a plain if/else.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 41 +++++++++++++++++++++--------------------
+ 1 file changed, 21 insertions(+), 20 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -626,28 +626,29 @@ void swiotlb_tbl_unmap_single(struct dev
+ * with slots below and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+- {
+- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+- io_tlb_list[index + nslots] : 0);
+- /*
+- * Step 1: return the slots to the free list, merging the
+- * slots with superceeding slots
+- */
+- for (i = index + nslots - 1; i >= index; i--) {
+- io_tlb_list[i] = ++count;
+- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+- }
+- /*
+- * Step 2: merge the returned slots with the preceding slots,
+- * if available (non zero)
+- */
+- for (i = index - 1;
+- io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+- io_tlb_list[i]; i--)
+- io_tlb_list[i] = ++count;
++ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
++ count = io_tlb_list[index + nslots];
++ else
++ count = 0;
+
+- io_tlb_used -= nslots;
++ /*
++ * Step 1: return the slots to the free list, merging the slots with
++ * superceeding slots
++ */
++ for (i = index + nslots - 1; i >= index; i--) {
++ io_tlb_list[i] = ++count;
++ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ }
++
++ /*
++ * Step 2: merge the returned slots with the preceding slots, if
++ * available (non zero)
++ */
++ for (i = index - 1;
++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
++ i--)
++ io_tlb_list[i] = ++count;
++ io_tlb_used -= nslots;
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ }
+
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:13 +0000
+Subject: swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-8-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 16fc3cef33a04632ab6b31758abdd77563a20759
+
+swiotlb_tbl_map_single currently never sets a tlb_addr that is not
+aligned to the tlb bucket size. But we're going to add such a case
+soon, for which this adjustment would be bogus.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -660,7 +660,6 @@ void swiotlb_tbl_sync_single(struct devi
+
+ if (orig_addr == INVALID_PHYS_ADDR)
+ return;
+- orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1);
+
+ switch (target) {
+ case SYNC_FOR_CPU:
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:10 +0000
+Subject: swiotlb: factor out a nr_slots helper
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-5-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: c32a77fd18780a5192dfb6eec69f239faebf28fd
+
+Factor out a helper to find the number of slots for a given size.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -194,6 +194,11 @@ static inline unsigned long io_tlb_offse
+ return val & (IO_TLB_SEGSIZE - 1);
+ }
+
++static inline unsigned long nr_slots(u64 val)
++{
++ return DIV_ROUND_UP(val, IO_TLB_SIZE);
++}
++
+ /*
+ * Early SWIOTLB allocation may be too early to allow an architecture to
+ * perform the desired operations. This function allows the architecture to
+@@ -493,20 +498,20 @@ phys_addr_t swiotlb_tbl_map_single(struc
+
+ tbl_dma_addr &= mask;
+
+- offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++ offset_slots = nr_slots(tbl_dma_addr);
+
+ /*
+ * Carefully handle integer overflow which can occur when mask == ~0UL.
+ */
+ max_slots = mask + 1
+- ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT
++ ? nr_slots(mask + 1)
+ : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+
+ /*
+ * For mappings greater than or equal to a page, we limit the stride
+ * (and hence alignment) to a page size.
+ */
+- nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++ nslots = nr_slots(alloc_size);
+ if (alloc_size >= PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+@@ -602,7 +607,7 @@ void swiotlb_tbl_unmap_single(struct dev
+ enum dma_data_direction dir, unsigned long attrs)
+ {
+ unsigned long flags;
+- int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT;
++ int i, count, nslots = nr_slots(alloc_size);
+ int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
+
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:09 +0000
+Subject: swiotlb: factor out an io_tlb_offset helper
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-4-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: c7fbeca757fe74135d8b6a4c8ddaef76f5775d68
+
+Replace the very genericly named OFFSET macro with a little inline
+helper that hardcodes the alignment to the only value ever passed.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -50,9 +50,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/swiotlb.h>
+
+-#define OFFSET(val,align) ((unsigned long) \
+- ( (val) & ( (align) - 1)))
+-
+ #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+ /*
+@@ -192,6 +189,11 @@ void swiotlb_print_info(void)
+ bytes >> 20);
+ }
+
++static inline unsigned long io_tlb_offset(unsigned long val)
++{
++ return val & (IO_TLB_SEGSIZE - 1);
++}
++
+ /*
+ * Early SWIOTLB allocation may be too early to allow an architecture to
+ * perform the desired operations. This function allows the architecture to
+@@ -241,7 +243,7 @@ int __init swiotlb_init_with_tbl(char *t
+ __func__, alloc_size, PAGE_SIZE);
+
+ for (i = 0; i < io_tlb_nslabs; i++) {
+- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
++ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ }
+ io_tlb_index = 0;
+@@ -375,7 +377,7 @@ swiotlb_late_init_with_tbl(char *tlb, un
+ goto cleanup4;
+
+ for (i = 0; i < io_tlb_nslabs; i++) {
+- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
++ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ }
+ io_tlb_index = 0;
+@@ -546,7 +548,9 @@ phys_addr_t swiotlb_tbl_map_single(struc
+
+ for (i = index; i < (int) (index + nslots); i++)
+ io_tlb_list[i] = 0;
+- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
++ for (i = index - 1;
++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++ io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+ tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+@@ -632,7 +636,9 @@ void swiotlb_tbl_unmap_single(struct dev
+ * Step 2: merge the returned slots with the preceding slots,
+ * if available (non zero)
+ */
+- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
++ for (i = index - 1;
++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++ io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+
+ io_tlb_used -= nslots;
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:12 +0000
+Subject: swiotlb: refactor swiotlb_tbl_map_single
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-7-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 26a7e094783d482f3e125f09945a5bb1d867b2e6
+
+Split out a bunch of a self-contained helpers to make the function easier
+to follow.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 179 +++++++++++++++++++++++++--------------------------
+ 1 file changed, 89 insertions(+), 90 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -468,134 +468,133 @@ static void swiotlb_bounce(phys_addr_t o
+ }
+ }
+
+-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+- size_t mapping_size, size_t alloc_size,
+- enum dma_data_direction dir, unsigned long attrs)
+-{
+- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+- unsigned long flags;
+- phys_addr_t tlb_addr;
+- unsigned int nslots, stride, index, wrap;
+- int i;
+- unsigned long mask;
+- unsigned long offset_slots;
+- unsigned long max_slots;
+- unsigned long tmp_io_tlb_used;
++#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+
+- if (no_iotlb_memory)
+- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+-
+- if (mem_encrypt_active())
+- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+-
+- if (mapping_size > alloc_size) {
+- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+- mapping_size, alloc_size);
+- return (phys_addr_t)DMA_MAPPING_ERROR;
+- }
+-
+- mask = dma_get_seg_boundary(hwdev);
++/*
++ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
++ */
++static inline unsigned long get_max_slots(unsigned long boundary_mask)
++{
++ if (boundary_mask == ~0UL)
++ return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
++ return nr_slots(boundary_mask + 1);
++}
+
+- tbl_dma_addr &= mask;
++static unsigned int wrap_index(unsigned int index)
++{
++ if (index >= io_tlb_nslabs)
++ return 0;
++ return index;
++}
+
+- offset_slots = nr_slots(tbl_dma_addr);
++/*
++ * Find a suitable number of IO TLB entries size that will fit this request and
++ * allocate a buffer from that IO TLB pool.
++ */
++static int find_slots(struct device *dev, size_t alloc_size)
++{
++ unsigned long boundary_mask = dma_get_seg_boundary(dev);
++ dma_addr_t tbl_dma_addr =
++ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
++ unsigned long max_slots = get_max_slots(boundary_mask);
++ unsigned int nslots = nr_slots(alloc_size), stride = 1;
++ unsigned int index, wrap, count = 0, i;
++ unsigned long flags;
+
+- /*
+- * Carefully handle integer overflow which can occur when mask == ~0UL.
+- */
+- max_slots = mask + 1
+- ? nr_slots(mask + 1)
+- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
++ BUG_ON(!nslots);
+
+ /*
+ * For mappings greater than or equal to a page, we limit the stride
+ * (and hence alignment) to a page size.
+ */
+- nslots = nr_slots(alloc_size);
+ if (alloc_size >= PAGE_SIZE)
+- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+- else
+- stride = 1;
+-
+- BUG_ON(!nslots);
++ stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
+
+- /*
+- * Find suitable number of IO TLB entries size that will fit this
+- * request and allocate a buffer from that IO TLB pool.
+- */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+-
+ if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ goto not_found;
+
+- index = ALIGN(io_tlb_index, stride);
+- if (index >= io_tlb_nslabs)
+- index = 0;
+- wrap = index;
+-
++ index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ do {
+- while (iommu_is_span_boundary(index, nslots, offset_slots,
+- max_slots)) {
+- index += stride;
+- if (index >= io_tlb_nslabs)
+- index = 0;
+- if (index == wrap)
+- goto not_found;
+- }
+-
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot
+ * and mark the entries as '0' indicating unavailable.
+ */
+- if (io_tlb_list[index] >= nslots) {
+- int count = 0;
+-
+- for (i = index; i < (int) (index + nslots); i++)
+- io_tlb_list[i] = 0;
+- for (i = index - 1;
+- io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+- io_tlb_list[i]; i--)
+- io_tlb_list[i] = ++count;
+- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+-
+- /*
+- * Update the indices to avoid searching in the next
+- * round.
+- */
+- io_tlb_index = ((index + nslots) < io_tlb_nslabs
+- ? (index + nslots) : 0);
+-
+- goto found;
++ if (!iommu_is_span_boundary(index, nslots,
++ nr_slots(tbl_dma_addr),
++ max_slots)) {
++ if (io_tlb_list[index] >= nslots)
++ goto found;
+ }
+- index += stride;
+- if (index >= io_tlb_nslabs)
+- index = 0;
++ index = wrap_index(index + stride);
+ } while (index != wrap);
+
+ not_found:
+- tmp_io_tlb_used = io_tlb_used;
+-
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
+- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+- return (phys_addr_t)DMA_MAPPING_ERROR;
++ return -1;
++
+ found:
++ for (i = index; i < index + nslots; i++)
++ io_tlb_list[i] = 0;
++ for (i = index - 1;
++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
++ io_tlb_list[i]; i--)
++ io_tlb_list[i] = ++count;
++
++ /*
++ * Update the indices to avoid searching in the next round.
++ */
++ if (index + nslots < io_tlb_nslabs)
++ io_tlb_index = index + nslots;
++ else
++ io_tlb_index = 0;
+ io_tlb_used += nslots;
++
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
++ return index;
++}
++
++phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
++ size_t mapping_size, size_t alloc_size,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ unsigned int index, i;
++ phys_addr_t tlb_addr;
++
++ if (no_iotlb_memory)
++ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
++
++ if (mem_encrypt_active())
++ pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
++
++ if (mapping_size > alloc_size) {
++ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
++ mapping_size, alloc_size);
++ return (phys_addr_t)DMA_MAPPING_ERROR;
++ }
++
++ index = find_slots(dev, alloc_size);
++ if (index == -1) {
++ if (!(attrs & DMA_ATTR_NO_WARN))
++ dev_warn_ratelimited(dev,
++ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
++ alloc_size, io_tlb_nslabs, io_tlb_used);
++ return (phys_addr_t)DMA_MAPPING_ERROR;
++ }
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+- for (i = 0; i < nslots; i++)
+- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
++ for (i = 0; i < nr_slots(alloc_size); i++)
++ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
++
++ tlb_addr = slot_addr(io_tlb_start, index);
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+-
+ return tlb_addr;
+ }
+
--- /dev/null
+From foo@baz Wed May 5 09:23:46 AM CEST 2021
+From: Jianxiong Gao <jxgao@google.com>
+Date: Thu, 29 Apr 2021 17:33:14 +0000
+Subject: swiotlb: respect min_align_mask
+To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org
+Cc: Jianxiong Gao <jxgao@google.com>, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Message-ID: <20210429173315.1252465-9-jxgao@google.com>
+
+From: Jianxiong Gao <jxgao@google.com>
+
+commit: 1f221a0d0dbf0e48ef3a9c62871281d6a7819f05
+
+swiotlb: respect min_align_mask
+
+Respect the min_align_mask in struct device_dma_parameters in swiotlb.
+
+There are two parts to it:
+ 1) for the lower bits of the alignment inside the io tlb slot, just
+ extent the size of the allocation and leave the start of the slot
+ empty
+ 2) for the high bits ensure we find a slot that matches the high bits
+ of the alignment to avoid wasting too much memory
+
+Based on an earlier patch from Jianxiong Gao <jxgao@google.com>.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Jianxiong Gao <jxgao@google.com>
+Tested-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jianxiong Gao <jxgao@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/dma/swiotlb.c | 41 +++++++++++++++++++++++++++++++----------
+ 1 file changed, 31 insertions(+), 10 deletions(-)
+
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -471,6 +471,14 @@ static void swiotlb_bounce(phys_addr_t o
+ #define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+
+ /*
++ * Return the offset into a iotlb slot required to keep the device happy.
++ */
++static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
++{
++ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
++}
++
++/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+ static inline unsigned long get_max_slots(unsigned long boundary_mask)
+@@ -491,24 +499,29 @@ static unsigned int wrap_index(unsigned
+ * Find a suitable number of IO TLB entries size that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+-static int find_slots(struct device *dev, size_t alloc_size)
++static int find_slots(struct device *dev, phys_addr_t orig_addr,
++ size_t alloc_size)
+ {
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ dma_addr_t tbl_dma_addr =
+ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ unsigned long max_slots = get_max_slots(boundary_mask);
+- unsigned int nslots = nr_slots(alloc_size), stride = 1;
++ unsigned int iotlb_align_mask =
++ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
++ unsigned int nslots = nr_slots(alloc_size), stride;
+ unsigned int index, wrap, count = 0, i;
+ unsigned long flags;
+
+ BUG_ON(!nslots);
+
+ /*
+- * For mappings greater than or equal to a page, we limit the stride
+- * (and hence alignment) to a page size.
++ * For mappings with an alignment requirement don't bother looping to
++ * unaligned slots once we found an aligned one. For allocations of
++ * PAGE_SIZE or larger only look for page aligned allocations.
+ */
++ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ if (alloc_size >= PAGE_SIZE)
+- stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
++ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+@@ -516,6 +529,12 @@ static int find_slots(struct device *dev
+
+ index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ do {
++ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
++ (orig_addr & iotlb_align_mask)) {
++ index = wrap_index(index + 1);
++ continue;
++ }
++
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot
+@@ -559,6 +578,7 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
+ {
++ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int index, i;
+ phys_addr_t tlb_addr;
+
+@@ -574,7 +594,7 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+- index = find_slots(dev, alloc_size);
++ index = find_slots(dev, orig_addr, alloc_size + offset);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+@@ -588,10 +608,10 @@ phys_addr_t swiotlb_tbl_map_single(struc
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+- for (i = 0; i < nr_slots(alloc_size); i++)
++ for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+
+- tlb_addr = slot_addr(io_tlb_start, index);
++ tlb_addr = slot_addr(io_tlb_start, index) + offset;
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+@@ -606,8 +626,9 @@ void swiotlb_tbl_unmap_single(struct dev
+ enum dma_data_direction dir, unsigned long attrs)
+ {
+ unsigned long flags;
+- int i, count, nslots = nr_slots(alloc_size);
+- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
++ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
++ int i, count, nslots = nr_slots(alloc_size + offset);
++ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = io_tlb_orig_addr[index];
+
+ /*