From: Greg Kroah-Hartman Date: Wed, 5 May 2021 07:27:31 +0000 (+0200) Subject: 5.11-stable patches X-Git-Tag: v4.19.190~20 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=24d36bc1cef314a6b4f35de402987718cf797058;p=thirdparty%2Fkernel%2Fstable-queue.git 5.11-stable patches added patches: driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch nvme-pci-set-min_align_mask.patch swiotlb-add-a-io_tlb_size-define.patch swiotlb-clean-up-swiotlb_tbl_unmap_single.patch swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch swiotlb-factor-out-a-nr_slots-helper.patch swiotlb-factor-out-an-io_tlb_offset-helper.patch swiotlb-refactor-swiotlb_tbl_map_single.patch swiotlb-respect-min_align_mask.patch --- diff --git a/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch b/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch new file mode 100644 index 00000000000..00fa2e23467 --- /dev/null +++ b/queue-5.11/driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch @@ -0,0 +1,63 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:07 +0000 +Subject: driver core: add a min_align_mask field to struct device_dma_parameters +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Greg Kroah-Hartman , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-2-jxgao@google.com> + +From: Jianxiong Gao + +commit: 36950f2da1ea4cb683be174f6f581e25b2d33e71 + +Some devices rely on the address offset in a page to function +correctly (NVMe driver as an example). These devices may use +a different page size than the Linux kernel. The address offset +has to be preserved upon mapping, and in order to do so, we +need to record the page_offset_mask first. + +Signed-off-by: Jianxiong Gao +Signed-off-by: Christoph Hellwig +Acked-by: Greg Kroah-Hartman +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/device.h | 1 + + include/linux/dma-mapping.h | 16 ++++++++++++++++ + 2 files changed, 17 insertions(+) + +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -291,6 +291,7 @@ struct device_dma_parameters { + * sg limitations. + */ + unsigned int max_segment_size; ++ unsigned int min_align_mask; + unsigned long segment_boundary_mask; + }; + +--- a/include/linux/dma-mapping.h ++++ b/include/linux/dma-mapping.h +@@ -500,6 +500,22 @@ static inline int dma_set_seg_boundary(s + return -EIO; + } + ++static inline unsigned int dma_get_min_align_mask(struct device *dev) ++{ ++ if (dev->dma_parms) ++ return dev->dma_parms->min_align_mask; ++ return 0; ++} ++ ++static inline int dma_set_min_align_mask(struct device *dev, ++ unsigned int min_align_mask) ++{ ++ if (WARN_ON_ONCE(!dev->dma_parms)) ++ return -EIO; ++ dev->dma_parms->min_align_mask = min_align_mask; ++ return 0; ++} ++ + static inline int dma_get_cache_alignment(void) + { + #ifdef ARCH_DMA_MINALIGN diff --git a/queue-5.11/nvme-pci-set-min_align_mask.patch b/queue-5.11/nvme-pci-set-min_align_mask.patch new file mode 100644 index 00000000000..fb7a6744197 --- /dev/null +++ b/queue-5.11/nvme-pci-set-min_align_mask.patch @@ -0,0 +1,38 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:15 +0000 +Subject: nvme-pci: set min_align_mask +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-10-jxgao@google.com> + +From: Jianxiong Gao + +commit: 3d2d861eb03e8ee96dc430a54361c900cbe28afd + +The PRP addressing scheme requires all PRP entries except for the +first one to have a zero offset into the NVMe controller pages (which +can be different from the Linux PAGE_SIZE). Use the min_align_mask +device parameter to ensure that swiotlb does not change the address +of the buffer modulo the device page size to ensure that the PRPs +won't be malformed. + +Signed-off-by: Jianxiong Gao +Signed-off-by: Christoph Hellwig +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/pci.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -2629,6 +2629,7 @@ static void nvme_reset_work(struct work_ + * Don't limit the IOMMU merged segment size. + */ + dma_set_max_seg_size(dev->dev, 0xffffffff); ++ dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); + + mutex_unlock(&dev->shutdown_lock); + diff --git a/queue-5.11/series b/queue-5.11/series index 3262abdcd91..5f7caba8cd0 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -10,3 +10,12 @@ perf-data-fix-error-return-code-in-perf_data__create.patch capabilities-require-cap_setfcap-to-map-uid-0.patch perf-ftrace-fix-access-to-pid-in-array-when-setting-.patch tools-cgroup-slabinfo.py-updated-to-work-on-current-.patch +driver-core-add-a-min_align_mask-field-to-struct-device_dma_parameters.patch +swiotlb-add-a-io_tlb_size-define.patch +swiotlb-factor-out-an-io_tlb_offset-helper.patch +swiotlb-factor-out-a-nr_slots-helper.patch +swiotlb-clean-up-swiotlb_tbl_unmap_single.patch +swiotlb-refactor-swiotlb_tbl_map_single.patch +swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch +swiotlb-respect-min_align_mask.patch +nvme-pci-set-min_align_mask.patch diff --git a/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch b/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch new file mode 100644 index 00000000000..c415b655ac8 --- /dev/null +++ b/queue-5.11/swiotlb-add-a-io_tlb_size-define.patch @@ -0,0 +1,89 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:08 +0000 +Subject: swiotlb: add a IO_TLB_SIZE define +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-3-jxgao@google.com> + +From: Jianxiong Gao + +commit: b5d7ccb7aac3895c2138fe0980a109116ce15eff + +Add a new IO_TLB_SIZE define instead open coding it using +IO_TLB_SHIFT all over. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/swiotlb.h | 1 + + kernel/dma/swiotlb.c | 12 ++++++------ + 2 files changed, 7 insertions(+), 6 deletions(-) + +--- a/include/linux/swiotlb.h ++++ b/include/linux/swiotlb.h +@@ -29,6 +29,7 @@ enum swiotlb_force { + * controllable. + */ + #define IO_TLB_SHIFT 11 ++#define IO_TLB_SIZE (1 << IO_TLB_SHIFT) + + /* default to 64MB */ + #define IO_TLB_DEFAULT_SIZE (64UL<<20) +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -491,20 +491,20 @@ phys_addr_t swiotlb_tbl_map_single(struc + + tbl_dma_addr &= mask; + +- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; ++ offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT; + + /* + * Carefully handle integer overflow which can occur when mask == ~0UL. + */ + max_slots = mask + 1 +- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT ++ ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + + /* + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. + */ +- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; ++ nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT; + if (alloc_size >= PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else +@@ -598,7 +598,7 @@ void swiotlb_tbl_unmap_single(struct dev + enum dma_data_direction dir, unsigned long attrs) + { + unsigned long flags; +- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; ++ int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; + +@@ -649,7 +649,7 @@ void swiotlb_tbl_sync_single(struct devi + + if (orig_addr == INVALID_PHYS_ADDR) + return; +- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); ++ orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1); + + switch (target) { + case SYNC_FOR_CPU: +@@ -707,7 +707,7 @@ dma_addr_t swiotlb_map(struct device *de + + size_t swiotlb_max_mapping_size(struct device *dev) + { +- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; ++ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE; + } + + bool is_swiotlb_active(void) diff --git a/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch b/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch new file mode 100644 index 00000000000..5a1e2376b17 --- /dev/null +++ b/queue-5.11/swiotlb-clean-up-swiotlb_tbl_unmap_single.patch @@ -0,0 +1,79 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:11 +0000 +Subject: swiotlb: clean up swiotlb_tbl_unmap_single +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-6-jxgao@google.com> + +From: Jianxiong Gao + +commit: ca10d0f8e530600ec63c603dbace2c30927d70b7 + +swiotlb: clean up swiotlb_tbl_unmap_single + +Remove a layer of pointless indentation, replace a hard to follow +ternary expression with a plain if/else. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 41 +++++++++++++++++++++-------------------- + 1 file changed, 21 insertions(+), 20 deletions(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -626,28 +626,29 @@ void swiotlb_tbl_unmap_single(struct dev + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); +- { +- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? +- io_tlb_list[index + nslots] : 0); +- /* +- * Step 1: return the slots to the free list, merging the +- * slots with superceeding slots +- */ +- for (i = index + nslots - 1; i >= index; i--) { +- io_tlb_list[i] = ++count; +- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; +- } +- /* +- * Step 2: merge the returned slots with the preceding slots, +- * if available (non zero) +- */ +- for (i = index - 1; +- io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && +- io_tlb_list[i]; i--) +- io_tlb_list[i] = ++count; ++ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) ++ count = io_tlb_list[index + nslots]; ++ else ++ count = 0; + +- io_tlb_used -= nslots; ++ /* ++ * Step 1: return the slots to the free list, merging the slots with ++ * superceeding slots ++ */ ++ for (i = index + nslots - 1; i >= index; i--) { ++ io_tlb_list[i] = ++count; ++ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } ++ ++ /* ++ * Step 2: merge the returned slots with the preceding slots, if ++ * available (non zero) ++ */ ++ for (i = index - 1; ++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; ++ i--) ++ io_tlb_list[i] = ++count; ++ io_tlb_used -= nslots; + spin_unlock_irqrestore(&io_tlb_lock, flags); + } + diff --git a/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch b/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch new file mode 100644 index 00000000000..bffe59b179e --- /dev/null +++ b/queue-5.11/swiotlb-don-t-modify-orig_addr-in-swiotlb_tbl_sync_single.patch @@ -0,0 +1,36 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:13 +0000 +Subject: swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-8-jxgao@google.com> + +From: Jianxiong Gao + +commit: 16fc3cef33a04632ab6b31758abdd77563a20759 + +swiotlb_tbl_map_single currently nevers sets a tlb_addr that is not +aligned to the tlb bucket size. But we're going to add such a case +soon, for which this adjustment would be bogus. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -660,7 +660,6 @@ void swiotlb_tbl_sync_single(struct devi + + if (orig_addr == INVALID_PHYS_ADDR) + return; +- orig_addr += (unsigned long)tlb_addr & (IO_TLB_SIZE - 1); + + switch (target) { + case SYNC_FOR_CPU: diff --git a/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch b/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch new file mode 100644 index 00000000000..e5b3988f7af --- /dev/null +++ b/queue-5.11/swiotlb-factor-out-a-nr_slots-helper.patch @@ -0,0 +1,71 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:10 +0000 +Subject: swiotlb: factor out a nr_slots helper +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-5-jxgao@google.com> + +From: Jianxiong Gao + +commit: c32a77fd18780a5192dfb6eec69f239faebf28fd + +Factor out a helper to find the number of slots for a given size. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -194,6 +194,11 @@ static inline unsigned long io_tlb_offse + return val & (IO_TLB_SEGSIZE - 1); + } + ++static inline unsigned long nr_slots(u64 val) ++{ ++ return DIV_ROUND_UP(val, IO_TLB_SIZE); ++} ++ + /* + * Early SWIOTLB allocation may be too early to allow an architecture to + * perform the desired operations. This function allows the architecture to +@@ -493,20 +498,20 @@ phys_addr_t swiotlb_tbl_map_single(struc + + tbl_dma_addr &= mask; + +- offset_slots = ALIGN(tbl_dma_addr, IO_TLB_SIZE) >> IO_TLB_SHIFT; ++ offset_slots = nr_slots(tbl_dma_addr); + + /* + * Carefully handle integer overflow which can occur when mask == ~0UL. + */ + max_slots = mask + 1 +- ? ALIGN(mask + 1, IO_TLB_SIZE) >> IO_TLB_SHIFT ++ ? nr_slots(mask + 1) + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + + /* + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. + */ +- nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT; ++ nslots = nr_slots(alloc_size); + if (alloc_size >= PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else +@@ -602,7 +607,7 @@ void swiotlb_tbl_unmap_single(struct dev + enum dma_data_direction dir, unsigned long attrs) + { + unsigned long flags; +- int i, count, nslots = ALIGN(alloc_size, IO_TLB_SIZE) >> IO_TLB_SHIFT; ++ int i, count, nslots = nr_slots(alloc_size); + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; + diff --git a/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch b/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch new file mode 100644 index 00000000000..afb19f7a4c6 --- /dev/null +++ b/queue-5.11/swiotlb-factor-out-an-io_tlb_offset-helper.patch @@ -0,0 +1,89 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:09 +0000 +Subject: swiotlb: factor out an io_tlb_offset helper +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-4-jxgao@google.com> + +From: Jianxiong Gao + +commit: c7fbeca757fe74135d8b6a4c8ddaef76f5775d68 + +Replace the very genericly named OFFSET macro with a little inline +helper that hardcodes the alignment to the only value ever passed. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -50,9 +50,6 @@ + #define CREATE_TRACE_POINTS + #include + +-#define OFFSET(val,align) ((unsigned long) \ +- ( (val) & ( (align) - 1))) +- + #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) + + /* +@@ -192,6 +189,11 @@ void swiotlb_print_info(void) + bytes >> 20); + } + ++static inline unsigned long io_tlb_offset(unsigned long val) ++{ ++ return val & (IO_TLB_SEGSIZE - 1); ++} ++ + /* + * Early SWIOTLB allocation may be too early to allow an architecture to + * perform the desired operations. This function allows the architecture to +@@ -241,7 +243,7 @@ int __init swiotlb_init_with_tbl(char *t + __func__, alloc_size, PAGE_SIZE); + + for (i = 0; i < io_tlb_nslabs; i++) { +- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); ++ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; +@@ -375,7 +377,7 @@ swiotlb_late_init_with_tbl(char *tlb, un + goto cleanup4; + + for (i = 0; i < io_tlb_nslabs; i++) { +- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); ++ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; +@@ -546,7 +548,9 @@ phys_addr_t swiotlb_tbl_map_single(struc + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; +- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) ++ for (i = index - 1; ++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && ++ io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); + +@@ -632,7 +636,9 @@ void swiotlb_tbl_unmap_single(struct dev + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ +- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) ++ for (i = index - 1; ++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && ++ io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + + io_tlb_used -= nslots; diff --git a/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch b/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch new file mode 100644 index 00000000000..ea43786b346 --- /dev/null +++ b/queue-5.11/swiotlb-refactor-swiotlb_tbl_map_single.patch @@ -0,0 +1,251 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:12 +0000 +Subject: swiotlb: refactor swiotlb_tbl_map_single +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-7-jxgao@google.com> + +From: Jianxiong Gao + +commit: 26a7e094783d482f3e125f09945a5bb1d867b2e6 + +Split out a bunch of a self-contained helpers to make the function easier +to follow. + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 179 +++++++++++++++++++++++++-------------------------- + 1 file changed, 89 insertions(+), 90 deletions(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -468,134 +468,133 @@ static void swiotlb_bounce(phys_addr_t o + } + } + +-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, +- size_t mapping_size, size_t alloc_size, +- enum dma_data_direction dir, unsigned long attrs) +-{ +- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start); +- unsigned long flags; +- phys_addr_t tlb_addr; +- unsigned int nslots, stride, index, wrap; +- int i; +- unsigned long mask; +- unsigned long offset_slots; +- unsigned long max_slots; +- unsigned long tmp_io_tlb_used; ++#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) + +- if (no_iotlb_memory) +- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); +- +- if (mem_encrypt_active()) +- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); +- +- if (mapping_size > alloc_size) { +- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", +- mapping_size, alloc_size); +- return (phys_addr_t)DMA_MAPPING_ERROR; +- } +- +- mask = dma_get_seg_boundary(hwdev); ++/* ++ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. ++ */ ++static inline unsigned long get_max_slots(unsigned long boundary_mask) ++{ ++ if (boundary_mask == ~0UL) ++ return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); ++ return nr_slots(boundary_mask + 1); ++} + +- tbl_dma_addr &= mask; ++static unsigned int wrap_index(unsigned int index) ++{ ++ if (index >= io_tlb_nslabs) ++ return 0; ++ return index; ++} + +- offset_slots = nr_slots(tbl_dma_addr); ++/* ++ * Find a suitable number of IO TLB entries size that will fit this request and ++ * allocate a buffer from that IO TLB pool. ++ */ ++static int find_slots(struct device *dev, size_t alloc_size) ++{ ++ unsigned long boundary_mask = dma_get_seg_boundary(dev); ++ dma_addr_t tbl_dma_addr = ++ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; ++ unsigned long max_slots = get_max_slots(boundary_mask); ++ unsigned int nslots = nr_slots(alloc_size), stride = 1; ++ unsigned int index, wrap, count = 0, i; ++ unsigned long flags; + +- /* +- * Carefully handle integer overflow which can occur when mask == ~0UL. +- */ +- max_slots = mask + 1 +- ? nr_slots(mask + 1) +- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); ++ BUG_ON(!nslots); + + /* + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. + */ +- nslots = nr_slots(alloc_size); + if (alloc_size >= PAGE_SIZE) +- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); +- else +- stride = 1; +- +- BUG_ON(!nslots); ++ stride <<= (PAGE_SHIFT - IO_TLB_SHIFT); + +- /* +- * Find suitable number of IO TLB entries size that will fit this +- * request and allocate a buffer from that IO TLB pool. +- */ + spin_lock_irqsave(&io_tlb_lock, flags); +- + if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) + goto not_found; + +- index = ALIGN(io_tlb_index, stride); +- if (index >= io_tlb_nslabs) +- index = 0; +- wrap = index; +- ++ index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); + do { +- while (iommu_is_span_boundary(index, nslots, offset_slots, +- max_slots)) { +- index += stride; +- if (index >= io_tlb_nslabs) +- index = 0; +- if (index == wrap) +- goto not_found; +- } +- + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot + * and mark the entries as '0' indicating unavailable. + */ +- if (io_tlb_list[index] >= nslots) { +- int count = 0; +- +- for (i = index; i < (int) (index + nslots); i++) +- io_tlb_list[i] = 0; +- for (i = index - 1; +- io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && +- io_tlb_list[i]; i--) +- io_tlb_list[i] = ++count; +- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); +- +- /* +- * Update the indices to avoid searching in the next +- * round. +- */ +- io_tlb_index = ((index + nslots) < io_tlb_nslabs +- ? (index + nslots) : 0); +- +- goto found; ++ if (!iommu_is_span_boundary(index, nslots, ++ nr_slots(tbl_dma_addr), ++ max_slots)) { ++ if (io_tlb_list[index] >= nslots) ++ goto found; + } +- index += stride; +- if (index >= io_tlb_nslabs) +- index = 0; ++ index = wrap_index(index + stride); + } while (index != wrap); + + not_found: +- tmp_io_tlb_used = io_tlb_used; +- + spin_unlock_irqrestore(&io_tlb_lock, flags); +- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) +- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", +- alloc_size, io_tlb_nslabs, tmp_io_tlb_used); +- return (phys_addr_t)DMA_MAPPING_ERROR; ++ return -1; ++ + found: ++ for (i = index; i < index + nslots; i++) ++ io_tlb_list[i] = 0; ++ for (i = index - 1; ++ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && ++ io_tlb_list[i]; i--) ++ io_tlb_list[i] = ++count; ++ ++ /* ++ * Update the indices to avoid searching in the next round. ++ */ ++ if (index + nslots < io_tlb_nslabs) ++ io_tlb_index = index + nslots; ++ else ++ io_tlb_index = 0; + io_tlb_used += nslots; ++ + spin_unlock_irqrestore(&io_tlb_lock, flags); ++ return index; ++} ++ ++phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, ++ size_t mapping_size, size_t alloc_size, ++ enum dma_data_direction dir, unsigned long attrs) ++{ ++ unsigned int index, i; ++ phys_addr_t tlb_addr; ++ ++ if (no_iotlb_memory) ++ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); ++ ++ if (mem_encrypt_active()) ++ pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); ++ ++ if (mapping_size > alloc_size) { ++ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", ++ mapping_size, alloc_size); ++ return (phys_addr_t)DMA_MAPPING_ERROR; ++ } ++ ++ index = find_slots(dev, alloc_size); ++ if (index == -1) { ++ if (!(attrs & DMA_ATTR_NO_WARN)) ++ dev_warn_ratelimited(dev, ++ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", ++ alloc_size, io_tlb_nslabs, io_tlb_used); ++ return (phys_addr_t)DMA_MAPPING_ERROR; ++ } + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ +- for (i = 0; i < nslots; i++) +- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); ++ for (i = 0; i < nr_slots(alloc_size); i++) ++ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); ++ ++ tlb_addr = slot_addr(io_tlb_start, index); + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); +- + return tlb_addr; + } + diff --git a/queue-5.11/swiotlb-respect-min_align_mask.patch b/queue-5.11/swiotlb-respect-min_align_mask.patch new file mode 100644 index 00000000000..ec64c610aab --- /dev/null +++ b/queue-5.11/swiotlb-respect-min_align_mask.patch @@ -0,0 +1,142 @@ +From foo@baz Wed May 5 09:23:46 AM CEST 2021 +From: Jianxiong Gao +Date: Thu, 29 Apr 2021 17:33:14 +0000 +Subject: swiotlb: respect min_align_mask +To: stable@vger.kernel.org, hch@lst.de, marcorr@google.com, sashal@kernel.org +Cc: Jianxiong Gao , Konrad Rzeszutek Wilk +Message-ID: <20210429173315.1252465-9-jxgao@google.com> + +From: Jianxiong Gao + +commit: 1f221a0d0dbf0e48ef3a9c62871281d6a7819f05 + +swiotlb: respect min_align_mask + +Respect the min_align_mask in struct device_dma_parameters in swiotlb. + +There are two parts to it: + 1) for the lower bits of the alignment inside the io tlb slot, just + extent the size of the allocation and leave the start of the slot + empty + 2) for the high bits ensure we find a slot that matches the high bits + of the alignment to avoid wasting too much memory + +Based on an earlier patch from Jianxiong Gao . + +Signed-off-by: Christoph Hellwig +Acked-by: Jianxiong Gao +Tested-by: Jianxiong Gao +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jianxiong Gao +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/swiotlb.c | 41 +++++++++++++++++++++++++++++++---------- + 1 file changed, 31 insertions(+), 10 deletions(-) + +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -471,6 +471,14 @@ static void swiotlb_bounce(phys_addr_t o + #define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) + + /* ++ * Return the offset into a iotlb slot required to keep the device happy. ++ */ ++static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) ++{ ++ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); ++} ++ ++/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ + static inline unsigned long get_max_slots(unsigned long boundary_mask) +@@ -491,24 +499,29 @@ static unsigned int wrap_index(unsigned + * Find a suitable number of IO TLB entries size that will fit this request and + * allocate a buffer from that IO TLB pool. + */ +-static int find_slots(struct device *dev, size_t alloc_size) ++static int find_slots(struct device *dev, phys_addr_t orig_addr, ++ size_t alloc_size) + { + unsigned long boundary_mask = dma_get_seg_boundary(dev); + dma_addr_t tbl_dma_addr = + phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); +- unsigned int nslots = nr_slots(alloc_size), stride = 1; ++ unsigned int iotlb_align_mask = ++ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); ++ unsigned int nslots = nr_slots(alloc_size), stride; + unsigned int index, wrap, count = 0, i; + unsigned long flags; + + BUG_ON(!nslots); + + /* +- * For mappings greater than or equal to a page, we limit the stride +- * (and hence alignment) to a page size. ++ * For mappings with an alignment requirement don't bother looping to ++ * unaligned slots once we found an aligned one. For allocations of ++ * PAGE_SIZE or larger only look for page aligned allocations. + */ ++ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; + if (alloc_size >= PAGE_SIZE) +- stride <<= (PAGE_SHIFT - IO_TLB_SHIFT); ++ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); + + spin_lock_irqsave(&io_tlb_lock, flags); + if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) +@@ -516,6 +529,12 @@ static int find_slots(struct device *dev + + index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); + do { ++ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != ++ (orig_addr & iotlb_align_mask)) { ++ index = wrap_index(index + 1); ++ continue; ++ } ++ + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot +@@ -559,6 +578,7 @@ phys_addr_t swiotlb_tbl_map_single(struc + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs) + { ++ unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int index, i; + phys_addr_t tlb_addr; + +@@ -574,7 +594,7 @@ phys_addr_t swiotlb_tbl_map_single(struc + return (phys_addr_t)DMA_MAPPING_ERROR; + } + +- index = find_slots(dev, alloc_size); ++ index = find_slots(dev, orig_addr, alloc_size + offset); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_ratelimited(dev, +@@ -588,10 +608,10 @@ phys_addr_t swiotlb_tbl_map_single(struc + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ +- for (i = 0; i < nr_slots(alloc_size); i++) ++ for (i = 0; i < nr_slots(alloc_size + offset); i++) + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); + +- tlb_addr = slot_addr(io_tlb_start, index); ++ tlb_addr = slot_addr(io_tlb_start, index) + offset; + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); +@@ -606,8 +626,9 @@ void swiotlb_tbl_unmap_single(struct dev + enum dma_data_direction dir, unsigned long attrs) + { + unsigned long flags; +- int i, count, nslots = nr_slots(alloc_size); +- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; ++ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); ++ int i, count, nslots = nr_slots(alloc_size + offset); ++ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; + + /*