From: Barry Song Date: Sat, 28 Feb 2026 22:13:37 +0000 (+0800) Subject: dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=661f8a193d48d123aedcbd401ace137333d02523;p=thirdparty%2Fkernel%2Flinux.git dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg Extend the following APIs with a flush argument: dma_direct_unmap_phys(), dma_direct_map_phys(), and dma_direct_sync_single_for_cpu(). Single-buffer callers pass flush=true, while the SG paths pass flush=false and issue a single arch_sync_dma_flush() after all per-entry cache operations have been issued in dma_direct_{map,unmap}_sg(). This ultimately benefits dma_map_sg() and dma_unmap_sg(). Cc: Catalin Marinas Cc: Will Deacon Cc: Marek Szyprowski Cc: Robin Murphy Cc: Ada Couprie Diaz Cc: Ard Biesheuvel Cc: Marc Zyngier Cc: Anshuman Khandual Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tangquan Zheng Reviewed-by: Leon Romanovsky Tested-by: Xueyuan Chen Signed-off-by: Barry Song Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260228221337.59951-1-21cnbao@gmail.com --- diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index c7666e5d5e7c1..ec887f4437418 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -444,14 +444,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, { struct scatterlist *sg; int i; + bool need_sync = false; for_each_sg(sgl, sg, nents, i) { - if (sg_dma_is_bus_address(sg)) + if (sg_dma_is_bus_address(sg)) { sg_dma_unmark_bus_address(sg); - else + } else { + need_sync = true; dma_direct_unmap_phys(dev, sg->dma_address, - sg_dma_len(sg), dir, attrs); + sg_dma_len(sg), dir, attrs, false); + } } + if (need_sync && !dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); } #endif @@ -461,6 +466,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, struct pci_p2pdma_map_state p2pdma_state = {}; struct scatterlist *sg; int i, ret; + bool need_sync = false; 
for_each_sg(sgl, sg, nents, i) { switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { @@ -472,8 +478,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, */ break; case PCI_P2PDMA_MAP_NONE: + need_sync = true; sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), - sg->length, dir, attrs); + sg->length, dir, attrs, false); if (sg->dma_address == DMA_MAPPING_ERROR) { ret = -EIO; goto out_unmap; @@ -492,6 +499,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, sg_dma_len(sg) = sg->length; } + if (need_sync && !dev_is_dma_coherent(dev)) + arch_sync_dma_flush(); return nents; out_unmap: diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index f925a7e8b000f..52b361e667008 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -67,13 +67,15 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, } static inline void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) + dma_addr_t addr, size_t size, enum dma_data_direction dir, + bool flush) { phys_addr_t paddr = dma_to_phys(dev, addr); if (!dev_is_dma_coherent(dev)) { arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_flush(); + if (flush) + arch_sync_dma_flush(); arch_sync_dma_for_cpu_all(); } @@ -82,7 +84,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, static inline dma_addr_t dma_direct_map_phys(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, - unsigned long attrs) + unsigned long attrs, bool flush) { dma_addr_t dma_addr; @@ -111,7 +113,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, if (!dev_is_dma_coherent(dev) && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { arch_sync_dma_for_device(phys, size, dir); - arch_sync_dma_flush(); + if (flush) + arch_sync_dma_flush(); } return dma_addr; @@ -124,7 +127,8 @@ err_overflow: } static inline void 
dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs, + bool flush) { phys_addr_t phys; @@ -134,7 +138,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, phys = dma_to_phys(dev, addr); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); + dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush); swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 3928a509c44c2..78d8b4039c3e6 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -166,7 +166,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, if (dma_map_direct(dev, ops) || (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) - addr = dma_direct_map_phys(dev, phys, size, dir, attrs); + addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true); else if (use_dma_iommu(dev)) addr = iommu_dma_map_phys(dev, phys, size, dir, attrs); else if (ops->map_phys) @@ -207,7 +207,7 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops) || (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size))) - dma_direct_unmap_phys(dev, addr, size, dir, attrs); + dma_direct_unmap_phys(dev, addr, size, dir, attrs, true); else if (use_dma_iommu(dev)) iommu_dma_unmap_phys(dev, addr, size, dir, attrs); else if (ops->unmap_phys) @@ -373,7 +373,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); + dma_direct_sync_single_for_cpu(dev, addr, size, dir, true); else if (use_dma_iommu(dev)) iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu)