From: Jason Gunthorpe
Date: Fri, 8 May 2026 14:53:07 +0000 (-0300)
Subject: iommu/riscv: Add NAPOT range invalidation support
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=082ad5ed078516972571060b54643a0777486071;p=thirdparty%2Fkernel%2Flinux.git

iommu/riscv: Add NAPOT range invalidation support

Use the RISC-V IOMMU Address Range Invalidation extension
(capabilities.S, spec section 9.3) to invalidate an IOVA range with a
single IOTINVAL.VMA command using NAPOT-encoded addressing.

One iommu_iotlb_gather maps to one NAPOT invalidation command. The
smallest power-of-two aligned range covering the gather is used since
over-invalidation is always safe.

S and NL seem to be orthogonal in the spec, so if NL is not supported
then global invalidation is probably always going to happen as wiping a
large range without a table change is not common.

Reviewed-by: Tomasz Jeznach
Signed-off-by: Jason Gunthorpe
Tested-by: Andrew Jones
Signed-off-by: Joerg Roedel
---

diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 8c60780363da7..f2ef9bd3cde96 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -64,6 +64,7 @@
 #define RISCV_IOMMU_CAPABILITIES_PD17 BIT_ULL(39)
 #define RISCV_IOMMU_CAPABILITIES_PD20 BIT_ULL(40)
 #define RISCV_IOMMU_CAPABILITIES_NL BIT_ULL(42)
+#define RISCV_IOMMU_CAPABILITIES_S BIT_ULL(43)
 
 /**
  * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -475,6 +476,7 @@ struct riscv_iommu_command {
 #define RISCV_IOMMU_CMD0_IOTINVAL_GV BIT_ULL(33)
 #define RISCV_IOMMU_CMD0_IOTINVAL_GSCID GENMASK_ULL(59, 44)
 #define RISCV_IOMMU_CMD0_IOTINVAL_NL BIT_ULL(34)
+#define RISCV_IOMMU_CMD1_IOTINVAL_S BIT_ULL(9)
 /* dword1[61:10] is the 4K-aligned page address */
 #define RISCV_IOMMU_CMD1_IOTINVAL_ADDR GENMASK_ULL(61, 10)
 
@@ -731,6 +733,22 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
 	cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_NL;
 }
 
+/*
+ * Set NAPOT-encoded address for range invalidation (S=1).
+ * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages).
+ * addr must be naturally aligned to 2^sz_lg2.
+ */
+static inline void riscv_iommu_cmd_inval_set_napot(
+	struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
+{
+	u64 pfn = addr >> 12;
+
+	pfn |= BIT_U64(sz_lg2 - 13) - 1;
+	cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD1_IOTINVAL_ADDR, pfn) |
+		      RISCV_IOMMU_CMD1_IOTINVAL_S;
+	cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_AV;
+}
+
 static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
 						   int pscid)
 {
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 165ced9937562..cec3ddd7ab103 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
 		u8 stride_lg2;
 		unsigned int num;
 	} single;
+	struct {
+		u8 sz_lg2;
+		u64 addr;
+	} range;
 };
 
 static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
@@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
 	/* No level information available */
 	if (!combined) {
 		tlbi->single.use_global = true;
+		tlbi->range.sz_lg2 = 0;
 		return;
 	}
 
+	/*
+	 * Calculate the smallest NAPOT range containing [start, last].
+	 * NAPOT encoding requires a power-of-two sized, naturally aligned
+	 * range. Over-invalidation is always safe.
+	 */
+	tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
+	if (unlikely(tlbi->range.sz_lg2 >= 64)) {
+		tlbi->single.use_global = true;
+		tlbi->range.sz_lg2 = 0;
+		return;
+	}
+	tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
+
 	/*
 	 * Calculate stride from the lowest changed level. RISC-V uses 4KiB
 	 * granule with 9 bits per level.
@@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
 	bool use_nl = tlbi->non_leaf &&
 		      (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
 	struct riscv_iommu_command cmd;
-	unsigned long iova;
 	unsigned int i;
 
 	riscv_iommu_cmd_inval_vma(&cmd);
@@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
 	 * If non-leaf entries were changed and the IOMMU doesn't
 	 * support NL, we must fall back to global invalidation (AV=0).
 	 */
-	if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
+	if (tlbi->non_leaf && !use_nl)
 		goto global;
 
-	iova = tlbi->start;
-	for (i = 0; i < tlbi->single.num; i++) {
-		riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+	if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
+	    tlbi->range.sz_lg2 >= 13) {
+		riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
+						tlbi->range.sz_lg2);
 		if (use_nl)
 			riscv_iommu_cmd_inval_set_nl(&cmd);
 		riscv_iommu_cmd_send(iommu, &cmd);
-		iova += 1ULL << tlbi->single.stride_lg2;
+	} else {
+		unsigned long iova;
+
+		if (tlbi->single.use_global)
+			goto global;
+
+		iova = tlbi->start;
+		for (i = 0; i < tlbi->single.num; i++) {
+			riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+			if (use_nl)
+				riscv_iommu_cmd_inval_set_nl(&cmd);
+			riscv_iommu_cmd_send(iommu, &cmd);
+			iova += 1ULL << tlbi->single.stride_lg2;
+		}
 	}
 	return;
 global: