git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
iommu/riscv: Add NAPOT range invalidation support
author: Jason Gunthorpe <jgg@nvidia.com>
Fri, 8 May 2026 14:53:07 +0000 (11:53 -0300)
committer: Joerg Roedel <joerg.roedel@amd.com>
Tue, 19 May 2026 08:48:09 +0000 (10:48 +0200)
Use the RISC-V IOMMU Address Range Invalidation extension
(capabilities.S, spec section 9.3) to invalidate an IOVA range with
a single IOTINVAL.VMA command using NAPOT-encoded addressing.

One iommu_iotlb_gather maps to one NAPOT invalidation command. The
smallest power-of-two aligned range covering the gather is used since
over-invalidation is always safe.

S and NL seem to be orthogonal in the spec, so if NL is not
supported then global invalidation is probably always going to happen
as wiping a large range without a table change is not common.

Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Andrew Jones <andrew.jones@oss.qualcomm.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
drivers/iommu/riscv/iommu-bits.h
drivers/iommu/riscv/iommu.c

index 8c60780363da720c8e54088cbf4527d4d440bc4c..f2ef9bd3cde960a3c6c2f86032a958226dfb4c38 100644 (file)
@@ -64,6 +64,7 @@
 #define RISCV_IOMMU_CAPABILITIES_PD17          BIT_ULL(39)
 #define RISCV_IOMMU_CAPABILITIES_PD20          BIT_ULL(40)
 #define RISCV_IOMMU_CAPABILITIES_NL            BIT_ULL(42)
+#define RISCV_IOMMU_CAPABILITIES_S             BIT_ULL(43)
 
 /**
  * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -475,6 +476,7 @@ struct riscv_iommu_command {
 #define RISCV_IOMMU_CMD0_IOTINVAL_GV           BIT_ULL(33)
 #define RISCV_IOMMU_CMD0_IOTINVAL_GSCID                GENMASK_ULL(59, 44)
 #define RISCV_IOMMU_CMD0_IOTINVAL_NL           BIT_ULL(34)
+#define RISCV_IOMMU_CMD1_IOTINVAL_S            BIT_ULL(9)
 /* dword1[61:10] is the 4K-aligned page address */
 #define RISCV_IOMMU_CMD1_IOTINVAL_ADDR         GENMASK_ULL(61, 10)
 
@@ -731,6 +733,22 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
        cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_NL;
 }
 
+/*
+ * Set NAPOT-encoded address for range invalidation (S=1); replaces dword1.
+ * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages), or
+ * (sz_lg2 - 13) wraps. addr must be naturally aligned to 2^sz_lg2.
+ */
+static inline void riscv_iommu_cmd_inval_set_napot(
+       struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
+{
+       u64 pfn = addr >> 12;   /* 4KiB page frame number */
+
+       pfn |= BIT_U64(sz_lg2 - 13) - 1;   /* low (sz_lg2 - 13) PFN bits = 1 */
+       cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD1_IOTINVAL_ADDR, pfn) |
+                     RISCV_IOMMU_CMD1_IOTINVAL_S;   /* S: ADDR is a NAPOT range */
+       cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_AV;   /* AV=0 means global */
+}
+
 static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
                                                   int pscid)
 {
index 165ced9937562b533723e4ed2bd526b6e11ecac2..cec3ddd7ab1032f3ee2d66d21ef6bea3dd545d17 100644 (file)
@@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
                u8 stride_lg2;
                unsigned int num;
        } single;
+       struct {
+               u8 sz_lg2;
+               u64 addr;
+       } range;
 };
 
 static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
@@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
        /* No level information available */
        if (!combined) {
                tlbi->single.use_global = true;
+               tlbi->range.sz_lg2 = 0;
                return;
        }
 
+       /*
+        * Calculate the smallest NAPOT range containing [start, last].
+        * NAPOT encoding requires a power-of-two sized, naturally aligned
+        * range. Over-invalidation is always safe.
+        */
+       tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
+       if (unlikely(tlbi->range.sz_lg2 >= 64)) {
+               tlbi->single.use_global = true;
+               tlbi->range.sz_lg2 = 0;
+               return;
+       }
+       tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
+
        /*
         * Calculate stride from the lowest changed level. RISC-V uses 4KiB
         * granule with 9 bits per level.
@@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
        bool use_nl = tlbi->non_leaf &&
                      (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
        struct riscv_iommu_command cmd;
-       unsigned long iova;
        unsigned int i;
 
        riscv_iommu_cmd_inval_vma(&cmd);
@@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
         * If non-leaf entries were changed and the IOMMU doesn't
         * support NL, we must fall back to global invalidation (AV=0).
         */
-       if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
+       if (tlbi->non_leaf && !use_nl)
                goto global;
 
-       iova = tlbi->start;
-       for (i = 0; i < tlbi->single.num; i++) {
-               riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+       if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
+           tlbi->range.sz_lg2 >= 13) {
+               riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
+                                               tlbi->range.sz_lg2);
                if (use_nl)
                        riscv_iommu_cmd_inval_set_nl(&cmd);
                riscv_iommu_cmd_send(iommu, &cmd);
-               iova += 1ULL << tlbi->single.stride_lg2;
+       } else {
+               unsigned long iova;
+
+               if (tlbi->single.use_global)
+                       goto global;
+
+               iova = tlbi->start;
+               for (i = 0; i < tlbi->single.num; i++) {
+                       riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+                       if (use_nl)
+                               riscv_iommu_cmd_inval_set_nl(&cmd);
+                       riscv_iommu_cmd_send(iommu, &cmd);
+                       iova += 1ULL << tlbi->single.stride_lg2;
+               }
        }
        return;
 global: