iommu/arm-smmu-v3: Perform per-domain invalidations using arm_smmu_invs
Author:     Nicolin Chen <nicolinc@nvidia.com>
AuthorDate: Tue, 17 Mar 2026 07:59:23 +0000 (00:59 -0700)
Commit:     Will Deacon <will@kernel.org>
CommitDate: Thu, 19 Mar 2026 15:08:22 +0000 (15:08 +0000)
Replace the old invalidation functions with arm_smmu_domain_inv_range() in
all the existing invalidation routines, and remove the old functions, which
are now unused.
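
For illustration (both fragments are lifted from the sva.c hunk below), the
SVA notifier's flush path collapses from a hand-rolled TLBI/ATC sequence to
a single per-domain call:

	/* Before: pick ASID-wide vs. ranged TLBI, then flush the ATC */
	if (!size)
		arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
	else
		arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid,
					    PAGE_SIZE, false, smmu_domain);
	arm_smmu_atc_inv_domain(smmu_domain, start, size);

	/* After: one entry point covers both the TLBI and the ATC flush */
	arm_smmu_domain_inv_range(smmu_domain, start, size, PAGE_SIZE, false);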

The new arm_smmu_domain_inv_range() also applies the CMDQ_MAX_TLBI_OPS
threshold internally, so drop the open-coded copy from the SVA function.
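
A sketch of the threshold logic being moved, reconstructed from the removed
hunk below; the helper name too_many_tlbi_ops() is hypothetical, and where
exactly arm_smmu_domain_inv_range() applies this check is not visible in
this diff:

	#define CMDQ_MAX_TLBI_OPS	(1 << (PAGE_SHIFT - 3))

	/* Hypothetical helper illustrating the over-invalidation policy */
	static bool too_many_tlbi_ops(struct arm_smmu_device *smmu, size_t size)
	{
		/*
		 * Without range invalidation, each page costs one TLBI
		 * command; past the threshold a single address-space flush
		 * is cheaper than flooding the command queue and risking a
		 * soft lockup.
		 */
		if (!(smmu->features & ARM_SMMU_FEAT_RANGE_INV))
			return size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE;
		/* With range invalidation, only an unbounded request widens */
		return size == ULONG_MAX;
	}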

Since arm_smmu_cmdq_batch_add_range() now has only one caller, which must
pass a non-zero size, add a WARN_ON_ONCE to catch any case that slips
through.
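
The resulting guard (mirroring the hunk below); previously a zero size
caused a silent early return, which could hide a buggy caller:

	if (WARN_ON_ONCE(!size))
		return;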

Also update the comment in arm_smmu_tlb_inv_context() to clarify how the
memory-ordering guarantees work with the new invalidation functions.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Will Deacon <will@kernel.org>
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 440ad8cc07de28d4c444a42232e96ca9863c45de..f1f8e01a7e9142a755395fdd09eafcf2e05256ba 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -122,15 +122,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 }
 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
 
-/*
- * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
- * is used as a threshold to replace per-page TLBI commands to issue in the
- * command queue with an address-space TLBI command, when SMMU w/o a range
- * invalidation feature handles too many per-page TLBI commands, which will
- * otherwise result in a soft lockup.
- */
-#define CMDQ_MAX_TLBI_OPS              (1 << (PAGE_SHIFT - 3))
-
 static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
                                                struct mm_struct *mm,
                                                unsigned long start,
@@ -146,21 +137,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
         * range. So do a simple translation here by calculating size correctly.
         */
        size = end - start;
-       if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
-               if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
-                       size = 0;
-       } else {
-               if (size == ULONG_MAX)
-                       size = 0;
-       }
-
-       if (!size)
-               arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
-       else
-               arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid,
-                                           PAGE_SIZE, false, smmu_domain);
 
-       arm_smmu_atc_inv_domain(smmu_domain, start, size);
+       arm_smmu_domain_inv_range(smmu_domain, start, size, PAGE_SIZE, false);
 }
 
 static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -191,8 +169,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
        }
        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
-       arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
-       arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
+       arm_smmu_domain_inv(smmu_domain);
 }
 
 static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
@@ -302,7 +279,7 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
        /*
         * Ensure the ASID is empty in the iommu cache before allowing reuse.
         */
-       arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
+       arm_smmu_domain_inv(smmu_domain);
 
        /*
         * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 19e47c614ba7f0643b9fc3a0e406cf15022af41e..01030ffd2fe23134ecf18d1deba65bd2abae72ee 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1289,16 +1289,6 @@ struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
 
 /* Context descriptor manipulation functions */
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
-{
-       struct arm_smmu_cmdq_ent cmd = {
-               .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
-                       CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
-               .tlbi.asid = asid,
-       };
-
-       arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
-}
 
 /*
  * Based on the value of ent report which bits of the STE the HW will access. It
@@ -2509,90 +2499,27 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
        return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
 }
 
-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
-                           unsigned long iova, size_t size)
-{
-       struct arm_smmu_master_domain *master_domain;
-       int i;
-       unsigned long flags;
-       struct arm_smmu_cmdq_ent cmd = {
-               .opcode = CMDQ_OP_ATC_INV,
-       };
-       struct arm_smmu_cmdq_batch cmds;
-
-       if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
-               return 0;
-
-       /*
-        * Ensure that we've completed prior invalidation of the main TLBs
-        * before we read 'nr_ats_masters' in case of a concurrent call to
-        * arm_smmu_enable_ats():
-        *
-        *      // unmap()                      // arm_smmu_enable_ats()
-        *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
-        *      smp_mb();                       [...]
-        *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
-        *
-        * Ensures that we always see the incremented 'nr_ats_masters' count if
-        * ATS was enabled at the PCI device before completion of the TLBI.
-        */
-       smp_mb();
-       if (!atomic_read(&smmu_domain->nr_ats_masters))
-               return 0;
-
-       arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
-
-       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-       list_for_each_entry(master_domain, &smmu_domain->devices,
-                           devices_elm) {
-               struct arm_smmu_master *master = master_domain->master;
-
-               if (!master->ats_enabled)
-                       continue;
-
-               if (master_domain->nested_ats_flush) {
-                       /*
-                        * If a S2 used as a nesting parent is changed we have
-                        * no option but to completely flush the ATC.
-                        */
-                       arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
-               } else {
-                       arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
-                                               &cmd);
-               }
-
-               for (i = 0; i < master->num_streams; i++) {
-                       cmd.atc.sid = master->streams[i].id;
-                       arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
-               }
-       }
-       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-
-       return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
-}
-
 /* IO_PGTABLE API */
 static void arm_smmu_tlb_inv_context(void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       struct arm_smmu_cmdq_ent cmd;
 
        /*
-        * NOTE: when io-pgtable is in non-strict mode, we may get here with
-        * PTEs previously cleared by unmaps on the current CPU not yet visible
-        * to the SMMU. We are relying on the dma_wmb() implicit during cmd
-        * insertion to guarantee those are observed before the TLBI. Do be
-        * careful, 007.
+        * If the DMA API is running in non-strict mode then another CPU could
+        * have changed the page table and not invoked any flush op. Instead the
+        * other CPU will do an atomic_read() and this CPU will have done an
+        * atomic_write(). That handshake is enough to acquire the page table
+        * writes from the other CPU.
+        *
+        * All command execution has a dma_wmb() to release all the in-memory
+        * structures written by this CPU, that barrier must also release the
+        * writes acquired from all the other CPUs too.
+        *
+        * There are other barriers and atomics on this path, but the above is
+        * the essential mechanism for ensuring that HW sees the page table
+        * writes from another CPU before it executes the IOTLB invalidation.
         */
-       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
-       } else {
-               cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
-               cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
-               arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
-       }
-       arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
+       arm_smmu_domain_inv(smmu_domain);
 }
 
 static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
@@ -2604,7 +2531,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
        unsigned long end = iova + size, num_pages = 0, tg = pgsize;
        size_t inv_range = granule;
 
-       if (!size)
+       if (WARN_ON_ONCE(!size))
                return;
 
        if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
@@ -2659,76 +2586,6 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
        }
 }
 
-static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
-                                    unsigned long iova, size_t size,
-                                    size_t granule,
-                                    struct arm_smmu_domain *smmu_domain)
-{
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       struct arm_smmu_cmdq_batch cmds;
-       size_t pgsize;
-
-       /* Get the leaf page size */
-       pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
-
-       arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
-       arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule,
-                                     pgsize);
-       arm_smmu_cmdq_batch_submit(smmu, &cmds);
-}
-
-static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
-                                         size_t granule, bool leaf,
-                                         struct arm_smmu_domain *smmu_domain)
-{
-       struct arm_smmu_cmdq_ent cmd = {
-               .tlbi = {
-                       .leaf   = leaf,
-               },
-       };
-
-       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
-                                 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
-               cmd.tlbi.asid   = smmu_domain->cd.asid;
-       } else {
-               cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
-               cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
-       }
-       __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
-
-       if (smmu_domain->nest_parent) {
-               /*
-                * When the S2 domain changes all the nested S1 ASIDs have to be
-                * flushed too.
-                */
-               cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
-               arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
-       }
-
-       /*
-        * Unfortunately, this can't be leaf-only since we may have
-        * zapped an entire table.
-        */
-       arm_smmu_atc_inv_domain(smmu_domain, iova, size);
-}
-
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-                                size_t granule, bool leaf,
-                                struct arm_smmu_domain *smmu_domain)
-{
-       struct arm_smmu_cmdq_ent cmd = {
-               .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
-                         CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
-               .tlbi = {
-                       .asid   = asid,
-                       .leaf   = leaf,
-               },
-       };
-
-       __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
-}
-
 static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
                                      size_t granule)
 {
@@ -2930,7 +2787,9 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
                                  size_t granule, void *cookie)
 {
-       arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
+       struct arm_smmu_domain *smmu_domain = cookie;
+
+       arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false);
 }
 
 static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -4201,9 +4060,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
        if (!gather->pgsize)
                return;
 
-       arm_smmu_tlb_inv_range_domain(gather->start,
-                                     gather->end - gather->start + 1,
-                                     gather->pgsize, true, smmu_domain);
+       arm_smmu_domain_inv_range(smmu_domain, gather->start,
+                                 gather->end - gather->start + 1,
+                                 gather->pgsize, true);
 }
 
 static phys_addr_t
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 534e9a5ddca313332993e069833e1c1eda0ce00b..36de2b0b2ebe6cf50b743bcb9685ec51473392c9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -1080,13 +1080,6 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
                       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
                       struct arm_smmu_cd *cd, struct iommu_domain *old);
 
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-                                size_t granule, bool leaf,
-                                struct arm_smmu_domain *smmu_domain);
-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
-                           unsigned long iova, size_t size);
-
 void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
                               unsigned long iova, size_t size,
                               unsigned int granule, bool leaf);
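
This leaves arm_smmu_domain_inv_range() as the only ranged entry point. The
full-domain form used in the hunks above, arm_smmu_domain_inv(), sits
outside the context shown here; one plausible shape, as an assumption
rather than a quote of the actual code, is a thin inline wrapper:

	/* Assumed wrapper: a full-domain flush as an unbounded range inval */
	static inline void arm_smmu_domain_inv(struct arm_smmu_domain *smmu_domain)
	{
		arm_smmu_domain_inv_range(smmu_domain, 0, 0, 0, false);
	}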