iommu/amd/pgtbl: Fix possible race while increase page table level
author    Vasant Hegde <vasant.hegde@amd.com>
          Sat, 13 Sep 2025 06:26:57 +0000 (06:26 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 25 Sep 2025 09:13:46 +0000 (11:13 +0200)
commit 1e56310b40fd2e7e0b9493da9ff488af145bdd0c upstream.

The AMD IOMMU host page table implementation supports dynamic page table levels
(up to 6), starting with a 3-level configuration that expands based on the IOVA
address. The kernel maintains a root pointer and the current page table level to
enable proper page table walks in alloc_pte()/fetch_pte() operations.

The IOMMU IOVA allocator initially starts with the 32-bit address space and, once
it is exhausted, switches to the 64-bit address space (the maximum address is
determined by IOMMU and device DMA capability). To support larger IOVAs, the AMD
IOMMU driver increases the page table level.

But in the unmap path (iommu_v1_unmap_pages()), fetch_pte() reads
pgtable->[root/mode] without a lock. So it is possible that, in an extreme corner
case, while increase_address_space() is updating pgtable->[root/mode], fetch_pte()
reads a wrong page table level (pgtable->mode). It then compares that value with
the level encoded in the page table and returns NULL. This causes the iommu_unmap
op to fail, and the upper layer may retry/log a WARN_ON.

CPU 0                                        CPU 1
-----                                        -----
map pages                                    unmap pages
alloc_pte() -> increase_address_space()      iommu_v1_unmap_pages() -> fetch_pte()
  pgtable->root = pte (new root value)
                                             READ pgtable->[mode/root]
                                               Reads new root, old mode
  Updates mode (pgtable->mode += 1)
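The racy read boils down to two dependent loads (abbreviated from the pre-patch
alloc_pte()/fetch_pte()):

  /* mode and root are loaded with no synchronization against
   * increase_address_space(), so the pair may be torn. */
  level = pgtable->mode - 1;
  pte   = &pgtable->root[PM_LEVEL_INDEX(level, address)];

If the writer publishes the new root after the reader loads mode but before it
loads root, the reader indexes the new root with the stale level.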

Since page table level updates are infrequent and already serialized with a
spinlock, introduce a seqcount so that the read path can run lock-free.
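The resulting write/read pairing, distilled from the hunks below (illustrative
sketch; variable declarations omitted):

  /* Writer (increase_address_space()): already serialized by the
   * domain spinlock; the seqcount makes root+mode appear to update
   * atomically from the readers' point of view. */
  write_seqcount_begin(&pgtable->seqcount);
  pgtable->root  = pte;
  pgtable->mode += 1;
  write_seqcount_end(&pgtable->seqcount);

  /* Readers (alloc_pte()/fetch_pte()): lock-free; retry if a level
   * increase raced with the two loads. */
  do {
          seqcount = read_seqcount_begin(&pgtable->seqcount);
          level    = pgtable->mode - 1;
          pte      = &pgtable->root[PM_LEVEL_INDEX(level, address)];
  } while (read_seqcount_retry(&pgtable->seqcount, seqcount));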

Fixes: 754265bcab78 ("iommu/amd: Fix race in increase_address_space()")
Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Cc: stable@vger.kernel.org
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/iommu/amd/amd_iommu_types.h
drivers/iommu/amd/io_pgtable.c

index 6fb2f2919ab1ff1e3847c7b33a3ffdae4cdfd3f2..a14ee649d3da3b9fa5c6884eae79387436e130e1 100644
@@ -545,6 +545,7 @@ struct gcr3_tbl_info {
 };
 
 struct amd_io_pgtable {
+       seqcount_t              seqcount;       /* Protects root/mode update */
        struct io_pgtable       pgtbl;
        int                     mode;
        u64                     *root;
index f3399087859fd1c6107f4d14d94a6a77c5de9b4e..91cc1e0c663dbfc537c85500808e9db128b8030e 100644
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
+#include <linux/seqlock.h>
 
 #include <asm/barrier.h>
 
@@ -144,8 +145,11 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
 
        *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
 
+       write_seqcount_begin(&pgtable->seqcount);
        pgtable->root  = pte;
        pgtable->mode += 1;
+       write_seqcount_end(&pgtable->seqcount);
+
        amd_iommu_update_and_flush_device_table(domain);
 
        pte = NULL;
@@ -167,6 +171,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
 {
        unsigned long last_addr = address + (page_size - 1);
        struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+       unsigned int seqcount;
        int level, end_lvl;
        u64 *pte, *page;
 
@@ -184,8 +189,14 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
        }
 
 
-       level   = pgtable->mode - 1;
-       pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+       do {
+               seqcount = read_seqcount_begin(&pgtable->seqcount);
+
+               level   = pgtable->mode - 1;
+               pte     = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+       } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
+
        address = PAGE_SIZE_ALIGN(address, page_size);
        end_lvl = PAGE_SIZE_LEVEL(page_size);
 
@@ -262,6 +273,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
                      unsigned long *page_size)
 {
        int level;
+       unsigned int seqcount;
        u64 *pte;
 
        *page_size = 0;
@@ -269,8 +281,12 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
        if (address > PM_LEVEL_SIZE(pgtable->mode))
                return NULL;
 
-       level      =  pgtable->mode - 1;
-       pte        = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+       do {
+               seqcount = read_seqcount_begin(&pgtable->seqcount);
+               level      =  pgtable->mode - 1;
+               pte        = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+       } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
        *page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
        while (level > 0) {
@@ -552,6 +568,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
        if (!pgtable->root)
                return NULL;
        pgtable->mode = PAGE_MODE_3_LEVEL;
+       seqcount_init(&pgtable->seqcount);
 
        cfg->pgsize_bitmap  = amd_iommu_pgsize_bitmap;
        cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;