From 7a2ce64950fbe43038eeba1889a142c5784f5021 Mon Sep 17 00:00:00 2001
From: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Date: Fri, 19 Sep 2025 21:35:01 +0000
Subject: [PATCH] amd_iommu: Add a page walker to sync shadow page tables on
 invalidation

For the specified address range, walk the page table, identifying
regions as mapped or unmapped, and invoke the registered notifiers with
the corresponding event type.

Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <20250919213515.917111-9-alejandro.j.jimenez@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/amd_iommu.c | 80 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index c25981ff93..0e45435c77 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -681,6 +681,86 @@ fetch_pte(AMDVIAddressSpace *as, hwaddr address, uint64_t dte, uint64_t *pte,
     return 0;
 }
 
+/*
+ * Walk the guest page table for an IOVA and range and signal the registered
+ * notifiers to sync the shadow page tables in the host.
+ * Must be called with a valid DTE for DMA remapping, i.e. V=1, TV=1.
+ */
+static void __attribute__((unused))
+amdvi_sync_shadow_page_table_range(AMDVIAddressSpace *as, uint64_t *dte,
+                                   hwaddr addr, uint64_t size, bool send_unmap)
+{
+    IOMMUTLBEvent event;
+
+    hwaddr iova_next, page_mask, pagesize;
+    hwaddr iova = addr;
+    hwaddr end = iova + size - 1;
+
+    uint64_t pte;
+    int ret;
+
+    while (iova < end) {
+
+        ret = fetch_pte(as, iova, dte[0], &pte, &pagesize);
+
+        if (ret == -AMDVI_FR_PT_ROOT_INV) {
+            /*
+             * Invalid conditions, such as the IOVA exceeding the range that
+             * the current page table mode (set in the DTE) supports, or a
+             * failure to fetch the page table from the root pointer in the DTE.
+             */
+            assert(pagesize == 0);
+            return;
+        }
+        /* PTE has been validated for major errors and pagesize is set */
+        assert(pagesize);
+        page_mask = ~(pagesize - 1);
+        iova_next = (iova & page_mask) + pagesize;
+
+        if (ret == -AMDVI_FR_PT_ENTRY_INV) {
+            /*
+             * Failed to read the PTE from memory; the pagesize matches the
+             * current level. Unable to determine the region type, so the safe
+             * strategy is to skip the range and continue the page walk.
+             */
+            goto next;
+        }
+
+        event.entry.target_as = &address_space_memory;
+        event.entry.iova = iova & page_mask;
+        /* translated_addr is irrelevant for the unmap case */
+        event.entry.translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) &
+                                      page_mask;
+        event.entry.addr_mask = ~page_mask;
+        event.entry.perm = amdvi_get_perms(pte);
+
+        /*
+         * If the leaf PTE is not found, or if it has invalid permissions,
+         * an UNMAP notification is sent, but only if the caller has
+         * requested it.
+         */
+        if (!IOMMU_PTE_PRESENT(pte) || (event.entry.perm == IOMMU_NONE)) {
+            if (!send_unmap) {
+                goto next;
+            }
+            event.type = IOMMU_NOTIFIER_UNMAP;
+        } else {
+            event.type = IOMMU_NOTIFIER_MAP;
+        }
+
+        /* Invoke the notifiers registered for this address space */
+        memory_region_notify_iommu(&as->iommu, 0, event);
+
+next:
+        /* Check for 64-bit overflow and terminate the walk in that case */
+        if (iova_next < iova) {
+            break;
+        } else {
+            iova = iova_next;
+        }
+    }
+}
+
 /* log error without aborting since linux seems to be using reserved bits */
 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
 {
-- 
2.47.3
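
Note on the walk itself (an addition for this writeup, not part of the upstream patch): the loop hinges on three details. fetch_pte() reports the page size at the level where the walk stopped, the page_mask/iova_next arithmetic rounds the IOVA down to that boundary and then steps past it, and each range is classified as MAP, UNMAP, or skip. The standalone sketch below reproduces just that structure against a toy single-level page table so it can be compiled and run in isolation; toy_fetch_pte(), notify(), sync_range(), and the TOY_* constants are hypothetical stand-ins for fetch_pte(), memory_region_notify_iommu(), and the real AMD IOMMU structures, not QEMU APIs.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE   0x1000ULL   /* fixed 4 KiB pages */
#define TOY_PTE_PRESENT 1ULL        /* bit 0: entry is present */
#define TOY_NUM_PTES    8

typedef enum { EVT_MAP, EVT_UNMAP } evt_type;

/* Toy "guest page table": identity-maps pages 0, 1, 2, and 5 */
static const uint64_t toy_pt[TOY_NUM_PTES] = {
    [0] = 0x0000ULL | TOY_PTE_PRESENT,
    [1] = 0x1000ULL | TOY_PTE_PRESENT,
    [2] = 0x2000ULL | TOY_PTE_PRESENT,
    [5] = 0x5000ULL | TOY_PTE_PRESENT,
};

/* Hypothetical analogue of fetch_pte(): returns the PTE and page size */
static int toy_fetch_pte(uint64_t iova, uint64_t *pte, uint64_t *pagesize)
{
    uint64_t idx = iova / TOY_PAGE_SIZE;

    if (idx >= TOY_NUM_PTES) {
        *pagesize = 0;  /* mirrors the AMDVI_FR_PT_ROOT_INV case above */
        return -1;
    }
    *pte = toy_pt[idx];
    *pagesize = TOY_PAGE_SIZE;
    return 0;
}

/* Hypothetical analogue of memory_region_notify_iommu() */
static void notify(evt_type type, uint64_t iova, uint64_t addr_mask)
{
    printf("%s iova=0x%05" PRIx64 " addr_mask=0x%" PRIx64 "\n",
           type == EVT_MAP ? "MAP  " : "UNMAP", iova, addr_mask);
}

/* Same loop structure as amdvi_sync_shadow_page_table_range() */
static void sync_range(uint64_t addr, uint64_t size, bool send_unmap)
{
    uint64_t iova = addr;
    uint64_t end = addr + size - 1;
    uint64_t pte, pagesize, page_mask, iova_next;

    while (iova < end) {
        if (toy_fetch_pte(iova, &pte, &pagesize) < 0) {
            return;  /* invalid root/range: the walk cannot continue */
        }
        /* Round down to this level's page boundary, then step past it */
        page_mask = ~(pagesize - 1);
        iova_next = (iova & page_mask) + pagesize;

        if (!(pte & TOY_PTE_PRESENT)) {
            /* Unmapped hole: notify only if the caller asked for unmaps */
            if (send_unmap) {
                notify(EVT_UNMAP, iova & page_mask, ~page_mask);
            }
        } else {
            notify(EVT_MAP, iova & page_mask, ~page_mask);
        }

        if (iova_next < iova) {
            break;  /* 64-bit overflow: terminate the walk */
        }
        iova = iova_next;
    }
}

int main(void)
{
    /* Walk pages 0..7: expect MAP for 0, 1, 2, 5 and UNMAP for the rest */
    sync_range(0, TOY_NUM_PTES * TOY_PAGE_SIZE, true);
    return 0;
}

Compiled with cc -std=c99, the program walks eight 4 KiB pages and prints MAP events for the present entries (pages 0, 1, 2, and 5) and UNMAP events for the holes, mirroring how the real walker lets registered notifiers tear down stale shadow mappings when send_unmap is true.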