From 15abfced803929f935bb59a0e1b02558bd8325c4 Mon Sep 17 00:00:00 2001 From: "Guo Ren (Alibaba DAMO Academy)" Date: Sat, 13 Sep 2025 00:12:33 -0400 Subject: [PATCH] hw/riscv/riscv-iommu: Fixup PDT Nested Walk MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Current implementation is wrong when iohgatp != bare. The RISC-V IOMMU specification has defined that the PDT is based on GPA, not SPA. So this patch fixes the problem, making PDT walk correctly when the G-stage table walk is enabled. Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation") Cc: qemu-stable@nongnu.org Cc: Sebastien Boeuf Cc: Tomasz Jeznach Reviewed-by: Weiwei Li Reviewed-by: Nutty Liu Signed-off-by: Guo Ren (Alibaba DAMO Academy) Tested-by: Chen Pei Tested-by: Fangyu Yu  Message-ID: <20250913041233.972870-1-guoren@kernel.org> [ Changes by AF: - Add braces to if statements ] Signed-off-by: Alistair Francis --- hw/riscv/riscv-iommu.c | 143 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 2 deletions(-) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index 155190d032d..b33c7fe3259 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -869,6 +869,145 @@ static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, return true; } +/** + * pdt_memory_read: PDT wrapper of dma_memory_read. + * + * @s: IOMMU Device State + * @ctx: Device Translation Context with devid and pasid set + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: length of the data transferred + * @attrs: memory transaction attributes + */ +static MemTxResult pdt_memory_read(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, + dma_addr_t addr, + void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + uint64_t gatp_mode, pte; + struct { + unsigned char step; + unsigned char levels; + unsigned char ptidxbits; + unsigned char ptesize; + } sc; + MemTxResult ret; + dma_addr_t base = addr; + + /* G stages translation mode */ + gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { + goto out; + } + + /* G stages translation tables root pointer */ + base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); + + /* Start at step 0 */ + sc.step = 0; + + if (s->fctl & RISCV_IOMMU_FCTL_GXL) { + /* 32bit mode for GXL == 1 */ + switch (gatp_mode) { + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 2; + sc.ptidxbits = 10; + sc.ptesize = 4; + break; + default: + return MEMTX_ACCESS_ERROR; + } + } else { + /* 64bit mode for GXL == 0 */ + switch (gatp_mode) { + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 3; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 4; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 5; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + default: + return MEMTX_ACCESS_ERROR; + } + } + + do { + const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits; + const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits * + (sc.levels - 1 - sc.step); + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); + const dma_addr_t pte_addr = base + idx * sc.ptesize; + + /* Address range check before first level lookup */ + if (!sc.step) { + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; + if ((addr & va_mask) != addr) { + return MEMTX_ACCESS_ERROR; + } + } + + /* Read page table entry */ + if (sc.ptesize == 4) { + uint32_t pte32 = 0; + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs); + pte = pte32; + } else { + ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs); + } + if (ret != MEMTX_OK) { + return ret; + } + + sc.step++; + hwaddr ppn = pte >> PTE_PPN_SHIFT; + + if (!(pte & PTE_V)) { + return MEMTX_ACCESS_ERROR; /* Invalid PTE */ + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */ + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { + return MEMTX_ACCESS_ERROR; /* Misaligned PPN */ + } else { + /* Leaf PTE, translation completed. */ + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); + break; + } + + if (sc.step == sc.levels) { + return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */ + } + } while (1); + +out: + return dma_memory_read(s->target_as, base, buf, len, attrs); +} + /* * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk * @@ -1041,7 +1180,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) */ const int split = depth * 9 + 8; addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; - if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + if (pdt_memory_read(s, ctx, addr, &de, sizeof(de), MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; } @@ -1056,7 +1195,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) /* Leaf entry in PDT */ addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; - if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, + if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2, MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; } -- 2.47.3