#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/generic_pt/iommu.h>
#include "../iommu-pages.h"
#include "iommu-bits.h"
/* This struct contains protection domain specific IOMMU driver data. */
struct riscv_iommu_domain {
- struct iommu_domain domain;
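+	/*
+	 * Overlay the core iommu_domain with the generic-pt table struct
+	 * so the same object can be viewed through either type;
+	 * PT_IOMMU_CHECK_DOMAIN() below statically asserts that the two
+	 * views of the embedded iommu_domain line up.
+	 */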
+ union {
+ struct iommu_domain domain;
+ struct pt_iommu_riscv_64 riscvpt;
+ };
struct list_head bonds;
spinlock_t lock; /* protect bonds list updates. */
int pscid;
- int numa_node;
- unsigned int pgd_mode;
- unsigned long *pgd_root;
};
+PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);
#define iommu_domain_to_riscv(iommu_domain) \
container_of(iommu_domain, struct riscv_iommu_domain, domain)
static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
				   struct iommu_iotlb_gather *gather)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
-}
-
-#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
-
-#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
-#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)
-#define _io_pte_none(pte) ((pte) == 0)
-#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
-
-static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
- unsigned long pte,
- struct iommu_pages_list *freelist)
-{
- unsigned long *ptr;
- int i;
-
- if (!_io_pte_present(pte) || _io_pte_leaf(pte))
- return;
-
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
-
- /* Recursively free all sub page table pages */
- for (i = 0; i < PTRS_PER_PTE; i++) {
- pte = READ_ONCE(ptr[i]);
- if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
- riscv_iommu_pte_free(domain, pte, freelist);
- }
-
- if (freelist)
- iommu_pages_list_add(freelist, ptr);
- else
- iommu_free_pages(ptr);
-}
-
-static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
- unsigned long iova, size_t pgsize,
- gfp_t gfp)
-{
- unsigned long *ptr = domain->pgd_root;
- unsigned long pte, old;
- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
- void *addr;
-
- do {
- const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
- /*
- * Note: returned entry might be a non-leaf if there was
- * existing mapping with smaller granularity. Up to the caller
- * to replace and invalidate.
- */
- if (((size_t)1 << shift) == pgsize)
- return ptr;
-pte_retry:
- pte = READ_ONCE(*ptr);
- /*
- * This is very likely incorrect as we should not be adding
- * new mapping with smaller granularity on top
- * of existing 2M/1G mapping. Fail.
- */
- if (_io_pte_present(pte) && _io_pte_leaf(pte))
- return NULL;
- /*
- * Non-leaf entry is missing, allocate and try to add to the
- * page table. This might race with other mappings, retry.
- */
- if (_io_pte_none(pte)) {
- addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
- SZ_4K);
- if (!addr)
- return NULL;
- old = pte;
- pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
- if (cmpxchg_relaxed(ptr, old, pte) != old) {
- iommu_free_pages(addr);
- goto pte_retry;
- }
- }
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
- } while (level-- > 0);
-
- return NULL;
-}
-
-static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
- unsigned long iova, size_t *pte_pgsize)
-{
- unsigned long *ptr = domain->pgd_root;
- unsigned long pte;
- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
-
- do {
- const int shift = PAGE_SHIFT + PT_SHIFT * level;
-
- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
- pte = READ_ONCE(*ptr);
- if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
- *pte_pgsize = (size_t)1 << shift;
- return ptr;
- }
- if (_io_pte_none(pte))
- return NULL;
- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
- } while (level-- > 0);
-
- return NULL;
-}
-
-static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
- unsigned long iova, phys_addr_t phys,
- size_t pgsize, size_t pgcount, int prot,
- gfp_t gfp, size_t *mapped)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t size = 0;
- unsigned long *ptr;
- unsigned long pte, old, pte_prot;
- int rc = 0;
- struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-
- if (!(prot & IOMMU_WRITE))
- pte_prot = _PAGE_BASE | _PAGE_READ;
- else
- pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
-
- while (pgcount) {
- ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
- if (!ptr) {
- rc = -ENOMEM;
- break;
- }
-
- old = READ_ONCE(*ptr);
- pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
- if (cmpxchg_relaxed(ptr, old, pte) != old)
- continue;
-
- riscv_iommu_pte_free(domain, old, &freelist);
-
- size += pgsize;
- iova += pgsize;
- phys += pgsize;
- --pgcount;
- }
-
- *mapped = size;
-
- if (!iommu_pages_list_empty(&freelist)) {
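+	/*
+	 * Table pages freed by the generic-pt unmap path are queued on
+	 * gather->freelist; flush the IOTLB before handing them back to
+	 * the allocator.
+	 */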
+ if (iommu_pages_list_empty(&gather->freelist)) {
+ riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
+ } else {
	/*
	 * In 1.0 spec version, the smallest scope we can use to
	 * invalidate all levels of page table (i.e. leaf and non-leaf)
	 * is an invalidate-all-PSCID IOTINVAL.VMA with AV=0.
	 * This will be updated with hardware support for
	 * capability.NL (non-leaf) IOTINVAL command.
	 */
riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
- iommu_put_pages_list(&freelist);
- }
-
- return rc;
-}
-
-static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
- unsigned long iova, size_t pgsize,
- size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t size = pgcount << __ffs(pgsize);
- unsigned long *ptr, old;
- size_t unmapped = 0;
- size_t pte_size;
-
- while (unmapped < size) {
- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
- if (!ptr)
- return unmapped;
-
- /* partial unmap is not allowed, fail. */
- if (iova & (pte_size - 1))
- return unmapped;
-
- old = READ_ONCE(*ptr);
- if (cmpxchg_relaxed(ptr, old, 0) != old)
- continue;
-
- iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
- pte_size);
-
- iova += pte_size;
- unmapped += pte_size;
+ iommu_put_pages_list(&gather->freelist);
}
-
- return unmapped;
-}
-
-static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
- dma_addr_t iova)
-{
- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- size_t pte_size;
- unsigned long *ptr;
-
- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
- if (!ptr)
- return 0;
-
- return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
}
static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- const unsigned long pfn = virt_to_pfn(domain->pgd_root);
WARN_ON(!list_empty(&domain->bonds));
if ((int)domain->pscid > 0)
ida_free(&riscv_iommu_pscids, domain->pscid);
- riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
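+	/* generic-pt owns the page table; this frees all table memory. */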
+ pt_iommu_deinit(&domain->riscvpt.iommu);
kfree(domain);
}
static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
					    struct device *dev)
{
	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
struct riscv_iommu_device *iommu = dev_to_iommu(dev);
struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
+ struct pt_iommu_riscv_64_hw_info pt_info;
u64 fsc, ta;
- if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
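+	/*
+	 * Query the table format generic-pt selected: the iosatp MODE
+	 * encoding and the root page number used to build the FSC field.
+	 */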
+ pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);
+
+ if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))
return -ENODEV;
- fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
- FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+ fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |
+ FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);
ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
RISCV_IOMMU_PC_TA_V;
}
static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
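+	/*
+	 * Provides the generic-pt implementations of map_pages,
+	 * unmap_pages and iova_to_phys for the riscv_64 format.
+	 */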
+ IOMMU_PT_DOMAIN_OPS(riscv_64),
.attach_dev = riscv_iommu_attach_paging_domain,
.free = riscv_iommu_free_paging_domain,
- .map_pages = riscv_iommu_map_pages,
- .unmap_pages = riscv_iommu_unmap_pages,
- .iova_to_phys = riscv_iommu_iova_to_phys,
.iotlb_sync = riscv_iommu_iotlb_sync,
.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
};
static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
{
+ struct pt_iommu_riscv_64_cfg cfg = {};
struct riscv_iommu_domain *domain;
struct riscv_iommu_device *iommu;
- unsigned int pgd_mode;
- dma_addr_t va_mask;
- int va_bits;
+ int ret;
iommu = dev_to_iommu(dev);
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
- va_bits = 57;
+ cfg.common.hw_max_vasz_lg2 = 57;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
- va_bits = 48;
+ cfg.common.hw_max_vasz_lg2 = 48;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
- va_bits = 39;
+ cfg.common.hw_max_vasz_lg2 = 39;
} else {
dev_err(dev, "cannot find supported page table mode\n");
return ERR_PTR(-ENODEV);
}
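+	/* Sv39/Sv48/Sv57 PTEs hold a 44-bit PPN, i.e. a 56-bit PA space. */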
+ cfg.common.hw_max_oasz_lg2 = 56;
	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD_RCU(&domain->bonds);
spin_lock_init(&domain->lock);
- domain->numa_node = dev_to_node(iommu->dev);
- domain->pgd_mode = pgd_mode;
- domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
- GFP_KERNEL_ACCOUNT, SZ_4K);
- if (!domain->pgd_root) {
- kfree(domain);
- return ERR_PTR(-ENOMEM);
- }
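+	/*
+	 * PT_FEAT_SIGN_EXTEND has generic-pt model the sign-extended
+	 * upper VA space required by the privileged spec, superseding the
+	 * open-coded VA_BITS - 1 aperture workaround removed below;
+	 * PT_FEAT_FLUSH_RANGE requests ranged IOTLB invalidations.
+	 */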
+ cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
+ BIT(PT_FEAT_FLUSH_RANGE);
+ domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
+ domain->domain.ops = &riscv_iommu_paging_domain_ops;
domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
if (domain->pscid < 0) {
- iommu_free_pages(domain->pgd_root);
- kfree(domain);
+ riscv_iommu_free_paging_domain(&domain->domain);
return ERR_PTR(-ENOMEM);
}
- /*
- * Note: RISC-V Privilege spec mandates that virtual addresses
- * need to be sign-extended, so if (VA_BITS - 1) is set, all
- * bits >= VA_BITS need to also be set or else we'll get a
- * page fault. However the code that creates the mappings
- * above us (e.g. iommu_dma_alloc_iova()) won't do that for us
- * for now, so we'll end up with invalid virtual addresses
- * to map. As a workaround until we get this sorted out
- * limit the available virtual addresses to VA_BITS - 1.
- */
- va_mask = DMA_BIT_MASK(va_bits - 1);
-
- domain->domain.geometry.aperture_start = 0;
- domain->domain.geometry.aperture_end = va_mask;
- domain->domain.geometry.force_aperture = true;
- domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
-
- domain->domain.ops = &riscv_iommu_paging_domain_ops;
-
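+	/*
+	 * Allocates the root table and derives the aperture and
+	 * pgsize_bitmap that were previously set up by hand.
+	 */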
+ ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
+ if (ret) {
+ riscv_iommu_free_paging_domain(&domain->domain);
+ return ERR_PTR(ret);
+ }
return &domain->domain;
}
riscv_iommu_queue_disable(&iommu->cmdq);
return rc;
}
+
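+/* The generic-pt library exports its symbols in a named namespace. */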
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");