From: Nicolin Chen Date: Thu, 29 Jan 2026 13:32:04 +0000 (+0000) Subject: hw/arm/smmuv3-accel: Add set/unset_iommu_device callback X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=703b7b91db064b94bb67bc49eaf503b2d6a8d333;p=thirdparty%2Fqemu.git hw/arm/smmuv3-accel: Add set/unset_iommu_device callback Implement the VFIO/PCI callbacks to attach a HostIOMMUDevice to, and detach it from, a vSMMUv3 when accel=on: - set_iommu_device(): attach a HostIOMMUDevice to a vIOMMU - unset_iommu_device(): detach and release associated resources In SMMUv3 accel=on mode, the guest SMMUv3 is backed by the host SMMUv3 via IOMMUFD. A vIOMMU object (created via IOMMU_VIOMMU_ALLOC) provides a per-VM, security-isolated handle to the physical SMMUv3. Without a vIOMMU, the vSMMUv3 cannot relay guest operations to the host hardware or maintain isolation across VMs or devices. Therefore, set_iommu_device() allocates a vIOMMU object if one does not already exist. There are two main points to consider in this implementation: 1) VFIO core allocates and attaches an S2 HWPT that acts as the nesting parent for nested HWPTs (IOMMU_DOMAIN_NESTED). This parent HWPT will be shared across multiple vSMMU instances within a VM. 2) A device cannot attach directly to a vIOMMU. Instead, it attaches through a proxy nested HWPT (IOMMU_DOMAIN_NESTED). Based on the STE configuration, there are three types of nested HWPTs: bypass, abort, and translate. - The bypass and abort proxy HWPTs are pre-allocated. When SMMUv3 operates in global abort or bypass modes, as controlled by the GBPA register, or when the guest issues a vSTE for bypass or abort, we attach these pre-allocated nested HWPTs. - The translate HWPT requires a vDEVICE to be allocated first, since invalidations and events depend on a valid vSID. - The vDEVICE allocation and attach operations for vSTE-based HWPTs are implemented in subsequent patches. In summary, a device placed behind a vSMMU instance must have a vSID for a translate vSTE. 
The bypass and abort vSTEs are pre-allocated as proxy nested HWPTs and are attached based on the GBPA register. The core-managed nesting parent S2 HWPT is used as the parent S2 HWPT for all the nested HWPTs and is intended to be shared across vSMMU instances within the same VM. set_iommu_device(): - Reuse an existing vIOMMU for the same physical SMMU if available. If not, allocate a new one using the nesting parent S2 HWPT. - Pre-allocate two proxy nested HWPTs (bypass and abort) under the vIOMMU and install one based on the GBPA.ABORT value. - Add the device to the vIOMMU’s device list. unset_iommu_device(): - Re-attach the device to the nesting parent S2 HWPT. - Remove the device from the vIOMMU’s device list. - If the list is empty, free the proxy HWPTs (bypass and abort) and release the vIOMMU object. Introduce struct SMMUv3AccelState, representing an accelerated SMMUv3 instance backed by an iommufd vIOMMU object, and storing the bypass and abort proxy HWPT IDs. Signed-off-by: Nicolin Chen Signed-off-by: Shameer Kolothum Tested-by: Zhangfei Gao Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Shameer Kolothum Message-id: 20260126104342.253965-13-skolothumtho@nvidia.com Signed-off-by: Peter Maydell --- diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c index be09cf8b73..9c2b917a11 100644 --- a/hw/arm/smmuv3-accel.c +++ b/hw/arm/smmuv3-accel.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "trace.h" #include "hw/arm/smmuv3.h" #include "hw/core/iommu.h" @@ -15,6 +16,7 @@ #include "hw/pci-host/gpex.h" #include "hw/vfio/pci.h" +#include "smmuv3-internal.h" #include "smmuv3-accel.h" /* @@ -43,6 +45,157 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus, return accel_dev; } +static uint32_t smmuv3_accel_gbpa_hwpt(SMMUv3State *s, SMMUv3AccelState *accel) +{ + return FIELD_EX32(s->gbpa, GBPA, ABORT) ? 
+ accel->abort_hwpt_id : accel->bypass_hwpt_id; +} + +static bool +smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + SMMUv3AccelState *accel = s->s_accel; + struct iommu_hwpt_arm_smmuv3 bypass_data = { + .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL }, + }; + struct iommu_hwpt_arm_smmuv3 abort_data = { + .ste = { SMMU_STE_VALID, 0x0ULL }, + }; + uint32_t s2_hwpt_id = idev->hwpt_id; + uint32_t viommu_id, hwpt_id; + IOMMUFDViommu *viommu; + + if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid, + IOMMU_VIOMMU_TYPE_ARM_SMMUV3, + s2_hwpt_id, &viommu_id, errp)) { + return false; + } + + viommu = g_new0(IOMMUFDViommu, 1); + viommu->viommu_id = viommu_id; + viommu->s2_hwpt_id = s2_hwpt_id; + viommu->iommufd = idev->iommufd; + + /* + * Pre-allocate HWPTs for S1 bypass and abort cases. These will be attached + * later for guest STEs or GBPAs that require bypass or abort configuration. + */ + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id, + 0, IOMMU_HWPT_DATA_ARM_SMMUV3, + sizeof(abort_data), &abort_data, + &accel->abort_hwpt_id, errp)) { + goto free_viommu; + } + + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id, + 0, IOMMU_HWPT_DATA_ARM_SMMUV3, + sizeof(bypass_data), &bypass_data, + &accel->bypass_hwpt_id, errp)) { + goto free_abort_hwpt; + } + + /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */ + hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel); + if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) { + goto free_bypass_hwpt; + } + accel->viommu = viommu; + return true; + +free_bypass_hwpt: + iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id); +free_abort_hwpt: + iommufd_backend_free_id(idev->iommufd, accel->abort_hwpt_id); +free_viommu: + iommufd_backend_free_id(idev->iommufd, viommu->viommu_id); + g_free(viommu); + return false; +} + +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error 
**errp) +{ + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + SMMUState *bs = opaque; + SMMUv3State *s = ARM_SMMUV3(bs); + SMMUPciBus *sbus = smmu_get_sbus(bs, bus); + SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn); + + if (!idev) { + return true; + } + + if (accel_dev->idev) { + if (accel_dev->idev != idev) { + error_setg(errp, "Device already has an associated idev 0x%x", + idev->devid); + return false; + } + return true; + } + + if (s->s_accel->viommu) { + goto done; + } + + if (!smmuv3_accel_alloc_viommu(s, idev, errp)) { + error_append_hint(errp, "Unable to alloc vIOMMU: idev devid 0x%x: ", + idev->devid); + return false; + } + +done: + accel_dev->idev = idev; + accel_dev->s_accel = s->s_accel; + QLIST_INSERT_HEAD(&s->s_accel->device_list, accel_dev, next); + trace_smmuv3_accel_set_iommu_device(devfn, idev->devid); + return true; +} + +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque, + int devfn) +{ + SMMUState *bs = opaque; + SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus); + HostIOMMUDeviceIOMMUFD *idev; + SMMUv3AccelDevice *accel_dev; + SMMUv3AccelState *accel; + SMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev); + idev = accel_dev->idev; + accel = accel_dev->s_accel; + /* Re-attach the default s2 hwpt id */ + if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) { + error_report("Unable to attach the default HW pagetable: idev devid " + "0x%x", idev->devid); + } + + accel_dev->idev = NULL; + accel_dev->s_accel = NULL; + QLIST_REMOVE(accel_dev, next); + trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid); + + if (QLIST_EMPTY(&accel->device_list)) { + iommufd_backend_free_id(accel->viommu->iommufd, accel->bypass_hwpt_id); + iommufd_backend_free_id(accel->viommu->iommufd, accel->abort_hwpt_id); + 
iommufd_backend_free_id(accel->viommu->iommufd, + accel->viommu->viommu_id); + g_free(accel->viommu); + accel->viommu = NULL; + } +} + /* * Only allow PCIe bridges, pxb-pcie roots, and GPEX roots so vfio-pci * endpoints can sit downstream. Accelerated SMMUv3 requires a vfio-pci @@ -145,6 +298,8 @@ static const PCIIOMMUOps smmuv3_accel_ops = { .supports_address_space = smmuv3_accel_supports_as, .get_address_space = smmuv3_accel_find_add_as, .get_viommu_flags = smmuv3_accel_get_viommu_flags, + .set_iommu_device = smmuv3_accel_set_iommu_device, + .unset_iommu_device = smmuv3_accel_unset_iommu_device, }; static void smmuv3_accel_as_init(SMMUv3State *s) @@ -168,6 +323,7 @@ void smmuv3_accel_init(SMMUv3State *s) { SMMUState *bs = ARM_SMMU(s); + s->s_accel = g_new0(SMMUv3AccelState, 1); bs->iommu_ops = &smmuv3_accel_ops; smmuv3_accel_as_init(s); } diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h index 0dc6b00d35..efb631db4f 100644 --- a/hw/arm/smmuv3-accel.h +++ b/hw/arm/smmuv3-accel.h @@ -10,10 +10,28 @@ #define HW_ARM_SMMUV3_ACCEL_H #include "hw/arm/smmu-common.h" +#include "system/iommufd.h" +#ifdef CONFIG_LINUX +#include +#endif #include CONFIG_DEVICES +/* + * Represents an accelerated SMMU instance backed by an iommufd vIOMMU object. + * Holds bypass and abort proxy HWPT IDs used for device attachment. 
+ */ +typedef struct SMMUv3AccelState { + IOMMUFDViommu *viommu; + uint32_t bypass_hwpt_id; + uint32_t abort_hwpt_id; + QLIST_HEAD(, SMMUv3AccelDevice) device_list; +} SMMUv3AccelState; + typedef struct SMMUv3AccelDevice { SMMUDevice sdev; + HostIOMMUDeviceIOMMUFD *idev; + QLIST_ENTRY(SMMUv3AccelDevice) next; + SMMUv3AccelState *s_accel; } SMMUv3AccelDevice; #ifdef CONFIG_ARM_SMMUV3_ACCEL diff --git a/hw/arm/trace-events b/hw/arm/trace-events index f3386bd7ae..2aaa0c40c7 100644 --- a/hw/arm/trace-events +++ b/hw/arm/trace-events @@ -66,6 +66,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d" smmu_reset_exit(void) "" +#smmuv3-accel.c +smmuv3_accel_set_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev devid=0x%x)" +smmuv3_accel_unset_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev devid=0x%x)" + # strongarm.c strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d" strongarm_ssp_read_underrun(void) "SSP rx underrun" diff --git a/include/hw/arm/smmuv3-common.h b/include/hw/arm/smmuv3-common.h index f644618f38..153310248d 100644 --- a/include/hw/arm/smmuv3-common.h +++ b/include/hw/arm/smmuv3-common.h @@ -100,6 +100,9 @@ REG32(STE_7, 28) #define STE_CFG_ABORT(config) (!(config & 0x4)) #define STE_CFG_BYPASS(config) (config == 0x4) +#define SMMU_STE_VALID (1ULL << 0) +#define SMMU_STE_CFG_BYPASS (1ULL << 3) + /* Update STE fields */ #define STE_SET_VALID(ste, v) \ ((ste)->word[0] = FIELD_DP32((ste)->word[0], STE_0, VALID, (v))) diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index bb7076286b..e54ece2d38 100644 --- a/include/hw/arm/smmuv3.h +++ b/include/hw/arm/smmuv3.h @@ -66,6 +66,7 @@ struct SMMUv3State { /* 
SMMU has HW accelerator support for nested S1 + s2 */ bool accel; + struct SMMUv3AccelState *s_accel; }; typedef enum {