git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/amdxdna: Add AIE4 VF hardware context create and destroy
author: David Zhang <yidong.zhang@amd.com>
Tue, 5 May 2026 16:09:33 +0000 (09:09 -0700)
committer: Lizhi Hou <lizhi.hou@amd.com>
Thu, 7 May 2026 21:07:34 +0000 (14:07 -0700)
Implement hardware context creation and destruction for AIE4 VF devices.

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260505160936.3917732-4-lizhi.hou@amd.com
drivers/accel/amdxdna/Makefile
drivers/accel/amdxdna/aie4_ctx.c [new file with mode: 0644]
drivers/accel/amdxdna/aie4_host_queue.h [new file with mode: 0644]
drivers/accel/amdxdna/aie4_msg_priv.h
drivers/accel/amdxdna/aie4_pci.c
drivers/accel/amdxdna/aie4_pci.h
drivers/accel/amdxdna/amdxdna_ctx.c
drivers/accel/amdxdna/amdxdna_ctx.h
include/uapi/drm/amdxdna_accel.h

index d7720c8c8a9803fbca545d7e89183f2ed51db19d..05cce0a38692beab340d612d8720e0a3f3dfe9f6 100644 (file)
@@ -10,6 +10,7 @@ amdxdna-y := \
        aie2_pci.o \
        aie2_pm.o \
        aie2_solver.o \
+       aie4_ctx.o \
        aie4_message.o \
        aie4_pci.o \
        amdxdna_cbuf.o \
diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
new file mode 100644 (file)
index 0000000..84ac706
--- /dev/null
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "aie.h"
+#include "aie4_host_queue.h"
+#include "aie4_msg_priv.h"
+#include "aie4_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Interrupt handler registered by aie4_lookup_cert_comp(). @p is the
+ * cert_comp passed as the request_irq() cookie; every task sleeping on its
+ * wait queue is woken.
+ */
+static irqreturn_t cert_comp_isr(int irq, void *p)
+{
+       struct cert_comp *comp = p;
+
+       wake_up_all(&comp->waitq);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Get the completion object bound to an MSI-X index, creating it on first use.
+ *
+ * @ndev: device handle that owns cert_comp_xa and cert_comp_lock
+ * @msix_idx: MSI-X index used as the key into cert_comp_xa
+ *
+ * If an entry already exists for @msix_idx an extra reference is taken on it.
+ * Otherwise a new cert_comp is allocated, the Linux IRQ for the vector is
+ * requested with cert_comp_isr() as handler, and the object is stored in the
+ * xarray holding its initial reference. The whole operation runs with
+ * ndev->cert_comp_lock held.
+ *
+ * Return: the cert_comp on success, NULL on any failure (allocation, invalid
+ * vector, request_irq() or xa_store() error).
+ */
+static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx)
+{
+       struct amdxdna_dev *xdna = ndev->aie.xdna;
+       struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+       struct cert_comp *cert_comp;
+       int ret;
+
+       /* Held until return; aie4_put_cert_comp() takes the same lock. */
+       guard(mutex)(&ndev->cert_comp_lock);
+
+       /* Fast path: the vector is already set up, just share it. */
+       cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx);
+       if (cert_comp) {
+               kref_get(&cert_comp->kref);
+               return cert_comp;
+       }
+
+       cert_comp = kzalloc_obj(*cert_comp);
+       if (!cert_comp)
+               return NULL;
+
+       cert_comp->ndev = ndev;
+       cert_comp->msix_idx = msix_idx;
+       init_waitqueue_head(&cert_comp->waitq);
+       kref_init(&cert_comp->kref);
+
+       /* Translate the MSI-X index into a Linux IRQ number. */
+       ret = pci_irq_vector(pdev, cert_comp->msix_idx);
+       if (ret < 0) {
+               XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret);
+               goto free_cert_comp;
+       }
+       cert_comp->irq = ret;
+
+       ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp);
+       if (ret) {
+               XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret);
+               goto free_cert_comp;
+       }
+
+       ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL));
+       if (ret) {
+               /* msix_idx is u32: print it unsigned, as the message above does. */
+               XDNA_ERR(xdna, "store cert_comp for msix idx %u failed %d", msix_idx, ret);
+               goto free_irq;
+       }
+
+       return cert_comp;
+
+free_irq:
+       free_irq(cert_comp->irq, cert_comp);
+free_cert_comp:
+       kfree(cert_comp);
+       return NULL;
+}
+
+/*
+ * kref release callback for a cert_comp: remove it from the device xarray,
+ * free its IRQ and free the object. Warns if cert_comp_lock is not held.
+ */
+static void cert_comp_release(struct kref *kref)
+{
+       struct cert_comp *comp = container_of(kref, struct cert_comp, kref);
+       struct amdxdna_dev_hdl *ndev = comp->ndev;
+       struct amdxdna_dev *xdna = ndev->aie.xdna;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock));
+
+       xa_erase(&ndev->cert_comp_xa, comp->msix_idx);
+       free_irq(comp->irq, comp);
+       kfree(comp);
+}
+
+/*
+ * Drop one reference on @cert_comp while holding the device's cert_comp_lock.
+ * The final put runs cert_comp_release().
+ */
+static void aie4_put_cert_comp(struct cert_comp *cert_comp)
+{
+       struct amdxdna_dev_hdl *ndev = cert_comp->ndev;
+
+       guard(mutex)(&ndev->cert_comp_lock);
+       kref_put(&cert_comp->kref, cert_comp_release);
+}
+
+/*
+ * Send a destroy-hw-context management message for @hw_context_id and wait
+ * for it. `req` and `msg` come from DECLARE_AIE_MSG().
+ *
+ * Return: the result of aie_send_mgmt_msg_wait().
+ */
+static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id)
+{
+       DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT);
+       int ret;
+
+       req.hw_context_id = hw_context_id;
+       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+
+       return ret;
+}
+
+/*
+ * Ask firmware to create a hardware context for @hwctx.
+ *
+ * The request carries the device partition id, the requested tile count, the
+ * client PASID (with the valid bit set), the QoS priority and the device
+ * address of the user mode queue BO. On success the returned context id and
+ * doorbell offset are recorded and a completion object is attached for the
+ * MSI-X index named in the response. Warns if xdna->dev_lock is not held.
+ *
+ * Return: 0 on success; -EINVAL for a zero partition id / tile count or when
+ * no completion object could be obtained (the firmware context is destroyed
+ * again in that case); otherwise the error from aie_send_mgmt_msg_wait().
+ */
+static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx)
+{
+       DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT);
+       struct amdxdna_hwctx_priv *priv = hwctx->priv;
+       struct amdxdna_client *client = hwctx->client;
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+       int ret;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+       if (!(ndev->partition_id && hwctx->num_tiles)) {
+               XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d",
+                        ndev->partition_id, hwctx->num_tiles);
+               return -EINVAL;
+       }
+
+       /* Device address of the user mode queue, split into two 32-bit words. */
+       req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+       req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+
+       req.pasid = FIELD_PREP(AIE4_MSG_PASID_VLD, 1) |
+               FIELD_PREP(AIE4_MSG_PASID, client->pasid);
+       req.priority_band = hwctx->qos.priority;
+       req.request_num_tiles = hwctx->num_tiles;
+       req.partition_id = ndev->partition_id;
+
+       XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]",
+                req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low);
+
+       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+       if (ret) {
+               XDNA_ERR(xdna, "create ctx failed: %d", ret);
+               return ret;
+       }
+
+       XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d",
+                resp.job_complete_msix_idx,
+                resp.hw_context_id,
+                resp.doorbell_offset);
+
+       /* One completion object is shared by all contexts on the same MSI-X index. */
+       priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx);
+       if (!priv->cert_comp) {
+               /* Undo the firmware-side creation; its result is not checked. */
+               aie4_msg_destroy_context(ndev, resp.hw_context_id);
+               return -EINVAL;
+       }
+
+       hwctx->doorbell_offset = resp.doorbell_offset;
+       priv->hw_ctx_id = resp.hw_context_id;
+
+       return 0;
+}
+
+/*
+ * Tear down what aie4_hwctx_create() set up: send the destroy message for
+ * the firmware context (result not checked) and drop this context's
+ * reference on its completion object. Warns if xdna->dev_lock is not held.
+ */
+static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct amdxdna_hwctx_priv *priv = hwctx->priv;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+       aie4_msg_destroy_context(xdna->dev_handle, priv->hw_ctx_id);
+       aie4_put_cert_comp(priv->cert_comp);
+}
+
+/*
+ * Release the user mode queue BO reference taken by aie4_hwctx_umq_init().
+ * Does nothing when there is no priv or no BO attached.
+ */
+static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_hwctx_priv *priv = hwctx->priv;
+
+       if (!priv || !priv->umq_bo)
+               return;
+
+       amdxdna_gem_put_obj(priv->umq_bo);
+}
+
+/*
+ * Attach the user mode queue BO named by hwctx->umq_bo_hdl to the context.
+ *
+ * Takes a reference on the BO, checks that it can hold a host_queue_header,
+ * maps it and records the BO plus kernel pointers to the header's read and
+ * write indices in hwctx->priv.
+ *
+ * Return: 0 on success, -ENOENT if the handle cannot be resolved, -EINVAL if
+ * the BO is smaller than the header, -ENOMEM if mapping fails. The BO
+ * reference is dropped on every failure.
+ */
+static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct host_queue_header *hdr;
+       struct amdxdna_gem_obj *bo;
+
+       bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE);
+       if (!bo) {
+               XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl);
+               return -ENOENT;
+       }
+
+       if (bo->mem.size < sizeof(*hdr)) {
+               XDNA_ERR(xdna, "umq_bo size is too small");
+               amdxdna_gem_put_obj(bo);
+               return -EINVAL;
+       }
+
+       /* Kernel mapping used to reach the queue's read and write indices. */
+       hdr = amdxdna_gem_vmap(bo);
+       if (!hdr) {
+               amdxdna_gem_put_obj(bo);
+               return -ENOMEM;
+       }
+
+       hwctx->priv->umq_bo = bo;
+       hwctx->priv->umq_read_index = &hdr->read_index;
+       hwctx->priv->umq_write_index = &hdr->write_index;
+
+       return 0;
+}
+
+/*
+ * hwctx_init callback for AIE4 VF devices.
+ *
+ * Allocates the per-context private data, attaches the user mode queue BO
+ * and creates the firmware hardware context. On failure everything set up so
+ * far is undone and hwctx->priv is left NULL.
+ *
+ * Return: 0 on success, -ENOMEM if the private data cannot be allocated, or
+ * the error from aie4_hwctx_umq_init() / aie4_hwctx_create().
+ */
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct amdxdna_hwctx_priv *priv;
+       int ret;
+
+       priv = kzalloc_obj(*priv);
+       if (!priv)
+               return -ENOMEM;
+       hwctx->priv = priv;
+
+       ret = aie4_hwctx_umq_init(hwctx);
+       if (ret)
+               goto err_free_priv;
+
+       ret = aie4_hwctx_create(hwctx);
+       if (ret)
+               goto err_umq_fini;
+
+       XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+       return 0;
+
+err_umq_fini:
+       aie4_hwctx_umq_fini(hwctx);
+err_free_priv:
+       hwctx->priv = NULL;
+       kfree(priv);
+       return ret;
+}
+
+/*
+ * hwctx_fini callback for AIE4 VF devices: destroy the firmware context,
+ * release the user mode queue BO and free the private data allocated by
+ * aie4_hwctx_init().
+ */
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{
+       aie4_hwctx_destroy(hwctx);
+       aie4_hwctx_umq_fini(hwctx);
+       kfree(hwctx->priv);
+       /* Match the init error path: never leave a dangling priv pointer. */
+       hwctx->priv = NULL;
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
new file mode 100644 (file)
index 0000000..eb6a38d
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE4_HOST_QUEUE_H_
+#define _AIE4_HOST_QUEUE_H_
+
+#include <linux/types.h>
+
+/*
+ * Header located at the start of the user mode queue BO.
+ *
+ * aie4_hwctx_umq_init() requires the BO to be at least this large, maps it
+ * and keeps kernel pointers to read_index and write_index.
+ * NOTE(review): which side (host or device) advances each index is not
+ * visible in this file - confirm against the queue protocol.
+ */
+struct host_queue_header {
+       __u64 read_index;
+       struct {
+               __u16 major;
+               __u16 minor;
+       } version;
+       __u32 capacity; /* Queue capacity, must be power of two. */
+       __u64 write_index;
+       __u64 data_address; /* The xdna dev addr for payload. */
+};
+
+#endif /* _AIE4_HOST_QUEUE_H_ */
index cada532579215c936424736c3a9dbad2cd833c71..7faa01ca3436105ab84d1a18284fe1e9fa94f62a 100644 (file)
@@ -16,6 +16,8 @@ enum aie4_msg_opcode {
 
        AIE4_MSG_OP_CREATE_PARTITION                 = 0x30001,
        AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
+       AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
+       AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
 };
 
 enum aie4_msg_status {
@@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp {
        enum aie4_msg_status status;
 } __packed;
 
+/*
+ * Request body used with AIE4_MSG_OP_CREATE_HW_CONTEXT.
+ *
+ * As filled in by aie4_hwctx_create():
+ *   partition_id        - the device's partition id
+ *   request_num_tiles   - number of tiles requested for the context
+ *   hsa_addr_high/low   - upper/lower 32 bits of the user mode queue BO
+ *                         device address
+ *   pasid               - client PASID in bits 19:0 (AIE4_MSG_PASID) with
+ *                         bit 31 (AIE4_MSG_PASID_VLD) set as valid flag
+ *   priority_band       - the context's QoS priority
+ */
+struct aie4_msg_create_hw_context_req {
+       __u32 partition_id;
+       __u32 request_num_tiles;
+       __u32 hsa_addr_high;
+       __u32 hsa_addr_low;
+#define AIE4_MSG_PASID GENMASK(19, 0)
+#define AIE4_MSG_PASID_VLD GENMASK(31, 31)
+       __u32 pasid;
+       __u32 priority_band;
+} __packed;
+
+/*
+ * Response body used with AIE4_MSG_OP_CREATE_HW_CONTEXT.
+ *
+ * aie4_hwctx_create() stores hw_context_id for the later destroy message,
+ * copies doorbell_offset into the hwctx, and uses job_complete_msix_idx to
+ * select the completion object (cert_comp) for the context.
+ */
+struct aie4_msg_create_hw_context_resp {
+       enum aie4_msg_status status;
+       __u32 hw_context_id;
+       __u32 doorbell_offset;
+       __u32 job_complete_msix_idx;
+} __packed;
+
+/*
+ * Request body used with AIE4_MSG_OP_DESTROY_HW_CONTEXT. hw_context_id is the
+ * id returned by the create response. resvd1 is never written by the sender
+ * in aie4_ctx.c (presumably reserved - confirm against the firmware spec).
+ */
+struct aie4_msg_destroy_hw_context_req {
+       __u32 hw_context_id;
+       __u32 resvd1;
+} __packed;
+
+/* Response body used with AIE4_MSG_OP_DESTROY_HW_CONTEXT: status only. */
+struct aie4_msg_destroy_hw_context_resp {
+       enum aie4_msg_status status;
+} __packed;
+
 #endif /* _AIE4_MSG_PRIV_H_ */
index 13f5d45e388de408675ac2dc11d5b875661b5750..3be9066b71782d57bdeb9bcb5d32b8a4e466157d 100644 (file)
@@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
        ndev->aie.xdna = xdna;
        xdna->dev_handle = ndev;
 
+       xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC);
+       mutex_init(&ndev->cert_comp_lock);
+
        /* Enable managed PCI device */
        ret = pcim_enable_device(pdev);
        if (ret) {
@@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = {
 const struct amdxdna_dev_ops aie4_vf_ops = {
        .init                   = aie4_vf_init,
        .fini                   = aie4_vf_fini,
+       .hwctx_init             = aie4_hwctx_init,
+       .hwctx_fini             = aie4_hwctx_fini,
 };
index 620fb5bd23e47d9b10a4583a586d9695661e4d40..6103007e6d2f7e9902fb894a08fadc59c4f37f61 100644 (file)
 #include "aie.h"
 #include "amdxdna_mailbox.h"
 
+/*
+ * Completion object for one MSI-X index, shared by reference count between
+ * the hardware contexts that use that index.
+ *
+ * ndev      - owning device handle (holds the xarray and lock for this object)
+ * msix_idx  - MSI-X index; also the key in ndev->cert_comp_xa
+ * irq       - Linux IRQ number obtained from pci_irq_vector() for msix_idx
+ * kref      - reference count; the last put frees the IRQ and the object
+ * waitq     - wait queue woken by the interrupt handler
+ */
+struct cert_comp {
+       struct amdxdna_dev_hdl          *ndev;
+       u32                             msix_idx;
+       int                             irq;
+       struct kref                     kref;
+       wait_queue_head_t               waitq;
+};
+
+/*
+ * AIE4 private state of a hardware context, allocated by aie4_hwctx_init().
+ *
+ * umq_bo           - referenced user mode queue BO
+ * umq_read_index   - kernel pointer to read_index in the mapped queue header
+ * umq_write_index  - kernel pointer to write_index in the mapped queue header
+ * cert_comp        - completion object for the context's MSI-X index
+ * hw_ctx_id        - context id returned by firmware, used for destroy
+ */
+struct amdxdna_hwctx_priv {
+       struct amdxdna_gem_obj          *umq_bo;
+       u64                             *umq_read_index;
+       u64                             *umq_write_index;
+
+       struct cert_comp                *cert_comp;
+       u32                             hw_ctx_id;
+};
+
 struct amdxdna_dev_priv {
        const char              *npufw_path;
        const char              *certfw_path;
@@ -32,11 +49,18 @@ struct amdxdna_dev_hdl {
 
        struct mailbox                  *mbox;
        u32                             partition_id;
+
+       struct xarray                   cert_comp_xa; /* device level indexed by msix id */
+       struct mutex                    cert_comp_lock; /* protects cert_comp operations*/
 };
 
 /* aie4_message.c */
 int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
 
+/* aie4_ctx.c */
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+
 /* aie4_sriov.c */
 #if IS_ENABLED(CONFIG_PCI_IOV)
 int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs);
index 2c2c21992c87417e7594962037fa01e222c352b3..b5ad60d4b73411b57c8b6a09fd32fd4b5d7cba56 100644 (file)
@@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
        if (args->ext || args->ext_flags)
                return -EINVAL;
 
+       if (!xdna->dev_info->ops->hwctx_init)
+               return -EOPNOTSUPP;
+
        hwctx = kzalloc_obj(*hwctx);
        if (!hwctx)
                return -ENOMEM;
@@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
        hwctx->client = client;
        hwctx->fw_ctx_id = -1;
        hwctx->num_tiles = args->num_tiles;
+       hwctx->umq_bo_hdl = args->umq_bo;
+       hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET;
        hwctx->mem_size = args->mem_size;
        hwctx->max_opc = args->max_opc;
 
@@ -252,6 +257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 
        args->handle = hwctx->id;
        args->syncobj_handle = hwctx->syncobj_hdl;
+       args->umq_doorbell = hwctx->doorbell_offset;
 
        atomic64_set(&hwctx->job_submit_cnt, 0);
        atomic64_set(&hwctx->job_free_cnt, 0);
index 3557986873766019681240c13b539f23c2197ef2..c5622718b4d567ecb9cad08a72b96627d08c5d80 100644 (file)
@@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv;
 
 enum ert_cmd_opcode {
        ERT_START_CU = 0,
+       ERT_START_DPU = 18,
        ERT_CMD_CHAIN = 19,
        ERT_START_NPU = 20,
        ERT_START_NPU_PREEMPT = 21,
@@ -105,6 +106,8 @@ struct amdxdna_hwctx {
        u32                             *col_list;
        u32                             start_col;
        u32                             num_col;
+       u32                             umq_bo_hdl;
+       u32                             doorbell_offset;
        u32                             num_unused_col;
 
        struct amdxdna_qos_info              qos;
index 34212feee15c89e392ff21ca6d91165c72b3b9c5..ad9b33dd7b13a99e58d39f7a174df01700e77f2f 100644 (file)
@@ -18,6 +18,7 @@ extern "C" {
 #define AMDXDNA_INVALID_CTX_HANDLE     0
 #define AMDXDNA_INVALID_BO_HANDLE      0
 #define AMDXDNA_INVALID_FENCE_HANDLE   0
+#define AMDXDNA_INVALID_DOORBELL_OFFSET        (~0U)
 
 /*
  * Define hardware context priority