git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/amdxdna: Add command doorbell and wait support
author David Zhang <yidong.zhang@amd.com>
Tue, 5 May 2026 16:09:34 +0000 (09:09 -0700)
committer Lizhi Hou <lizhi.hou@amd.com>
Thu, 7 May 2026 21:07:34 +0000 (14:07 -0700)
Expose the command doorbell register to userspace on a per-hardware
context basis, enabling applications to notify the firmware of pending
commands via doorbell writes.

Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
completion of individual commands.

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260505160936.3917732-5-lizhi.hou@amd.com
drivers/accel/amdxdna/aie4_ctx.c
drivers/accel/amdxdna/aie4_host_queue.h
drivers/accel/amdxdna/aie4_pci.c
drivers/accel/amdxdna/aie4_pci.h
drivers/accel/amdxdna/amdxdna_ctx.c
drivers/accel/amdxdna/amdxdna_ctx.h
drivers/accel/amdxdna/amdxdna_gem.c
drivers/accel/amdxdna/amdxdna_pci_drv.c
drivers/accel/amdxdna/amdxdna_pci_drv.h
drivers/accel/amdxdna/npu3_regs.c
include/uapi/drm/amdxdna_accel.h

index 84ac706d0ffb340fba14ca47bb8ef25329a348ec..8408b0d2696f62a6f81f4ae4d06b9b8e1c77310c 100644 (file)
@@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
        aie4_hwctx_umq_fini(hwctx);
        kfree(hwctx->priv);
 }
+
+/*
+ * Return true when @read and @write describe a consistent queue state: the
+ * read index has not passed the write index and at most @capacity entries
+ * lie between them.
+ */
+static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
+{
+       if (write < read)
+               return false;
+
+       return write - read <= capacity;
+}
+
+/*
+ * Fetch the context's UMQ read index, validated against the write index.
+ *
+ * Returns the read index, or 0 when it is still inconsistent with the write
+ * index after one retry. The retry path sleeps in usleep_range().
+ */
+static u64 get_read_index(struct amdxdna_hwctx *hwctx)
+{
+       u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
+       u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+       if (valid_queue_index(ri, wi, CTX_MAX_CMDS))
+               return ri;
+
+       /*
+        * CERT cannot update the 64-bit read index atomically, so the driver
+        * may observe a half-updated value once the upper 32 bits are in use.
+        * Give CERT a moment to finish the update and sample the read index
+        * once more before giving up.
+        */
+       XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
+       usleep_range(100, 200);
+
+       ri = READ_ONCE(*hwctx->priv->umq_read_index);
+       if (valid_queue_index(ri, wi, CTX_MAX_CMDS))
+               return ri;
+
+       XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
+       return 0;
+}
+
+/* A command is done once the read index has advanced past its sequence number. */
+static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+       return get_read_index(hwctx) > seq;
+}
+
+/*
+ * Wait until command @seq of @hwctx has completed.
+ *
+ * @timeout is in milliseconds; 0 waits without a time limit.
+ *
+ * Returns 0 once the command is done, -ETIME if the timeout expired first, or
+ * the negative value reported by wait_event_interruptible_timeout().
+ *
+ * NOTE(review): check_cmd_done() can sleep in usleep_range() on its retry
+ * path while it is being evaluated as the wait condition - confirm this is
+ * acceptable.
+ */
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
+{
+       struct cert_comp *comp = hwctx->priv->cert_comp;
+       unsigned long jifs;
+       long rc;
+
+       jifs = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT;
+
+       rc = wait_event_interruptible_timeout(comp->waitq,
+                                             check_cmd_done(hwctx, seq),
+                                             jifs);
+       if (rc > 0)
+               return 0;
+       if (rc == 0)
+               return -ETIME;
+
+       return rc;
+}
+
+/*
+ * Return 1 if @vm_pgoff equals the doorbell page offset of one of @client's
+ * hardware contexts, 0 otherwise. The contexts are walked under the client's
+ * hwctx SRCU read lock.
+ */
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
+{
+       struct amdxdna_hwctx *hwctx;
+       unsigned long hwctx_id;
+       int found = 0;
+       int idx;
+
+       idx = srcu_read_lock(&client->hwctx_srcu);
+       amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+               if ((hwctx->doorbell_offset >> PAGE_SHIFT) == vm_pgoff) {
+                       found = 1;
+                       break;
+               }
+       }
+       srcu_read_unlock(&client->hwctx_srcu, idx);
+
+       return found;
+}
index eb6a38dfb53e5cdca2747e6ea974f4532918ab97..1b33eda3f7278c5abaa18af648a755f465883cee 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define CTX_MAX_CMDS                    32
+
 struct host_queue_header {
        __u64 read_index;
        struct {
index 3be9066b71782d57bdeb9bcb5d32b8a4e466157d..9ff34ce57fcbfa2ff3b265332f8946461e08f11b 100644 (file)
@@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
        return 0;
 }
 
+/*
+ * Map the doorbell page of one of @client's hardware contexts into @vma.
+ *
+ * vma->vm_pgoff must match the doorbell page offset of a context owned by
+ * @client, and the mapping must cover exactly one page. The page is mapped
+ * non-cached from the device's doorbell BAR.
+ *
+ * Returns 0 on success, -EINVAL for an invalid page offset or size, or the
+ * error returned by io_remap_pfn_range().
+ */
+static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
+{
+       struct amdxdna_dev *xdna = client->xdna;
+       struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+       const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
+       phys_addr_t res_start;
+       unsigned long pfn;
+       int ret;
+
+       if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
+               XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
+               return -EINVAL;
+       }
+
+       if (vma_pages(vma) != 1) {
+               /* vma_pages() returns unsigned long, so print it as such. */
+               XDNA_ERR(xdna, "can only map one page, got %lu", vma_pages(vma));
+               return -EINVAL;
+       }
+
+       /* Physical start of the doorbell region inside the doorbell BAR. */
+       res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
+       pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
+       ret = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+                                vma->vm_page_prot);
+
+       XDNA_DBG(xdna, "doorbell ret %d", ret);
+       return ret;
+}
+
 static int aie4_pf_init(struct amdxdna_dev *xdna)
 {
        int ret;
@@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
        .fini                   = aie4_vf_fini,
        .hwctx_init             = aie4_hwctx_init,
        .hwctx_fini             = aie4_hwctx_fini,
+       .mmap                   = aie4_doorbell_mmap,
+       .cmd_wait               = aie4_cmd_wait,
 };
index 6103007e6d2f7e9902fb894a08fadc59c4f37f61..b69489acd53d36211a61681b69f896c78f464cb7 100644 (file)
@@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
        u32                     mbox_bar;
        u32                     mbox_rbuf_bar;
        u64                     mbox_info_off;
+       u32                     doorbell_off;
 
        struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
        struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
@@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
 /* aie4_ctx.c */
 int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
 void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
 
 /* aie4_sriov.c */
 #if IS_ENABLED(CONFIG_PCI_IOV)
index b5ad60d4b73411b57c8b6a09fd32fd4b5d7cba56..b79229a63af31786fe01c66ec73c384b1cd1b59c 100644 (file)
@@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
        XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
        return -EINVAL;
 }
+
+/*
+ * DRM_IOCTL_AMDXDNA_WAIT_CMD handler: wait for a command to complete.
+ *
+ * Looks up the hardware context named by args->hwctx and hands the wait over
+ * to the device's cmd_wait op. The client's hwctx SRCU read lock is held for
+ * the whole wait.
+ *
+ * Returns -EOPNOTSUPP if the device has no cmd_wait op, -EINVAL if the
+ * context does not exist, otherwise the result of cmd_wait.
+ */
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+       struct amdxdna_client *client = filp->driver_priv;
+       struct amdxdna_dev *xdna = to_xdna_dev(dev);
+       struct amdxdna_drm_wait_cmd *args = data;
+       struct amdxdna_hwctx *hwctx;
+       int ret, idx;
+
+       /* hwctx and timeout are __u32, seq is __u64: use unsigned specifiers. */
+       XDNA_DBG(xdna, "PID %d ctx %u timeout set %u ms for cmd %llu",
+                client->pid, args->hwctx, args->timeout, args->seq);
+
+       if (!xdna->dev_info->ops->cmd_wait)
+               return -EOPNOTSUPP;
+
+       idx = srcu_read_lock(&client->hwctx_srcu);
+       hwctx = xa_load(&client->hwctx_xa, args->hwctx);
+       if (!hwctx) {
+               XDNA_DBG(xdna, "PID %d failed to get ctx %u", client->pid, args->hwctx);
+               ret = -EINVAL;
+               goto unlock_ctx_srcu;
+       }
+
+       ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
+
+       XDNA_DBG(xdna, "PID %d ctx %u cmd %llu wait finished, ret %d",
+                client->pid, args->hwctx, args->seq, ret);
+
+       trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
+
+unlock_ctx_srcu:
+       srcu_read_unlock(&client->hwctx_srcu, idx);
+       return ret;
+}
index c5622718b4d567ecb9cad08a72b96627d08c5d80..6e3c6371a088521c57dfec3319ed9480318a6a89 100644 (file)
@@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
                       u32 *arg_bo_hdls, u32 arg_bo_cnt,
                       u32 hwctx_hdl, u64 *seq);
 
-int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
-                    u64 seq, u32 timeout);
-
 int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
 #endif /* _AMDXDNA_CTX_H_ */
index ebfc472aa9e7fb175323c9eff27b81efbbeb22ca..319d2064fafa4739a4610fe0904ed3f28a26a22b 100644 (file)
@@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
        mmu_interval_set_seq(&mapp->notifier, cur_seq);
        up_write(&xdna->notifier_lock);
 
-       xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+       if (xdna->dev_info->ops->hmm_invalidate)
+               xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
 
        if (range->event == MMU_NOTIFY_UNMAP) {
                down_write(&xdna->notifier_lock);
@@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
        u32 nr_pages;
        int ret;
 
-       if (!xdna->dev_info->ops->hmm_invalidate)
+       if (!amdxdna_pasid_on(abo->client))
                return 0;
 
        mapp = kzalloc_obj(*mapp);
index 39ad081ac08269d9bdc52984eb6c7f3b9f74adab..c0d00db25cde5f8f482f7e83960e1447c927ed78 100644 (file)
@@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
        return ret;
 }
 
+/*
+ * mmap entry point for the DRM file.
+ *
+ * Page offsets at or above DRM_FILE_PAGE_OFFSET_START go to drm_gem_mmap().
+ * Lower offsets go to the device's mmap op, or fail with -EOPNOTSUPP when the
+ * device does not provide one.
+ */
+static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       struct drm_file *file_priv = filp->private_data;
+       struct amdxdna_client *client = file_priv->driver_priv;
+       struct amdxdna_dev *xdna = client->xdna;
+
+       if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START)) {
+               if (!xdna->dev_info->ops->mmap)
+                       return -EOPNOTSUPP;
+
+               return xdna->dev_info->ops->mmap(client, vma);
+       }
+
+       return drm_gem_mmap(filp, vma);
+}
+
 static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
        /* Context */
        DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
        DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
        /* Execution */
        DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+       DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
        /* AIE hardware */
        DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
        DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
@@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
        .poll           = drm_poll,
        .read           = drm_read,
        .llseek         = noop_llseek,
-       .mmap           = drm_gem_mmap,
+       .mmap           = amdxdna_drm_gem_mmap,
        .show_fdinfo    = drm_show_fdinfo,
        .fop_flags      = FOP_UNSIGNED_OFFSET,
 };
index caed11c09e55c202a50c3c2d41dbb59c2ed1a2e8..471b72299aeef02d6a30338eee2a7fa501235c59 100644 (file)
@@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
        int (*resume)(struct amdxdna_dev *xdna);
        int (*suspend)(struct amdxdna_dev *xdna);
        int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
+       int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
        int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
        void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
        int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
        int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
        void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
        int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+       int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
        int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
        int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
        int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
@@ -85,6 +87,7 @@ struct amdxdna_dev_info {
        int                             sram_bar;
        int                             psp_bar;
        int                             smu_bar;
+       int                             doorbell_bar;
        int                             device_type;
        int                             first_col;
        u32                             dev_mem_buf_shift;
index 6d5da779232ba4995c0adcafdba8f28901f34bc7..d76b2e99c3089017328c74e6be91cc30c3fd9f23 100644 (file)
@@ -14,6 +14,9 @@
 #define NPU3_MBOX_BUFFER_BAR   2
 #define NPU3_MBOX_INFO_OFF     0x0
 
+#define NPU3_DOORBELL_BAR       2
+#define NPU3_DOORBELL_OFF       0x0
+
 /* PCIe BAR Index for NPU3 */
 #define NPU3_REG_BAR_INDEX     0
 #define NPU3_PSP_BAR_INDEX      4
@@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
        .mbox_bar               = NPU3_MBOX_BAR,
        .mbox_rbuf_bar          = NPU3_MBOX_BUFFER_BAR,
        .mbox_info_off          = NPU3_MBOX_INFO_OFF,
+       .doorbell_off           = NPU3_DOORBELL_OFF,
        .psp_regs_off   = {
                DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
                DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
@@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
 const struct amdxdna_dev_info dev_npu3_vf_info = {
        .mbox_bar               = NPU3_MBOX_BAR,
        .sram_bar               = NPU3_MBOX_BUFFER_BAR,
+       .doorbell_bar           = NPU3_DOORBELL_BAR,
        .default_vbnv           = "RyzenAI-npu3-vf",
        .device_type            = AMDXDNA_DEV_TYPE_UMQ,
        .dev_priv               = &npu3_dev_vf_priv,
index ad9b33dd7b13a99e58d39f7a174df01700e77f2f..51a507561df6a15ff76d6c236299a8bdc55d2465 100644 (file)
@@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
        DRM_AMDXDNA_EXEC_CMD,
        DRM_AMDXDNA_GET_INFO,
        DRM_AMDXDNA_SET_STATE,
-       DRM_AMDXDNA_GET_ARRAY = 10,
+       DRM_AMDXDNA_WAIT_CMD,
+       DRM_AMDXDNA_GET_ARRAY,
 };
 
 /**
@@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
        __u64 seq;
 };
 
+/**
+ * struct amdxdna_drm_wait_cmd - Wait for an execution command to complete.
+ *
+ * @hwctx: Context handle.
+ * @timeout: Timeout in milliseconds; 0 means wait with no time limit.
+ * @seq: Sequence number of the command, as returned by the execute command.
+ *
+ * Argument of DRM_IOCTL_AMDXDNA_WAIT_CMD: wait for the command identified by
+ * @seq to complete.
+ */
+struct amdxdna_drm_wait_cmd {
+       __u32 hwctx;
+       __u32 timeout;
+       __u64 seq;
+};
+
 /**
  * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
  * @buffer: The user space buffer that will return the AIE status.
@@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
        DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
                 struct amdxdna_drm_get_array)
 
+#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
+       DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
+               struct amdxdna_drm_wait_cmd)
+
 #if defined(__cplusplus)
 } /* extern c end */
 #endif