]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/amdxdna: Support preemption requests
authorLizhi Hou <lizhi.hou@amd.com>
Tue, 4 Nov 2025 18:53:39 +0000 (10:53 -0800)
committerLizhi Hou <lizhi.hou@amd.com>
Wed, 5 Nov 2025 16:56:28 +0000 (08:56 -0800)
The driver checks the firmware version during initialization. If preemption
is supported, the driver configures preemption accordingly and handles
userspace preemption requests. Otherwise, the driver returns an error for
userspace preemption requests.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251104185340.897560-1-lizhi.hou@amd.com
drivers/accel/amdxdna/aie2_message.c
drivers/accel/amdxdna/aie2_msg_priv.h
drivers/accel/amdxdna/aie2_pci.c
drivers/accel/amdxdna/aie2_pci.h
drivers/accel/amdxdna/amdxdna_ctx.h
drivers/accel/amdxdna/amdxdna_pci_drv.c
drivers/accel/amdxdna/npu4_regs.c
include/uapi/drm/amdxdna_accel.h

index 69cdce9ff2083ceba6b9e5f370284b32273fa17f..d493bb1c33606fb094cc162c4b9d1eedd3f116bb 100644 (file)
@@ -210,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
        hwctx->fw_ctx_id = resp.context_id;
        WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
 
+       if (ndev->force_preempt_enabled) {
+               ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
+               if (ret) {
+                       XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
+                       return ret;
+               }
+       }
+
        cq_pair = &resp.cq_pair[0];
        x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
        x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
@@ -601,6 +609,11 @@ aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
        return 0;
 }
 
+static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+       return -EOPNOTSUPP;
+}
+
 static u32 aie2_get_chain_msg_op(u32 cmd_op)
 {
        switch (cmd_op) {
@@ -621,6 +634,8 @@ static struct aie2_exec_msg_ops legacy_exec_message_ops = {
        .init_chain_req = aie2_init_exec_chain_req,
        .fill_cf_slot = aie2_cmdlist_fill_cf,
        .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+       .fill_preempt_slot = aie2_cmdlist_unsupp,
+       .fill_elf_slot = aie2_cmdlist_unsupp,
        .get_chain_msg_op = aie2_get_chain_msg_op,
 };
 
@@ -680,6 +695,74 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
        return 0;
 }
 
+static int
+aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+       struct cmd_chain_slot_npu *npu_slot = slot;
+       struct amdxdna_cmd_preempt_data *pd;
+       u32 cmd_len;
+       u32 arg_sz;
+
+       pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       arg_sz = cmd_len - sizeof(*pd);
+       if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+               return -EINVAL;
+
+       if (*size < sizeof(*npu_slot) + arg_sz)
+               return -EINVAL;
+
+       memset(npu_slot, 0, sizeof(*npu_slot)); /* must precede field writes */
+       npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+       if (npu_slot->cu_idx == INVALID_CU_IDX)
+               return -EINVAL;
+
+       npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
+       npu_slot->inst_buf_addr = pd->inst_buf;
+       npu_slot->save_buf_addr = pd->save_buf;
+       npu_slot->restore_buf_addr = pd->restore_buf;
+       npu_slot->inst_size = pd->inst_size;
+       npu_slot->save_size = pd->save_size;
+       npu_slot->restore_size = pd->restore_size;
+       npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+       npu_slot->arg_cnt = arg_sz / sizeof(u32);
+       memcpy(npu_slot->args, pd->prop_args, arg_sz);
+
+       *size = sizeof(*npu_slot) + arg_sz;
+       return 0;
+}
+
+static int
+aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+       struct cmd_chain_slot_npu *npu_slot = slot;
+       struct amdxdna_cmd_preempt_data *pd;
+       u32 cmd_len;
+       u32 arg_sz;
+
+       pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+       arg_sz = cmd_len - sizeof(*pd);
+       if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
+               return -EINVAL;
+
+       if (*size < sizeof(*npu_slot) + arg_sz)
+               return -EINVAL;
+
+       memset(npu_slot, 0, sizeof(*npu_slot));
+       npu_slot->type = EXEC_NPU_TYPE_ELF;
+       npu_slot->inst_buf_addr = pd->inst_buf;
+       npu_slot->save_buf_addr = pd->save_buf;
+       npu_slot->restore_buf_addr = pd->restore_buf;
+       npu_slot->inst_size = pd->inst_size;
+       npu_slot->save_size = pd->save_size;
+       npu_slot->restore_size = pd->restore_size;
+       npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
+       npu_slot->arg_cnt = 1;
+       npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;
+
+       *size = struct_size(npu_slot, args, npu_slot->arg_cnt);
+       return 0;
+}
+
 static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
 {
        return MSG_OP_CHAIN_EXEC_NPU;
@@ -691,6 +774,8 @@ static struct aie2_exec_msg_ops npu_exec_message_ops = {
        .init_chain_req = aie2_init_npu_chain_req,
        .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
        .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+       .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
+       .fill_elf_slot = aie2_cmdlist_fill_npu_elf,
        .get_chain_msg_op = aie2_get_npu_chain_msg_op,
 };
 
@@ -749,6 +834,16 @@ aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
        case ERT_START_NPU:
                ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
                break;
+       case ERT_START_NPU_PREEMPT:
+               if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+                       return -EOPNOTSUPP;
+               ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
+               break;
+       case ERT_START_NPU_PREEMPT_ELF:
+               if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
+                       return -EOPNOTSUPP;
+               ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
+               break;
        default:
                XDNA_INFO(xdna, "Unsupported op %d", op);
                ret = -EOPNOTSUPP;
index 947daa63f0643b48a23606fdda2c1a41929121c8..1c957a6298d39dfbe48635daa3f076aca6107aa4 100644 (file)
@@ -176,6 +176,8 @@ struct exec_dpu_req {
 enum exec_npu_type {
        EXEC_NPU_TYPE_NON_ELF           = 0x1,
        EXEC_NPU_TYPE_PARTIAL_ELF       = 0x2,
+       EXEC_NPU_TYPE_PREEMPT           = 0x3,
+       EXEC_NPU_TYPE_ELF               = 0x4,
 };
 
 union exec_req {
@@ -372,6 +374,7 @@ struct cmd_chain_slot_dpu {
 };
 
 #define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3
 struct cmd_chain_slot_npu {
        enum exec_npu_type type;
        u64 inst_buf_addr;
index d7ccbdaf47f5f30c16c6a97d20eddd4b88063069..ceef1c502e9e2c42d080701609fb211b9ed1cfe4 100644 (file)
@@ -183,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
                if (cfg->category != category)
                        continue;
 
+               if (cfg->feature_mask && /* skip cfg whose required fw features are absent */
+                   !bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX))
+                       continue;
+
                value = val ? *val : cfg->value;
                ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
                if (ret) {
@@ -932,6 +936,25 @@ static int aie2_get_telemetry(struct amdxdna_client *client,
        return 0;
 }
 
+static int aie2_get_preempt_state(struct amdxdna_client *client,
+                                 struct amdxdna_drm_get_info *args)
+{
+       struct amdxdna_drm_attribute_state state = {};
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_dev_hdl *ndev;
+
+       ndev = xdna->dev_handle;
+       if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE)
+               state.state = ndev->force_preempt_enabled;
+       else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE)
+               state.state = ndev->frame_boundary_preempt;
+
+       if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state)))
+               return -EFAULT;
+
+       return 0;
+}
+
 static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
 {
        struct amdxdna_dev *xdna = client->xdna;
@@ -972,6 +995,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
        case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
                ret = aie2_query_resource_info(client, args);
                break;
+       case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE:
+       case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE:
+               ret = aie2_get_preempt_state(client, args);
+               break;
        default:
                XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
                ret = -EOPNOTSUPP;
@@ -1078,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client,
        return aie2_pm_set_mode(xdna->dev_handle, power_mode);
 }
 
+static int aie2_set_preempt_state(struct amdxdna_client *client,
+                                 struct amdxdna_drm_set_state *args)
+{
+       struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+       struct amdxdna_drm_attribute_state state;
+       u32 val;
+       int ret;
+
+       if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state)))
+               return -EFAULT;
+
+       if (state.state > 1)
+               return -EINVAL;
+
+       if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad)))
+               return -EINVAL;
+
+       if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) {
+               ndev->force_preempt_enabled = state.state;
+       } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) {
+               val = state.state;
+               ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
+                                      &val);
+               if (ret)
+                       return ret;
+
+               ndev->frame_boundary_preempt = state.state;
+       }
+
+       return 0;
+}
+
 static int aie2_set_state(struct amdxdna_client *client,
                          struct amdxdna_drm_set_state *args)
 {
@@ -1095,6 +1154,10 @@ static int aie2_set_state(struct amdxdna_client *client,
        case DRM_AMDXDNA_SET_POWER_MODE:
                ret = aie2_set_power_mode(client, args);
                break;
+       case DRM_AMDXDNA_SET_FORCE_PREEMPT:
+       case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT:
+               ret = aie2_set_preempt_state(client, args);
+               break;
        default:
                XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
                ret = -EOPNOTSUPP;
index 9793cd1e0c558c73c9239f207d8d56377fc06703..a5f9c42155d178766e6b59bc22d1043d66fb4e92 100644 (file)
@@ -110,12 +110,15 @@ struct aie_metadata {
 enum rt_config_category {
        AIE2_RT_CFG_INIT,
        AIE2_RT_CFG_CLK_GATING,
+       AIE2_RT_CFG_FORCE_PREEMPT,
+       AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
 };
 
 struct rt_config {
        u32     type;
        u32     value;
        u32     category;
+       unsigned long feature_mask;
 };
 
 struct dpm_clk_freq {
@@ -164,6 +167,8 @@ struct aie2_exec_msg_ops {
        void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
        int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+       int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+       int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        u32 (*get_chain_msg_op)(u32 cmd_op);
 };
 
@@ -197,6 +202,8 @@ struct amdxdna_dev_hdl {
        u32                             hclk_freq;
        u32                             max_tops;
        u32                             curr_tops;
+       u32                             force_preempt_enabled;
+       u32                             frame_boundary_preempt;
 
        /* Mailbox and the management channel */
        struct mailbox                  *mbox;
@@ -223,6 +230,7 @@ struct aie2_hw_ops {
 
 enum aie2_fw_feature {
        AIE2_NPU_COMMAND,
+       AIE2_PREEMPT,
        AIE2_FEATURE_MAX
 };
 
index d02fb32499fa0cc2e93dc01b245fe2ad0a8a9eac..b6151244d64fe2642009a60f43f78e8c38cf39aa 100644 (file)
@@ -16,6 +16,8 @@ enum ert_cmd_opcode {
        ERT_START_CU = 0,
        ERT_CMD_CHAIN = 19,
        ERT_START_NPU = 20,
+       ERT_START_NPU_PREEMPT = 21,
+       ERT_START_NPU_PREEMPT_ELF = 22,
        ERT_INVALID_CMD = ~0U,
 };
 
@@ -55,6 +57,21 @@ struct amdxdna_cmd_chain {
        u64 data[] __counted_by(command_count);
 };
 
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_preempt_data {
+       u64 inst_buf;       /* instruction buffer address */
+       u64 save_buf;       /* save buffer address */
+       u64 restore_buf;    /* restore buffer address */
+       u32 inst_size;      /* size of instruction buffer in bytes */
+       u32 save_size;      /* size of save buffer in bytes */
+       u32 restore_size;   /* size of restore buffer in bytes */
+       u32 inst_prop_cnt;  /* properties count */
+       u32 prop_args[];    /* properties and regular kernel arguments */
+};
+
 /* Exec buffer command header format */
 #define AMDXDNA_CMD_STATE              GENMASK(3, 0)
 #define AMDXDNA_CMD_EXTRA_CU_MASK      GENMASK(11, 10)
index 7590265d4485eebb82eed92a2c876cfbd5453fcd..1973ab67721be00a677432ec2e320caa9d70f0e3 100644 (file)
@@ -31,9 +31,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
  * 0.3: Support firmware debug buffer
  * 0.4: Support getting resource information
  * 0.5: Support getting telemetry data
+ * 0.6: Support preemption
  */
 #define AMDXDNA_DRIVER_MAJOR           0
-#define AMDXDNA_DRIVER_MINOR           5
+#define AMDXDNA_DRIVER_MINOR           6
 
 /*
  * Bind the driver base on (vendor_id, device_id) pair and later use the
index d90777275a9f8481fe247380070e18d999827c24..986a5f28ba2452a1137a4ce31e9f09fb986a94ba 100644 (file)
 const struct rt_config npu4_default_rt_cfg[] = {
        { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
        { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
+       { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
        { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
        { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
        { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
        { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
+       { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
        { 0 },
 };
 
@@ -85,6 +88,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
 
 const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
        { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+       { .feature = AIE2_PREEMPT, .min_minor = 12 },
        { 0 }
 };
 
index 8ad254bc35a5b9fd3f625a65df5eac6dfd91f077..62c917fd4f7ba43a6d3523fb91a0c8762d3cb4b6 100644 (file)
@@ -443,7 +443,9 @@ enum amdxdna_drm_get_param {
        DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
        DRM_AMDXDNA_GET_POWER_MODE,
        DRM_AMDXDNA_QUERY_TELEMETRY,
-       DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12,
+       DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE,
+       DRM_AMDXDNA_QUERY_RESOURCE_INFO,
+       DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE,
 };
 
 /**
@@ -462,6 +464,16 @@ struct amdxdna_drm_get_resource_info {
        __u64 npu_task_curr;
 };
 
+/**
+ * struct amdxdna_drm_attribute_state - State of an attribute
+ */
+struct amdxdna_drm_attribute_state {
+       /** @state: enabled or disabled */
+       __u8 state;
+       /** @pad: MBZ */
+       __u8 pad[7];
+};
+
 /**
  * struct amdxdna_drm_query_telemetry_header - Telemetry data header
  */
@@ -613,6 +625,8 @@ enum amdxdna_drm_set_param {
        DRM_AMDXDNA_SET_POWER_MODE,
        DRM_AMDXDNA_WRITE_AIE_MEM,
        DRM_AMDXDNA_WRITE_AIE_REG,
+       DRM_AMDXDNA_SET_FORCE_PREEMPT,
+       DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT,
 };
 
 /**