From: Max Zhen Date: Tue, 21 Apr 2026 18:15:02 +0000 (-0700) Subject: accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8711eb2dde2ed44c98714b875dcf7329950c71ba;p=thirdparty%2Fkernel%2Flinux.git accel/amdxdna: Improve tracing for job lifecycle and mailbox RX worker Add more trace coverage to amdxdna job handling and mailbox receive processing to make driver execution easier to debug. Extend the xdna_job trace event to record the command opcode in addition to the job sequence number. Use the enhanced tracepoint in the job run, sent-to-device, signaled-fence, and job-free paths so that trace output can be correlated with the command being executed. Also add debug-point tracing when a command is received through the submit ioctl path, and add a trace event when the mailbox RX worker runs. These changes improve visibility into job lifetime transitions and mailbox activity, which helps debug command flow and scheduler issues. Signed-off-by: Max Zhen Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260421181502.1970263-1-lizhi.hou@amd.com --- diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index d37123d925b6c..3b0feba448c49 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -64,6 +64,7 @@ static void aie2_job_release(struct kref *ref) struct amdxdna_sched_job *job; job = container_of(ref, struct amdxdna_sched_job, refcnt); + amdxdna_sched_job_cleanup(job); atomic64_inc(&job->hwctx->job_free_cnt); wake_up(&job->hwctx->priv->job_free_wq); @@ -195,7 +196,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job) { struct dma_fence *fence = job->fence; - trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); + trace_xdna_job(&job->base, job->hwctx->name, "signaling fence", + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); aie2_tdr_signal(job->hwctx->client->xdna); job->hwctx->priv->completed++; @@ -366,6 +368,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; + trace_xdna_job(sched_job, hwctx->name, "job run", + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); + if (!hwctx->priv->mbox_chann) return NULL; @@ -409,7 +414,8 @@ out: } else { aie2_tdr_signal(hwctx->client->xdna); } - trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); + trace_xdna_job(sched_job, hwctx->name, "sent to device", + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); return fence; } @@ -419,7 +425,8 @@ static void aie2_sched_job_free(struct drm_sched_job *sched_job) struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); struct amdxdna_hwctx *hwctx = job->hwctx; - trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); + trace_xdna_job(sched_job, hwctx->name, "job free", + job->seq, job->drv_cmd ? job->drv_cmd->opcode : DEFAULT_IO); if (!job->job_done) up(&hwctx->priv->job_sem); @@ -437,7 +444,6 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job) int ret; xdna = hwctx->client->xdna; - trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); guard(mutex)(&xdna->dev_lock); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index ff6c3e8e5a15b..2c2c21992c874 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -514,7 +514,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, goto unlock_srcu; } - job->hwctx = hwctx; job->mm = current->mm; @@ -612,6 +611,8 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_ if (args->ext || args->ext_flags) return -EINVAL; + trace_amdxdna_debug_point(current->comm, args->type, "job received"); + switch (args->type) { case AMDXDNA_CMD_SUBMIT_EXEC_BUF: return amdxdna_drm_submit_execbuf(client, args); diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index a8557d7e89238..3557986873766 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -119,6 +119,7 @@ struct amdxdna_hwctx { container_of(j, struct amdxdna_sched_job, base) enum amdxdna_job_opcode { + DEFAULT_IO, SYNC_DEBUG_BO, ATTACH_DEBUG_BO, DETACH_DEBUG_BO, diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 37771bdb24a17..cc8865f4e79c9 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -361,6 +361,7 @@ static void mailbox_rx_worker(struct work_struct *rx_work) int ret; mb_chann = container_of(rx_work, struct mailbox_channel, rx_work); + trace_mbox_rx_worker(MAILBOX_NAME, mb_chann->msix_irq); if (READ_ONCE(mb_chann->bad_state)) { MB_ERR(mb_chann, "Channel in bad state, work aborted"); diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h index c6cb2da7b706c..71da24267e52e 100644 --- a/include/trace/events/amdxdna.h +++ b/include/trace/events/amdxdna.h @@ -30,26 +30,30 @@ TRACE_EVENT(amdxdna_debug_point, ); TRACE_EVENT(xdna_job, - TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq), + TP_PROTO(struct drm_sched_job *sched_job, const char *name, + const char *str, u64 seq, u32 op), - TP_ARGS(sched_job, name, str, seq), + TP_ARGS(sched_job, name, str, seq, op), TP_STRUCT__entry(__string(name, name) __string(str, str) __field(u64, fence_context) __field(u64, fence_seqno) - __field(u64, seq)), + __field(u64, seq) + __field(u32, op)), TP_fast_assign(__assign_str(name); __assign_str(str); __entry->fence_context = sched_job->s_fence->finished.context; __entry->fence_seqno = sched_job->s_fence->finished.seqno; - __entry->seq = seq;), + __entry->seq = seq; + __entry->op = op;), - TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s", + TP_printk("fence=(context:%llu, seqno:%llu), %s seq#:%llu %s, op=%u", __entry->fence_context, __entry->fence_seqno, __get_str(name), __entry->seq, - __get_str(str)) + __get_str(str), + __entry->op) ); DECLARE_EVENT_CLASS(xdna_mbox_msg, @@ -81,18 +85,28 @@ DEFINE_EVENT(xdna_mbox_msg, mbox_set_head, TP_ARGS(name, chann_id, opcode, id) ); -TRACE_EVENT(mbox_irq_handle, - TP_PROTO(char *name, int irq), +DECLARE_EVENT_CLASS(xdna_mbox_name_id, + TP_PROTO(char *name, int irq), - TP_ARGS(name, irq), + TP_ARGS(name, irq), - TP_STRUCT__entry(__string(name, name) - __field(int, irq)), + TP_STRUCT__entry(__string(name, name) + __field(int, irq)), - TP_fast_assign(__assign_str(name); - __entry->irq = irq;), + TP_fast_assign(__assign_str(name); + __entry->irq = irq;), + + TP_printk("%s.%d", __get_str(name), __entry->irq) +); + +DEFINE_EVENT(xdna_mbox_name_id, mbox_irq_handle, + TP_PROTO(char *name, int irq), + TP_ARGS(name, irq) +); - TP_printk("%s.%d", __get_str(name), __entry->irq) +DEFINE_EVENT(xdna_mbox_name_id, mbox_rx_worker, + TP_PROTO(char *name, int irq), + TP_ARGS(name, irq) ); #endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */