ndev->mgmt_chann = NULL;
}
- if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
+ if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
msg->opcode, *hdl->data);
ret = -EINVAL;
goto fail;
}
- if (resp.status != AIE2_STATUS_SUCCESS) {
- XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
- ret = -EINVAL;
- goto fail;
- }
XDNA_DBG(xdna, "Query NPU status completed");
if (size < resp.size) {
return ret;
}
+/**
+ * aie2_query_telemetry() - Fetch a telemetry snapshot from firmware
+ * @ndev: device handle the management message is sent on
+ * @buf: user-space destination for the raw telemetry payload
+ * @size: capacity of @buf in bytes; also the size of the DMA buffer that is
+ *        handed to firmware
+ * @header: in: @header->type selects the telemetry type;
+ *          out: @header->major and @header->minor are set from the response
+ *
+ * Allocates a noncoherent DMA buffer of @size bytes, asks firmware to fill
+ * it, and copies the resp.size bytes reported by firmware to @buf.
+ *
+ * Return: 0 on success; -EINVAL if @header->type is not below
+ * MAX_TELEMETRY_TYPE or firmware reports more data than @size; -ENOMEM if
+ * the DMA buffer cannot be allocated; -EFAULT if the copy to user space
+ * fails; otherwise the error returned by aie2_send_mgmt_msg_wait().
+ */
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+			 char __user *buf, u32 size,
+			 struct amdxdna_drm_query_telemetry_header *header)
+{
+	DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
+	struct amdxdna_dev *xdna = ndev->xdna;
+	dma_addr_t dma_addr;
+	u8 *addr;
+	int ret;
+
+	if (header->type >= MAX_TELEMETRY_TYPE)
+		return -EINVAL;
+
+	addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
+				     DMA_FROM_DEVICE, GFP_KERNEL);
+	if (!addr)
+		return -ENOMEM;
+
+	req.buf_addr = dma_addr;
+	req.buf_size = size;
+	req.type = header->type;
+
+	drm_clflush_virt_range(addr, size); /* device can access */
+	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (ret) {
+		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
+		goto free_buf;
+	}
+
+	if (size < resp.size) {
+		ret = -EINVAL;
+		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
+		goto free_buf;
+	}
+
+	/*
+	 * The buffer is noncoherent: give ownership back to the CPU before
+	 * reading what the device wrote, as the DMA API requires.
+	 */
+	dma_sync_single_for_cpu(xdna->ddev.dev, dma_addr, size, DMA_FROM_DEVICE);
+
+	if (copy_to_user(buf, addr, resp.size)) {
+		ret = -EFAULT;
+		XDNA_ERR(xdna, "Failed to copy telemetry to user space");
+		goto free_buf;
+	}
+
+	header->major = resp.major;
+	header->minor = resp.minor;
+
+free_buf:
+	dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE);
+	return ret;
+}
+
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t))
{
enum aie2_msg_opcode {
MSG_OP_CREATE_CONTEXT = 0x2,
MSG_OP_DESTROY_CONTEXT = 0x3,
- MSG_OP_SYNC_BO = 0x7,
+ MSG_OP_GET_TELEMETRY = 0x4,
+ MSG_OP_SYNC_BO = 0x7,
MSG_OP_EXECUTE_BUFFER_CF = 0xC,
MSG_OP_QUERY_COL_STATUS = 0xD,
MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
enum aie2_msg_status status;
} __packed;
+/*
+ * Telemetry categories carried in get_telemetry_req.type.
+ *
+ * MAX_TELEMETRY_TYPE is an upper bound rather than a usable type:
+ * aie2_query_telemetry() rejects any request whose type is
+ * >= MAX_TELEMETRY_TYPE with -EINVAL.
+ */
+enum telemetry_type {
+ TELEMETRY_TYPE_DISABLED,
+ TELEMETRY_TYPE_HEALTH,
+ TELEMETRY_TYPE_ERROR_INFO,
+ TELEMETRY_TYPE_PROFILING,
+ TELEMETRY_TYPE_DEBUG,
+ MAX_TELEMETRY_TYPE
+};
+
+/*
+ * Request payload for MSG_OP_GET_TELEMETRY.
+ *
+ * type:     telemetry category being requested (enum telemetry_type)
+ * buf_addr: DMA address of the buffer the driver allocated for the reply data
+ * buf_size: size of that buffer in bytes
+ */
+struct get_telemetry_req {
+ enum telemetry_type type;
+ __u64 buf_addr;
+ __u32 buf_size;
+} __packed;
+
+/*
+ * Response payload for MSG_OP_GET_TELEMETRY.
+ *
+ * major/minor: copied by the driver into the user-visible telemetry header
+ * size:        number of bytes the driver copies out of the DMA buffer; the
+ *              driver fails the query if it exceeds the buffer size
+ * status:      completion status. It is the last member of this response, so
+ *              it is reached through xdna_notify.status, which
+ *              DECLARE_XDNA_MSG_COMMON points at &resp.status.
+ */
+struct get_telemetry_resp {
+ __u32 major;
+ __u32 minor;
+ __u32 size;
+ enum aie2_msg_status status;
+} __packed;
+
struct execute_buffer_req {
__u32 cu_idx;
__u32 payload[19];
return 0;
}
+/*
+ * amdxdna_hwctx_walk() callback used when building the telemetry header.
+ *
+ * @arg is a u32 table indexed by firmware context id; the slot for @hwctx
+ * is set to the context's id. A firmware context id at or beyond the
+ * device's hwctx_limit is reported and fails the walk with -EINVAL.
+ */
+static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg)
+{
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	u32 *ctx_map = arg;
+
+	if (hwctx->fw_ctx_id < xdna->dev_handle->priv->hwctx_limit) {
+		ctx_map[hwctx->fw_ctx_id] = hwctx->id;
+		return 0;
+	}
+
+	XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id,
+		 xdna->dev_handle->priv->hwctx_limit);
+	return -EINVAL;
+}
+
+/*
+ * Handle DRM_AMDXDNA_QUERY_TELEMETRY.
+ *
+ * The user buffer at args->buffer (args->buffer_size bytes) starts with a
+ * struct amdxdna_drm_query_telemetry_header carrying hwctx_limit map
+ * entries; the remaining bytes receive the telemetry payload. The fixed part
+ * of the header is read from user space to obtain the requested type. The
+ * map is then filled from every client's hardware contexts, the payload is
+ * fetched by aie2_query_telemetry(), and the completed header is written
+ * back.
+ *
+ * Return: 0 on success; -EINVAL if the buffer leaves no room for payload or
+ * the payload area is larger than SZ_4M; -ENOMEM if the header cannot be
+ * allocated; -EFAULT on a failed user copy; otherwise the error returned by
+ * the context walk or by aie2_query_telemetry().
+ */
+static int aie2_get_telemetry(struct amdxdna_client *client,
+			      struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL;
+	u32 telemetry_data_sz, header_sz, elem_num;
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_client *tmp_client;
+	int ret;
+
+	elem_num = xdna->dev_handle->priv->hwctx_limit;
+	header_sz = struct_size(header, map, elem_num);
+	if (args->buffer_size <= header_sz) {
+		XDNA_ERR(xdna, "Invalid buffer size");
+		return -EINVAL;
+	}
+
+	telemetry_data_sz = args->buffer_size - header_sz;
+	if (telemetry_data_sz > SZ_4M) {
+		/* u32 value: print unsigned so oversized requests never show as negative */
+		XDNA_ERR(xdna, "Buffer size is too big, %u", telemetry_data_sz);
+		return -EINVAL;
+	}
+
+	/* Zeroed allocation: map slots with no matching context stay 0. */
+	header = kzalloc(header_sz, GFP_KERNEL);
+	if (!header)
+		return -ENOMEM;
+
+	/* Only the fixed part is taken from user space; the map is driver-filled. */
+	if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) {
+		XDNA_ERR(xdna, "Failed to copy telemetry header from user");
+		return -EFAULT;
+	}
+
+	header->map_num_elements = elem_num;
+	list_for_each_entry(tmp_client, &xdna->client_list, node) {
+		ret = amdxdna_hwctx_walk(tmp_client, header->map,
+					 aie2_fill_hwctx_map);
+		if (ret)
+			return ret;
+	}
+
+	/* The payload area starts right after the header and its map table. */
+	ret = aie2_query_telemetry(xdna->dev_handle,
+				   u64_to_user_ptr(args->buffer + header_sz),
+				   telemetry_data_sz, header);
+	if (ret) {
+		XDNA_ERR(xdna, "Query telemetry failed ret %d", ret);
+		return ret;
+	}
+
+	if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) {
+		XDNA_ERR(xdna, "Copy header failed");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
struct amdxdna_dev *xdna = client->xdna;
case DRM_AMDXDNA_GET_POWER_MODE:
ret = aie2_get_power_mode(client, args);
break;
+ case DRM_AMDXDNA_QUERY_TELEMETRY:
+ ret = aie2_get_telemetry(client, args);
+ break;
case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
ret = aie2_query_resource_info(client, args);
break;
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
+int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
+ char __user *buf, u32 size,
+ struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t));
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
u32 *data;
size_t size;
int error;
+ u32 *status;
};
-#define DECLARE_XDNA_MSG_COMMON(name, op, status) \
+#define DECLARE_XDNA_MSG_COMMON(name, op, s) \
struct name##_req req = { 0 }; \
- struct name##_resp resp = { status }; \
+ struct name##_resp resp = { .status = s }; \
struct xdna_notify hdl = { \
.error = 0, \
.data = (u32 *)&resp, \
.size = sizeof(resp), \
.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
+ .status = (u32 *)&resp.status, \
}; \
struct xdna_mailbox_msg msg = { \
.send_data = (u8 *)&req, \
* 0.2: Support getting last error hardware error
* 0.3: Support firmware debug buffer
* 0.4: Support getting resource information
+ * 0.5: Support getting telemetry data
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 4
+#define AMDXDNA_DRIVER_MINOR 5
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
DRM_AMDXDNA_QUERY_HW_CONTEXTS,
DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
DRM_AMDXDNA_GET_POWER_MODE,
+ DRM_AMDXDNA_QUERY_TELEMETRY,
DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12,
};
__u64 npu_task_curr;
};
+/**
+ * struct amdxdna_drm_query_telemetry_header - Telemetry data header
+ *
+ * Sits at the start of the user buffer passed with
+ * DRM_AMDXDNA_QUERY_TELEMETRY. The telemetry payload is written
+ * immediately after the last @map entry.
+ */
+struct amdxdna_drm_query_telemetry_header {
+ /** @major: Firmware telemetry interface major version number (filled by the driver) */
+ __u32 major;
+ /** @minor: Firmware telemetry interface minor version number (filled by the driver) */
+ __u32 minor;
+ /** @type: Telemetry query type (supplied by the caller) */
+ __u32 type;
+ /** @map_num_elements: Total number of elements in the map table (filled by the driver) */
+ __u32 map_num_elements;
+ /**
+ * @map: Element map, indexed by firmware context ID; each used entry
+ * holds the ID of the hardware context bound to that firmware context
+ */
+ __u32 map[];
+};
+
/**
* struct amdxdna_drm_get_info - Get some information from the AIE hardware.
* @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed in the buffer.