1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
9 #include "include/hw_ip/mmu/mmu_general.h"
10 #include "include/hw_ip/mmu/mmu_v1_0.h"
11 #include "include/goya/asic_reg/goya_masks.h"
13 #include <linux/pci.h>
14 #include <linux/genalloc.h>
15 #include <linux/hwmon.h>
16 #include <linux/io-64-nonatomic-lo-hi.h>
17 #include <linux/iommu.h>
18 #include <linux/seq_file.h>
21 * GOYA security scheme:
23 * 1. Host is protected by:
24 * - Range registers (When MMU is enabled, DMA RR does NOT protect host)
27 * 2. DRAM is protected by:
28 * - Range registers (protect the first 512MB)
29 * - MMU (isolation between users)
31 * 3. Configuration is protected by:
35 * When MMU is disabled:
37 * QMAN DMA: PQ, CQ, CP, DMA are secured.
38 * PQ, CB and the data are on the host.
41 * PQ, CQ and CP are not secured.
42 * PQ, CB and the data are on the SRAM/DRAM.
44 * Since QMAN DMA is secured, KMD is parsing the DMA CB:
45 * - KMD checks DMA pointer
46 * - WREG, MSG_PROT are not allowed.
47 * - MSG_LONG/SHORT are allowed.
49 * A read/write transaction by the QMAN to a protected area will succeed if
50 * and only if the QMAN's CP is secured and MSG_PROT is used
53 * When MMU is enabled:
55 * QMAN DMA: PQ, CQ and CP are secured.
56 * MMU is set to bypass on the Secure props register of the QMAN.
57 * The reasons we don't enable MMU for PQ, CQ and CP are:
58 * - PQ entry is in kernel address space and KMD doesn't map it.
59 * - CP writes to MSIX register and to kernel address space (completion
62 * DMA is not secured but because CP is secured, KMD still needs to parse the
63 * CB, but doesn't need to check the DMA addresses.
65 * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
66 * doesn't map memory in MMU.
68 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
70 * DMA RR does NOT protect host because DMA is not secured
74 #define GOYA_MMU_REGS_NUM 63
76 #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
78 #define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */
79 #define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */
80 #define GOYA_RESET_WAIT_MSEC 1 /* 1ms */
81 #define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */
82 #define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
83 #define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
84 #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
85 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
87 #define GOYA_QMAN0_FENCE_VAL 0xD169B243
89 #define GOYA_MAX_STRING_LEN 20
91 #define GOYA_CB_POOL_CB_CNT 512
92 #define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */
94 #define IS_QM_IDLE(engine, qm_glbl_sts0) \
95 (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
96 #define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
97 #define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
98 #define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
100 #define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
101 (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
102 engine##_CMDQ_IDLE_MASK)
103 #define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
104 IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
105 #define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
106 IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
108 #define IS_DMA_IDLE(dma_core_sts0) \
109 !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
111 #define IS_TPC_IDLE(tpc_cfg_sts) \
112 (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
114 #define IS_MME_IDLE(mme_arch_sts) \
115 (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
118 static const char goya_irq_name
[GOYA_MSIX_ENTRIES
][GOYA_MAX_STRING_LEN
] = {
119 "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
120 "goya cq 4", "goya cpu eq"
123 static u16 goya_packet_sizes
[MAX_PACKET_ID
] = {
124 [PACKET_WREG_32
] = sizeof(struct packet_wreg32
),
125 [PACKET_WREG_BULK
] = sizeof(struct packet_wreg_bulk
),
126 [PACKET_MSG_LONG
] = sizeof(struct packet_msg_long
),
127 [PACKET_MSG_SHORT
] = sizeof(struct packet_msg_short
),
128 [PACKET_CP_DMA
] = sizeof(struct packet_cp_dma
),
129 [PACKET_MSG_PROT
] = sizeof(struct packet_msg_prot
),
130 [PACKET_FENCE
] = sizeof(struct packet_fence
),
131 [PACKET_LIN_DMA
] = sizeof(struct packet_lin_dma
),
132 [PACKET_NOP
] = sizeof(struct packet_nop
),
133 [PACKET_STOP
] = sizeof(struct packet_stop
)
136 static u64 goya_mmu_regs
[GOYA_MMU_REGS_NUM
] = {
137 mmDMA_QM_0_GLBL_NON_SECURE_PROPS
,
138 mmDMA_QM_1_GLBL_NON_SECURE_PROPS
,
139 mmDMA_QM_2_GLBL_NON_SECURE_PROPS
,
140 mmDMA_QM_3_GLBL_NON_SECURE_PROPS
,
141 mmDMA_QM_4_GLBL_NON_SECURE_PROPS
,
142 mmTPC0_QM_GLBL_SECURE_PROPS
,
143 mmTPC0_QM_GLBL_NON_SECURE_PROPS
,
144 mmTPC0_CMDQ_GLBL_SECURE_PROPS
,
145 mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS
,
148 mmTPC1_QM_GLBL_SECURE_PROPS
,
149 mmTPC1_QM_GLBL_NON_SECURE_PROPS
,
150 mmTPC1_CMDQ_GLBL_SECURE_PROPS
,
151 mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS
,
154 mmTPC2_QM_GLBL_SECURE_PROPS
,
155 mmTPC2_QM_GLBL_NON_SECURE_PROPS
,
156 mmTPC2_CMDQ_GLBL_SECURE_PROPS
,
157 mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS
,
160 mmTPC3_QM_GLBL_SECURE_PROPS
,
161 mmTPC3_QM_GLBL_NON_SECURE_PROPS
,
162 mmTPC3_CMDQ_GLBL_SECURE_PROPS
,
163 mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS
,
166 mmTPC4_QM_GLBL_SECURE_PROPS
,
167 mmTPC4_QM_GLBL_NON_SECURE_PROPS
,
168 mmTPC4_CMDQ_GLBL_SECURE_PROPS
,
169 mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS
,
172 mmTPC5_QM_GLBL_SECURE_PROPS
,
173 mmTPC5_QM_GLBL_NON_SECURE_PROPS
,
174 mmTPC5_CMDQ_GLBL_SECURE_PROPS
,
175 mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS
,
178 mmTPC6_QM_GLBL_SECURE_PROPS
,
179 mmTPC6_QM_GLBL_NON_SECURE_PROPS
,
180 mmTPC6_CMDQ_GLBL_SECURE_PROPS
,
181 mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS
,
184 mmTPC7_QM_GLBL_SECURE_PROPS
,
185 mmTPC7_QM_GLBL_NON_SECURE_PROPS
,
186 mmTPC7_CMDQ_GLBL_SECURE_PROPS
,
187 mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS
,
190 mmMME_QM_GLBL_SECURE_PROPS
,
191 mmMME_QM_GLBL_NON_SECURE_PROPS
,
192 mmMME_CMDQ_GLBL_SECURE_PROPS
,
193 mmMME_CMDQ_GLBL_NON_SECURE_PROPS
,
194 mmMME_SBA_CONTROL_DATA
,
195 mmMME_SBB_CONTROL_DATA
,
196 mmMME_SBC_CONTROL_DATA
,
197 mmMME_WBC_CONTROL_DATA
,
198 mmPCIE_WRAP_PSOC_ARUSER
,
199 mmPCIE_WRAP_PSOC_AWUSER
202 static u32 goya_all_events
[] = {
203 GOYA_ASYNC_EVENT_ID_PCIE_IF
,
204 GOYA_ASYNC_EVENT_ID_TPC0_ECC
,
205 GOYA_ASYNC_EVENT_ID_TPC1_ECC
,
206 GOYA_ASYNC_EVENT_ID_TPC2_ECC
,
207 GOYA_ASYNC_EVENT_ID_TPC3_ECC
,
208 GOYA_ASYNC_EVENT_ID_TPC4_ECC
,
209 GOYA_ASYNC_EVENT_ID_TPC5_ECC
,
210 GOYA_ASYNC_EVENT_ID_TPC6_ECC
,
211 GOYA_ASYNC_EVENT_ID_TPC7_ECC
,
212 GOYA_ASYNC_EVENT_ID_MME_ECC
,
213 GOYA_ASYNC_EVENT_ID_MME_ECC_EXT
,
214 GOYA_ASYNC_EVENT_ID_MMU_ECC
,
215 GOYA_ASYNC_EVENT_ID_DMA_MACRO
,
216 GOYA_ASYNC_EVENT_ID_DMA_ECC
,
217 GOYA_ASYNC_EVENT_ID_CPU_IF_ECC
,
218 GOYA_ASYNC_EVENT_ID_PSOC_MEM
,
219 GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT
,
220 GOYA_ASYNC_EVENT_ID_SRAM0
,
221 GOYA_ASYNC_EVENT_ID_SRAM1
,
222 GOYA_ASYNC_EVENT_ID_SRAM2
,
223 GOYA_ASYNC_EVENT_ID_SRAM3
,
224 GOYA_ASYNC_EVENT_ID_SRAM4
,
225 GOYA_ASYNC_EVENT_ID_SRAM5
,
226 GOYA_ASYNC_EVENT_ID_SRAM6
,
227 GOYA_ASYNC_EVENT_ID_SRAM7
,
228 GOYA_ASYNC_EVENT_ID_SRAM8
,
229 GOYA_ASYNC_EVENT_ID_SRAM9
,
230 GOYA_ASYNC_EVENT_ID_SRAM10
,
231 GOYA_ASYNC_EVENT_ID_SRAM11
,
232 GOYA_ASYNC_EVENT_ID_SRAM12
,
233 GOYA_ASYNC_EVENT_ID_SRAM13
,
234 GOYA_ASYNC_EVENT_ID_SRAM14
,
235 GOYA_ASYNC_EVENT_ID_SRAM15
,
236 GOYA_ASYNC_EVENT_ID_SRAM16
,
237 GOYA_ASYNC_EVENT_ID_SRAM17
,
238 GOYA_ASYNC_EVENT_ID_SRAM18
,
239 GOYA_ASYNC_EVENT_ID_SRAM19
,
240 GOYA_ASYNC_EVENT_ID_SRAM20
,
241 GOYA_ASYNC_EVENT_ID_SRAM21
,
242 GOYA_ASYNC_EVENT_ID_SRAM22
,
243 GOYA_ASYNC_EVENT_ID_SRAM23
,
244 GOYA_ASYNC_EVENT_ID_SRAM24
,
245 GOYA_ASYNC_EVENT_ID_SRAM25
,
246 GOYA_ASYNC_EVENT_ID_SRAM26
,
247 GOYA_ASYNC_EVENT_ID_SRAM27
,
248 GOYA_ASYNC_EVENT_ID_SRAM28
,
249 GOYA_ASYNC_EVENT_ID_SRAM29
,
250 GOYA_ASYNC_EVENT_ID_GIC500
,
251 GOYA_ASYNC_EVENT_ID_PLL0
,
252 GOYA_ASYNC_EVENT_ID_PLL1
,
253 GOYA_ASYNC_EVENT_ID_PLL3
,
254 GOYA_ASYNC_EVENT_ID_PLL4
,
255 GOYA_ASYNC_EVENT_ID_PLL5
,
256 GOYA_ASYNC_EVENT_ID_PLL6
,
257 GOYA_ASYNC_EVENT_ID_AXI_ECC
,
258 GOYA_ASYNC_EVENT_ID_L2_RAM_ECC
,
259 GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET
,
260 GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT
,
261 GOYA_ASYNC_EVENT_ID_PCIE_DEC
,
262 GOYA_ASYNC_EVENT_ID_TPC0_DEC
,
263 GOYA_ASYNC_EVENT_ID_TPC1_DEC
,
264 GOYA_ASYNC_EVENT_ID_TPC2_DEC
,
265 GOYA_ASYNC_EVENT_ID_TPC3_DEC
,
266 GOYA_ASYNC_EVENT_ID_TPC4_DEC
,
267 GOYA_ASYNC_EVENT_ID_TPC5_DEC
,
268 GOYA_ASYNC_EVENT_ID_TPC6_DEC
,
269 GOYA_ASYNC_EVENT_ID_TPC7_DEC
,
270 GOYA_ASYNC_EVENT_ID_MME_WACS
,
271 GOYA_ASYNC_EVENT_ID_MME_WACSD
,
272 GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER
,
273 GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC
,
274 GOYA_ASYNC_EVENT_ID_PSOC
,
275 GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR
,
276 GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR
,
277 GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR
,
278 GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR
,
279 GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR
,
280 GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR
,
281 GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR
,
282 GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR
,
283 GOYA_ASYNC_EVENT_ID_TPC0_CMDQ
,
284 GOYA_ASYNC_EVENT_ID_TPC1_CMDQ
,
285 GOYA_ASYNC_EVENT_ID_TPC2_CMDQ
,
286 GOYA_ASYNC_EVENT_ID_TPC3_CMDQ
,
287 GOYA_ASYNC_EVENT_ID_TPC4_CMDQ
,
288 GOYA_ASYNC_EVENT_ID_TPC5_CMDQ
,
289 GOYA_ASYNC_EVENT_ID_TPC6_CMDQ
,
290 GOYA_ASYNC_EVENT_ID_TPC7_CMDQ
,
291 GOYA_ASYNC_EVENT_ID_TPC0_QM
,
292 GOYA_ASYNC_EVENT_ID_TPC1_QM
,
293 GOYA_ASYNC_EVENT_ID_TPC2_QM
,
294 GOYA_ASYNC_EVENT_ID_TPC3_QM
,
295 GOYA_ASYNC_EVENT_ID_TPC4_QM
,
296 GOYA_ASYNC_EVENT_ID_TPC5_QM
,
297 GOYA_ASYNC_EVENT_ID_TPC6_QM
,
298 GOYA_ASYNC_EVENT_ID_TPC7_QM
,
299 GOYA_ASYNC_EVENT_ID_MME_QM
,
300 GOYA_ASYNC_EVENT_ID_MME_CMDQ
,
301 GOYA_ASYNC_EVENT_ID_DMA0_QM
,
302 GOYA_ASYNC_EVENT_ID_DMA1_QM
,
303 GOYA_ASYNC_EVENT_ID_DMA2_QM
,
304 GOYA_ASYNC_EVENT_ID_DMA3_QM
,
305 GOYA_ASYNC_EVENT_ID_DMA4_QM
,
306 GOYA_ASYNC_EVENT_ID_DMA0_CH
,
307 GOYA_ASYNC_EVENT_ID_DMA1_CH
,
308 GOYA_ASYNC_EVENT_ID_DMA2_CH
,
309 GOYA_ASYNC_EVENT_ID_DMA3_CH
,
310 GOYA_ASYNC_EVENT_ID_DMA4_CH
,
311 GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU
,
312 GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU
,
313 GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU
,
314 GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU
,
315 GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU
,
316 GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU
,
317 GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU
,
318 GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU
,
319 GOYA_ASYNC_EVENT_ID_DMA_BM_CH0
,
320 GOYA_ASYNC_EVENT_ID_DMA_BM_CH1
,
321 GOYA_ASYNC_EVENT_ID_DMA_BM_CH2
,
322 GOYA_ASYNC_EVENT_ID_DMA_BM_CH3
,
323 GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
326 static int goya_mmu_clear_pgt_range(struct hl_device
*hdev
);
327 static int goya_mmu_set_dram_default_page(struct hl_device
*hdev
);
328 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device
*hdev
);
329 static void goya_mmu_prepare(struct hl_device
*hdev
, u32 asid
);
331 void goya_get_fixed_properties(struct hl_device
*hdev
)
333 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
336 for (i
= 0 ; i
< NUMBER_OF_EXT_HW_QUEUES
; i
++) {
337 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_EXT
;
338 prop
->hw_queues_props
[i
].kmd_only
= 0;
341 for (; i
< NUMBER_OF_EXT_HW_QUEUES
+ NUMBER_OF_CPU_HW_QUEUES
; i
++) {
342 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_CPU
;
343 prop
->hw_queues_props
[i
].kmd_only
= 1;
346 for (; i
< NUMBER_OF_EXT_HW_QUEUES
+ NUMBER_OF_CPU_HW_QUEUES
+
347 NUMBER_OF_INT_HW_QUEUES
; i
++) {
348 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_INT
;
349 prop
->hw_queues_props
[i
].kmd_only
= 0;
352 for (; i
< HL_MAX_QUEUES
; i
++)
353 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_NA
;
355 prop
->completion_queues_count
= NUMBER_OF_CMPLT_QUEUES
;
357 prop
->dram_base_address
= DRAM_PHYS_BASE
;
358 prop
->dram_size
= DRAM_PHYS_DEFAULT_SIZE
;
359 prop
->dram_end_address
= prop
->dram_base_address
+ prop
->dram_size
;
360 prop
->dram_user_base_address
= DRAM_BASE_ADDR_USER
;
362 prop
->sram_base_address
= SRAM_BASE_ADDR
;
363 prop
->sram_size
= SRAM_SIZE
;
364 prop
->sram_end_address
= prop
->sram_base_address
+ prop
->sram_size
;
365 prop
->sram_user_base_address
= prop
->sram_base_address
+
366 SRAM_USER_BASE_OFFSET
;
368 prop
->mmu_pgt_addr
= MMU_PAGE_TABLES_ADDR
;
369 prop
->mmu_dram_default_page_addr
= MMU_DRAM_DEFAULT_PAGE_ADDR
;
371 prop
->mmu_pgt_size
= 0x800000; /* 8MB */
373 prop
->mmu_pgt_size
= MMU_PAGE_TABLES_SIZE
;
374 prop
->mmu_pte_size
= HL_PTE_SIZE
;
375 prop
->mmu_hop_table_size
= HOP_TABLE_SIZE
;
376 prop
->mmu_hop0_tables_total_size
= HOP0_TABLES_TOTAL_SIZE
;
377 prop
->dram_page_size
= PAGE_SIZE_2MB
;
379 prop
->va_space_host_start_address
= VA_HOST_SPACE_START
;
380 prop
->va_space_host_end_address
= VA_HOST_SPACE_END
;
381 prop
->va_space_dram_start_address
= VA_DDR_SPACE_START
;
382 prop
->va_space_dram_end_address
= VA_DDR_SPACE_END
;
383 prop
->dram_size_for_default_page_mapping
=
384 prop
->va_space_dram_end_address
;
385 prop
->cfg_size
= CFG_SIZE
;
386 prop
->max_asid
= MAX_ASID
;
387 prop
->num_of_events
= GOYA_ASYNC_EVENT_ID_SIZE
;
388 prop
->high_pll
= PLL_HIGH_DEFAULT
;
389 prop
->cb_pool_cb_cnt
= GOYA_CB_POOL_CB_CNT
;
390 prop
->cb_pool_cb_size
= GOYA_CB_POOL_CB_SIZE
;
391 prop
->max_power_default
= MAX_POWER_DEFAULT
;
392 prop
->tpc_enabled_mask
= TPC_ENABLED_MASK
;
393 prop
->pcie_dbi_base_address
= mmPCIE_DBI_BASE
;
394 prop
->pcie_aux_dbi_reg_addr
= CFG_BASE
+ mmPCIE_AUX_DBI
;
398 * goya_pci_bars_map - Map PCI BARS of Goya device
400 * @hdev: pointer to hl_device structure
402 * Request PCI regions and map them to kernel virtual addresses.
403 * Returns 0 on success
406 static int goya_pci_bars_map(struct hl_device
*hdev
)
408 static const char * const name
[] = {"SRAM_CFG", "MSIX", "DDR"};
409 bool is_wc
[3] = {false, false, true};
412 rc
= hl_pci_bars_map(hdev
, name
, is_wc
);
416 hdev
->rmmio
= hdev
->pcie_bar
[SRAM_CFG_BAR_ID
] +
417 (CFG_BASE
- SRAM_BASE_ADDR
);
422 static u64
goya_set_ddr_bar_base(struct hl_device
*hdev
, u64 addr
)
424 struct goya_device
*goya
= hdev
->asic_specific
;
428 if ((goya
) && (goya
->ddr_bar_cur_addr
== addr
))
431 /* Inbound Region 1 - Bar 4 - Point to DDR */
432 rc
= hl_pci_set_dram_bar_base(hdev
, 1, 4, addr
);
437 old_addr
= goya
->ddr_bar_cur_addr
;
438 goya
->ddr_bar_cur_addr
= addr
;
445 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
447 * @hdev: pointer to hl_device structure
449 * This is needed in case the firmware doesn't initialize the iATU
452 static int goya_init_iatu(struct hl_device
*hdev
)
454 return hl_pci_init_iatu(hdev
, SRAM_BASE_ADDR
, DRAM_PHYS_BASE
,
455 HOST_PHYS_BASE
, HOST_PHYS_SIZE
);
459 * goya_early_init - GOYA early initialization code
461 * @hdev: pointer to hl_device structure
465 * PCI controller initialization
469 static int goya_early_init(struct hl_device
*hdev
)
471 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
472 struct pci_dev
*pdev
= hdev
->pdev
;
476 goya_get_fixed_properties(hdev
);
478 /* Check BAR sizes */
479 if (pci_resource_len(pdev
, SRAM_CFG_BAR_ID
) != CFG_BAR_SIZE
) {
481 "Not " HL_NAME
"? BAR %d size %llu, expecting %llu\n",
483 (unsigned long long) pci_resource_len(pdev
,
489 if (pci_resource_len(pdev
, MSIX_BAR_ID
) != MSIX_BAR_SIZE
) {
491 "Not " HL_NAME
"? BAR %d size %llu, expecting %llu\n",
493 (unsigned long long) pci_resource_len(pdev
,
499 prop
->dram_pci_bar_size
= pci_resource_len(pdev
, DDR_BAR_ID
);
501 rc
= hl_pci_init(hdev
, 48);
506 val
= RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS
);
507 if (val
& PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK
)
509 "PCI strap is not configured correctly, PCI bus errors may occur\n");
516 * goya_early_fini - GOYA early finalization code
518 * @hdev: pointer to hl_device structure
523 static int goya_early_fini(struct hl_device
*hdev
)
530 static void goya_mmu_prepare_reg(struct hl_device
*hdev
, u64 reg
, u32 asid
)
532 /* mask to zero the MMBP and ASID bits */
533 WREG32_AND(reg
, ~0x7FF);
534 WREG32_OR(reg
, asid
);
537 static void goya_qman0_set_security(struct hl_device
*hdev
, bool secure
)
539 struct goya_device
*goya
= hdev
->asic_specific
;
541 if (!(goya
->hw_cap_initialized
& HW_CAP_MMU
))
545 WREG32(mmDMA_QM_0_GLBL_PROT
, QMAN_DMA_FULLY_TRUSTED
);
547 WREG32(mmDMA_QM_0_GLBL_PROT
, QMAN_DMA_PARTLY_TRUSTED
);
549 RREG32(mmDMA_QM_0_GLBL_PROT
);
553 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
555 * @hdev: pointer to hl_device structure
558 static void goya_fetch_psoc_frequency(struct hl_device
*hdev
)
560 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
562 prop
->psoc_pci_pll_nr
= RREG32(mmPSOC_PCI_PLL_NR
);
563 prop
->psoc_pci_pll_nf
= RREG32(mmPSOC_PCI_PLL_NF
);
564 prop
->psoc_pci_pll_od
= RREG32(mmPSOC_PCI_PLL_OD
);
565 prop
->psoc_pci_pll_div_factor
= RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1
);
568 int goya_late_init(struct hl_device
*hdev
)
570 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
573 goya_fetch_psoc_frequency(hdev
);
575 rc
= goya_mmu_clear_pgt_range(hdev
);
578 "Failed to clear MMU page tables range %d\n", rc
);
582 rc
= goya_mmu_set_dram_default_page(hdev
);
584 dev_err(hdev
->dev
, "Failed to set DRAM default page %d\n", rc
);
588 rc
= goya_mmu_add_mappings_for_device_cpu(hdev
);
592 rc
= goya_init_cpu_queues(hdev
);
596 rc
= goya_test_cpu_queue(hdev
);
600 rc
= goya_armcp_info_get(hdev
);
602 dev_err(hdev
->dev
, "Failed to get armcp info %d\n", rc
);
606 /* Now that we have the DRAM size in ASIC prop, we need to check
607 * its size and configure the DMA_IF DDR wrap protection (which is in
608 * the MMU block) accordingly. The value is the log2 of the DRAM size
610 WREG32(mmMMU_LOG2_DDR_SIZE
, ilog2(prop
->dram_size
));
612 rc
= hl_fw_send_pci_access_msg(hdev
, ARMCP_PACKET_ENABLE_PCI_ACCESS
);
615 "Failed to enable PCI access from CPU %d\n", rc
);
619 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
,
620 GOYA_ASYNC_EVENT_ID_INTS_REGISTER
);
626 * goya_late_fini - GOYA late tear-down code
628 * @hdev: pointer to hl_device structure
630 * Free sensors allocated structures
632 void goya_late_fini(struct hl_device
*hdev
)
634 const struct hwmon_channel_info
**channel_info_arr
;
637 if (!hdev
->hl_chip_info
->info
)
640 channel_info_arr
= hdev
->hl_chip_info
->info
;
642 while (channel_info_arr
[i
]) {
643 kfree(channel_info_arr
[i
]->config
);
644 kfree(channel_info_arr
[i
]);
648 kfree(channel_info_arr
);
650 hdev
->hl_chip_info
->info
= NULL
;
654 * goya_sw_init - Goya software initialization code
656 * @hdev: pointer to hl_device structure
659 static int goya_sw_init(struct hl_device
*hdev
)
661 struct goya_device
*goya
;
664 /* Allocate device structure */
665 goya
= kzalloc(sizeof(*goya
), GFP_KERNEL
);
669 /* according to goya_init_iatu */
670 goya
->ddr_bar_cur_addr
= DRAM_PHYS_BASE
;
672 goya
->mme_clk
= GOYA_PLL_FREQ_LOW
;
673 goya
->tpc_clk
= GOYA_PLL_FREQ_LOW
;
674 goya
->ic_clk
= GOYA_PLL_FREQ_LOW
;
676 hdev
->asic_specific
= goya
;
678 /* Create DMA pool for small allocations */
679 hdev
->dma_pool
= dma_pool_create(dev_name(hdev
->dev
),
680 &hdev
->pdev
->dev
, GOYA_DMA_POOL_BLK_SIZE
, 8, 0);
681 if (!hdev
->dma_pool
) {
682 dev_err(hdev
->dev
, "failed to create DMA pool\n");
684 goto free_goya_device
;
687 hdev
->cpu_accessible_dma_mem
=
688 hdev
->asic_funcs
->asic_dma_alloc_coherent(hdev
,
689 HL_CPU_ACCESSIBLE_MEM_SIZE
,
690 &hdev
->cpu_accessible_dma_address
,
691 GFP_KERNEL
| __GFP_ZERO
);
693 if (!hdev
->cpu_accessible_dma_mem
) {
698 dev_dbg(hdev
->dev
, "cpu accessible memory at bus address 0x%llx\n",
699 hdev
->cpu_accessible_dma_address
);
701 hdev
->cpu_accessible_dma_pool
= gen_pool_create(ilog2(32), -1);
702 if (!hdev
->cpu_accessible_dma_pool
) {
704 "Failed to create CPU accessible DMA pool\n");
706 goto free_cpu_dma_mem
;
709 rc
= gen_pool_add(hdev
->cpu_accessible_dma_pool
,
710 (uintptr_t) hdev
->cpu_accessible_dma_mem
,
711 HL_CPU_ACCESSIBLE_MEM_SIZE
, -1);
714 "Failed to add memory to CPU accessible DMA pool\n");
716 goto free_cpu_accessible_dma_pool
;
719 spin_lock_init(&goya
->hw_queues_lock
);
723 free_cpu_accessible_dma_pool
:
724 gen_pool_destroy(hdev
->cpu_accessible_dma_pool
);
726 hdev
->asic_funcs
->asic_dma_free_coherent(hdev
,
727 HL_CPU_ACCESSIBLE_MEM_SIZE
,
728 hdev
->cpu_accessible_dma_mem
,
729 hdev
->cpu_accessible_dma_address
);
731 dma_pool_destroy(hdev
->dma_pool
);
739 * goya_sw_fini - Goya software tear-down code
741 * @hdev: pointer to hl_device structure
744 static int goya_sw_fini(struct hl_device
*hdev
)
746 struct goya_device
*goya
= hdev
->asic_specific
;
748 gen_pool_destroy(hdev
->cpu_accessible_dma_pool
);
750 hdev
->asic_funcs
->asic_dma_free_coherent(hdev
,
751 HL_CPU_ACCESSIBLE_MEM_SIZE
,
752 hdev
->cpu_accessible_dma_mem
,
753 hdev
->cpu_accessible_dma_address
);
755 dma_pool_destroy(hdev
->dma_pool
);
762 static void goya_init_dma_qman(struct hl_device
*hdev
, int dma_id
,
763 dma_addr_t bus_address
)
765 struct goya_device
*goya
= hdev
->asic_specific
;
766 u32 mtr_base_lo
, mtr_base_hi
;
767 u32 so_base_lo
, so_base_hi
;
768 u32 gic_base_lo
, gic_base_hi
;
769 u32 reg_off
= dma_id
* (mmDMA_QM_1_PQ_PI
- mmDMA_QM_0_PQ_PI
);
771 mtr_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
772 mtr_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
773 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
774 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
777 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
779 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
781 WREG32(mmDMA_QM_0_PQ_BASE_LO
+ reg_off
, lower_32_bits(bus_address
));
782 WREG32(mmDMA_QM_0_PQ_BASE_HI
+ reg_off
, upper_32_bits(bus_address
));
784 WREG32(mmDMA_QM_0_PQ_SIZE
+ reg_off
, ilog2(HL_QUEUE_LENGTH
));
785 WREG32(mmDMA_QM_0_PQ_PI
+ reg_off
, 0);
786 WREG32(mmDMA_QM_0_PQ_CI
+ reg_off
, 0);
788 WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO
+ reg_off
, mtr_base_lo
);
789 WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI
+ reg_off
, mtr_base_hi
);
790 WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO
+ reg_off
, so_base_lo
);
791 WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI
+ reg_off
, so_base_hi
);
792 WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO
+ reg_off
, gic_base_lo
);
793 WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI
+ reg_off
, gic_base_hi
);
794 WREG32(mmDMA_QM_0_GLBL_ERR_WDATA
+ reg_off
,
795 GOYA_ASYNC_EVENT_ID_DMA0_QM
+ dma_id
);
797 /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
798 WREG32(mmDMA_QM_0_PQ_CFG1
+ reg_off
, 0x00020002);
799 WREG32(mmDMA_QM_0_CQ_CFG1
+ reg_off
, 0x00080008);
801 if (goya
->hw_cap_initialized
& HW_CAP_MMU
)
802 WREG32(mmDMA_QM_0_GLBL_PROT
+ reg_off
, QMAN_DMA_PARTLY_TRUSTED
);
804 WREG32(mmDMA_QM_0_GLBL_PROT
+ reg_off
, QMAN_DMA_FULLY_TRUSTED
);
806 WREG32(mmDMA_QM_0_GLBL_ERR_CFG
+ reg_off
, QMAN_DMA_ERR_MSG_EN
);
807 WREG32(mmDMA_QM_0_GLBL_CFG0
+ reg_off
, QMAN_DMA_ENABLE
);
810 static void goya_init_dma_ch(struct hl_device
*hdev
, int dma_id
)
812 u32 gic_base_lo
, gic_base_hi
;
814 u32 reg_off
= dma_id
* (mmDMA_CH_1_CFG1
- mmDMA_CH_0_CFG1
);
817 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
819 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
821 WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO
+ reg_off
, gic_base_lo
);
822 WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI
+ reg_off
, gic_base_hi
);
823 WREG32(mmDMA_CH_0_ERRMSG_WDATA
+ reg_off
,
824 GOYA_ASYNC_EVENT_ID_DMA0_CH
+ dma_id
);
827 sob_addr
= CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_1000
+
830 sob_addr
= CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_1007
;
832 WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI
+ reg_off
, upper_32_bits(sob_addr
));
833 WREG32(mmDMA_CH_0_WR_COMP_WDATA
+ reg_off
, 0x80000001);
837 * goya_init_dma_qmans - Initialize QMAN DMA registers
839 * @hdev: pointer to hl_device structure
841 * Initialize the H/W registers of the QMAN DMA channels
844 void goya_init_dma_qmans(struct hl_device
*hdev
)
846 struct goya_device
*goya
= hdev
->asic_specific
;
847 struct hl_hw_queue
*q
;
850 if (goya
->hw_cap_initialized
& HW_CAP_DMA
)
853 q
= &hdev
->kernel_queues
[0];
855 for (i
= 0 ; i
< NUMBER_OF_EXT_HW_QUEUES
; i
++, q
++) {
856 goya_init_dma_qman(hdev
, i
, q
->bus_address
);
857 goya_init_dma_ch(hdev
, i
);
860 goya
->hw_cap_initialized
|= HW_CAP_DMA
;
864 * goya_disable_external_queues - Disable external queues
866 * @hdev: pointer to hl_device structure
869 static void goya_disable_external_queues(struct hl_device
*hdev
)
871 WREG32(mmDMA_QM_0_GLBL_CFG0
, 0);
872 WREG32(mmDMA_QM_1_GLBL_CFG0
, 0);
873 WREG32(mmDMA_QM_2_GLBL_CFG0
, 0);
874 WREG32(mmDMA_QM_3_GLBL_CFG0
, 0);
875 WREG32(mmDMA_QM_4_GLBL_CFG0
, 0);
878 static int goya_stop_queue(struct hl_device
*hdev
, u32 cfg_reg
,
879 u32 cp_sts_reg
, u32 glbl_sts0_reg
)
884 /* use the values of TPC0 as they are all the same*/
886 WREG32(cfg_reg
, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
888 status
= RREG32(cp_sts_reg
);
889 if (status
& TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK
) {
890 rc
= hl_poll_timeout(
894 !(status
& TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK
),
896 QMAN_FENCE_TIMEOUT_USEC
);
898 /* if QMAN is stuck in fence no need to check for stop */
903 rc
= hl_poll_timeout(
907 (status
& TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK
),
909 QMAN_STOP_TIMEOUT_USEC
);
913 "Timeout while waiting for QMAN to stop\n");
921 * goya_stop_external_queues - Stop external queues
923 * @hdev: pointer to hl_device structure
925 * Returns 0 on success
928 static int goya_stop_external_queues(struct hl_device
*hdev
)
932 rc
= goya_stop_queue(hdev
,
933 mmDMA_QM_0_GLBL_CFG1
,
935 mmDMA_QM_0_GLBL_STS0
);
938 dev_err(hdev
->dev
, "failed to stop DMA QMAN 0\n");
942 rc
= goya_stop_queue(hdev
,
943 mmDMA_QM_1_GLBL_CFG1
,
945 mmDMA_QM_1_GLBL_STS0
);
948 dev_err(hdev
->dev
, "failed to stop DMA QMAN 1\n");
952 rc
= goya_stop_queue(hdev
,
953 mmDMA_QM_2_GLBL_CFG1
,
955 mmDMA_QM_2_GLBL_STS0
);
958 dev_err(hdev
->dev
, "failed to stop DMA QMAN 2\n");
962 rc
= goya_stop_queue(hdev
,
963 mmDMA_QM_3_GLBL_CFG1
,
965 mmDMA_QM_3_GLBL_STS0
);
968 dev_err(hdev
->dev
, "failed to stop DMA QMAN 3\n");
972 rc
= goya_stop_queue(hdev
,
973 mmDMA_QM_4_GLBL_CFG1
,
975 mmDMA_QM_4_GLBL_STS0
);
978 dev_err(hdev
->dev
, "failed to stop DMA QMAN 4\n");
986 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
988 * @hdev: pointer to hl_device structure
990 * Returns 0 on success
993 int goya_init_cpu_queues(struct hl_device
*hdev
)
995 struct goya_device
*goya
= hdev
->asic_specific
;
998 struct hl_hw_queue
*cpu_pq
= &hdev
->kernel_queues
[GOYA_QUEUE_ID_CPU_PQ
];
1001 if (!hdev
->cpu_queues_enable
)
1004 if (goya
->hw_cap_initialized
& HW_CAP_CPU_Q
)
1007 eq
= &hdev
->event_queue
;
1009 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
,
1010 lower_32_bits(cpu_pq
->bus_address
));
1011 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
,
1012 upper_32_bits(cpu_pq
->bus_address
));
1014 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
, lower_32_bits(eq
->bus_address
));
1015 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
, upper_32_bits(eq
->bus_address
));
1017 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
,
1018 lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR
));
1019 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
,
1020 upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR
));
1022 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
, HL_QUEUE_SIZE_IN_BYTES
);
1023 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
, HL_EQ_SIZE_IN_BYTES
);
1024 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
, HL_CPU_ACCESSIBLE_MEM_SIZE
);
1026 /* Used for EQ CI */
1027 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
, 0);
1029 WREG32(mmCPU_IF_PF_PQ_PI
, 0);
1031 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
, PQ_INIT_STATUS_READY_FOR_CP
);
1033 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
,
1034 GOYA_ASYNC_EVENT_ID_PI_UPDATE
);
1036 err
= hl_poll_timeout(
1038 mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
,
1040 (status
== PQ_INIT_STATUS_READY_FOR_HOST
),
1042 GOYA_CPU_TIMEOUT_USEC
);
1046 "Failed to setup communication with device CPU\n");
1050 goya
->hw_cap_initialized
|= HW_CAP_CPU_Q
;
1054 static void goya_set_pll_refclk(struct hl_device
*hdev
)
1056 WREG32(mmCPU_PLL_DIV_SEL_0
, 0x0);
1057 WREG32(mmCPU_PLL_DIV_SEL_1
, 0x0);
1058 WREG32(mmCPU_PLL_DIV_SEL_2
, 0x0);
1059 WREG32(mmCPU_PLL_DIV_SEL_3
, 0x0);
1061 WREG32(mmIC_PLL_DIV_SEL_0
, 0x0);
1062 WREG32(mmIC_PLL_DIV_SEL_1
, 0x0);
1063 WREG32(mmIC_PLL_DIV_SEL_2
, 0x0);
1064 WREG32(mmIC_PLL_DIV_SEL_3
, 0x0);
1066 WREG32(mmMC_PLL_DIV_SEL_0
, 0x0);
1067 WREG32(mmMC_PLL_DIV_SEL_1
, 0x0);
1068 WREG32(mmMC_PLL_DIV_SEL_2
, 0x0);
1069 WREG32(mmMC_PLL_DIV_SEL_3
, 0x0);
1071 WREG32(mmPSOC_MME_PLL_DIV_SEL_0
, 0x0);
1072 WREG32(mmPSOC_MME_PLL_DIV_SEL_1
, 0x0);
1073 WREG32(mmPSOC_MME_PLL_DIV_SEL_2
, 0x0);
1074 WREG32(mmPSOC_MME_PLL_DIV_SEL_3
, 0x0);
1076 WREG32(mmPSOC_PCI_PLL_DIV_SEL_0
, 0x0);
1077 WREG32(mmPSOC_PCI_PLL_DIV_SEL_1
, 0x0);
1078 WREG32(mmPSOC_PCI_PLL_DIV_SEL_2
, 0x0);
1079 WREG32(mmPSOC_PCI_PLL_DIV_SEL_3
, 0x0);
1081 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0
, 0x0);
1082 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1
, 0x0);
1083 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2
, 0x0);
1084 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3
, 0x0);
1086 WREG32(mmTPC_PLL_DIV_SEL_0
, 0x0);
1087 WREG32(mmTPC_PLL_DIV_SEL_1
, 0x0);
1088 WREG32(mmTPC_PLL_DIV_SEL_2
, 0x0);
1089 WREG32(mmTPC_PLL_DIV_SEL_3
, 0x0);
1092 static void goya_disable_clk_rlx(struct hl_device
*hdev
)
1094 WREG32(mmPSOC_MME_PLL_CLK_RLX_0
, 0x100010);
1095 WREG32(mmIC_PLL_CLK_RLX_0
, 0x100010);
1098 static void _goya_tpc_mbist_workaround(struct hl_device
*hdev
, u8 tpc_id
)
1100 u64 tpc_eml_address
;
1101 u32 val
, tpc_offset
, tpc_eml_offset
, tpc_slm_offset
;
1104 tpc_offset
= tpc_id
* 0x40000;
1105 tpc_eml_offset
= tpc_id
* 0x200000;
1106 tpc_eml_address
= (mmTPC0_EML_CFG_BASE
+ tpc_eml_offset
- CFG_BASE
);
1107 tpc_slm_offset
= tpc_eml_address
+ 0x100000;
1110 * Workaround for Bug H2 #2443 :
1111 * "TPC SB is not initialized on chip reset"
1114 val
= RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL
+ tpc_offset
);
1115 if (val
& TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK
)
1116 dev_warn(hdev
->dev
, "TPC%d MBIST ACTIVE is not cleared\n",
1119 WREG32(mmTPC0_CFG_FUNC_MBIST_PAT
+ tpc_offset
, val
& 0xFFFFF000);
1121 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0
+ tpc_offset
, 0x37FF);
1122 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1
+ tpc_offset
, 0x303F);
1123 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2
+ tpc_offset
, 0x71FF);
1124 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3
+ tpc_offset
, 0x71FF);
1125 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4
+ tpc_offset
, 0x70FF);
1126 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5
+ tpc_offset
, 0x70FF);
1127 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6
+ tpc_offset
, 0x70FF);
1128 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7
+ tpc_offset
, 0x70FF);
1129 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8
+ tpc_offset
, 0x70FF);
1130 WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9
+ tpc_offset
, 0x70FF);
1132 WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL
+ tpc_offset
,
1133 1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT
);
1135 err
= hl_poll_timeout(
1137 mmTPC0_CFG_FUNC_MBIST_CNTRL
+ tpc_offset
,
1139 (val
& TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK
),
1141 HL_DEVICE_TIMEOUT_USEC
);
1145 "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id
);
1147 WREG32_OR(mmTPC0_EML_CFG_DBG_CNT
+ tpc_eml_offset
,
1148 1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT
);
1150 msleep(GOYA_RESET_WAIT_MSEC
);
1152 WREG32_AND(mmTPC0_EML_CFG_DBG_CNT
+ tpc_eml_offset
,
1153 ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT
));
1155 msleep(GOYA_RESET_WAIT_MSEC
);
1157 for (slm_index
= 0 ; slm_index
< 256 ; slm_index
++)
1158 WREG32(tpc_slm_offset
+ (slm_index
<< 2), 0);
1160 val
= RREG32(tpc_slm_offset
);
1163 static void goya_tpc_mbist_workaround(struct hl_device
*hdev
)
1165 struct goya_device
*goya
= hdev
->asic_specific
;
1171 if (goya
->hw_cap_initialized
& HW_CAP_TPC_MBIST
)
1174 /* Workaround for H2 #2443 */
1176 for (i
= 0 ; i
< TPC_MAX_NUM
; i
++)
1177 _goya_tpc_mbist_workaround(hdev
, i
);
1179 goya
->hw_cap_initialized
|= HW_CAP_TPC_MBIST
;
1183 * goya_init_golden_registers - Initialize golden registers
1185 * @hdev: pointer to hl_device structure
1187 * Initialize the H/W registers of the device
1190 static void goya_init_golden_registers(struct hl_device
*hdev
)
1192 struct goya_device
*goya
= hdev
->asic_specific
;
1193 u32 polynom
[10], tpc_intr_mask
, offset
;
1196 if (goya
->hw_cap_initialized
& HW_CAP_GOLDEN
)
1199 polynom
[0] = 0x00020080;
1200 polynom
[1] = 0x00401000;
1201 polynom
[2] = 0x00200800;
1202 polynom
[3] = 0x00002000;
1203 polynom
[4] = 0x00080200;
1204 polynom
[5] = 0x00040100;
1205 polynom
[6] = 0x00100400;
1206 polynom
[7] = 0x00004000;
1207 polynom
[8] = 0x00010000;
1208 polynom
[9] = 0x00008000;
1210 /* Mask all arithmetic interrupts from TPC */
1211 tpc_intr_mask
= 0x7FFF;
1213 for (i
= 0, offset
= 0 ; i
< 6 ; i
++, offset
+= 0x20000) {
1214 WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB
+ offset
, 0x302);
1215 WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB
+ offset
, 0x302);
1216 WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB
+ offset
, 0x302);
1217 WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB
+ offset
, 0x302);
1218 WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB
+ offset
, 0x302);
1220 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB
+ offset
, 0x204);
1221 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB
+ offset
, 0x204);
1222 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB
+ offset
, 0x204);
1223 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB
+ offset
, 0x204);
1224 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB
+ offset
, 0x204);
1227 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB
+ offset
, 0x206);
1228 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB
+ offset
, 0x206);
1229 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB
+ offset
, 0x206);
1230 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB
+ offset
, 0x207);
1231 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB
+ offset
, 0x207);
1233 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB
+ offset
, 0x207);
1234 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB
+ offset
, 0x207);
1235 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB
+ offset
, 0x206);
1236 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB
+ offset
, 0x206);
1237 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB
+ offset
, 0x206);
1239 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB
+ offset
, 0x101);
1240 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB
+ offset
, 0x102);
1241 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB
+ offset
, 0x103);
1242 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB
+ offset
, 0x104);
1243 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB
+ offset
, 0x105);
1245 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB
+ offset
, 0x105);
1246 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB
+ offset
, 0x104);
1247 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB
+ offset
, 0x103);
1248 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB
+ offset
, 0x102);
1249 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB
+ offset
, 0x101);
1252 WREG32(mmMME_STORE_MAX_CREDIT
, 0x21);
1253 WREG32(mmMME_AGU
, 0x0f0f0f10);
1254 WREG32(mmMME_SEI_MASK
, ~0x0);
1256 WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB
, 0x01010101);
1257 WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB
, 0x01040101);
1258 WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB
, 0x01030101);
1259 WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB
, 0x01020101);
1260 WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB
, 0x01010101);
1261 WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB
, 0x07010701);
1262 WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB
, 0x04010401);
1263 WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB
, 0x04050401);
1264 WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB
, 0x03070301);
1265 WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB
, 0x01030101);
1266 WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB
, 0x01040101);
1267 WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB
, 0x01050105);
1268 WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB
, 0x01010501);
1269 WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB
, 0x01010501);
1270 WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB
, 0x01040301);
1271 WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB
, 0x01030401);
1272 WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB
, 0x01040101);
1273 WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB
, 0x01050101);
1274 WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB
, 0x02020202);
1275 WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB
, 0x01070101);
1276 WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB
, 0x02020201);
1277 WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB
, 0x07020701);
1278 WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB
, 0x01020101);
1279 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB
, 0x01010101);
1280 WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB
, 0x01070101);
1281 WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB
, 0x01070101);
1282 WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB
, 0x07020701);
1283 WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB
, 0x02020201);
1284 WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB
, 0x01070101);
1285 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB
, 0x01020102);
1286 WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB
, 0x01020701);
1287 WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB
, 0x01020701);
1288 WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB
, 0x07020707);
1289 WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB
, 0x01020201);
1290 WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB
, 0x01070201);
1291 WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB
, 0x01070201);
1292 WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB
, 0x01070102);
1293 WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB
, 0x01070102);
1294 WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB
, 0x01060102);
1295 WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB
, 0x01040102);
1296 WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB
, 0x01020102);
1297 WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB
, 0x01020107);
1298 WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB
, 0x01020106);
1299 WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB
, 0x01020102);
1300 WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB
, 0x01040102);
1301 WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB
, 0x01060102);
1302 WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB
, 0x01070102);
1303 WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB
, 0x01070102);
1304 WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB
, 0x01020702);
1305 WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB
, 0x01020702);
1306 WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB
, 0x01040602);
1307 WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB
, 0x01060402);
1308 WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB
, 0x01070202);
1309 WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB
, 0x01070102);
1310 WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1311 WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1312 WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1313 WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1314 WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1315 WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB
, 0x01060401);
1316 WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB
, 0x01050101);
1317 WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB
, 0x01040101);
1318 WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB
, 0x01030101);
1319 WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB
, 0x01020101);
1320 WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB
, 0x01010101);
1321 WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB
, 0x01010107);
1322 WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB
, 0x01010107);
1323 WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB
, 0x01010101);
1324 WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB
, 0x01020101);
1325 WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB
, 0x01030101);
1326 WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB
, 0x01040101);
1327 WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB
, 0x01050101);
1328 WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB
, 0x01010501);
1329 WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB
, 0x01010501);
1330 WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB
, 0x01040301);
1331 WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB
, 0x01030401);
1332 WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB
, 0x01040101);
1333 WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB
, 0x01050101);
1334 WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1335 WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1336 WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1337 WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1338 WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1339 WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB
, 0x01010101);
1341 WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB
, 0x01010101);
1342 WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB
, 0x01010101);
1343 WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB
, 0x01060101);
1344 WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB
, 0x02020102);
1345 WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB
, 0x01010101);
1346 WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB
, 0x02070202);
1347 WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB
, 0x01020201);
1348 WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB
, 0x01070201);
1349 WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB
, 0x01070202);
1350 WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB
, 0x01010101);
1351 WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB
, 0x01050101);
1352 WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB
, 0x01050101);
1354 WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB
, 0x01020101);
1355 WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB
, 0x01050101);
1356 WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB
, 0x01010201);
1357 WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB
, 0x02040102);
1358 WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB
, 0x01050101);
1359 WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB
, 0x02060202);
1360 WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB
, 0x01020201);
1361 WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB
, 0x01070201);
1362 WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB
, 0x01070202);
1363 WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB
, 0x01010101);
1364 WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB
, 0x01040101);
1365 WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB
, 0x01040101);
1367 WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB
, 0x01030101);
1368 WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB
, 0x01040101);
1369 WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB
, 0x01040301);
1370 WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB
, 0x02060102);
1371 WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB
, 0x01040101);
1372 WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB
, 0x01040301);
1373 WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB
, 0x01040201);
1374 WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB
, 0x01060201);
1375 WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB
, 0x01060402);
1376 WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB
, 0x01020101);
1377 WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB
, 0x01030101);
1378 WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB
, 0x01030401);
1380 WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB
, 0x01040101);
1381 WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB
, 0x01030101);
1382 WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB
, 0x01030401);
1383 WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB
, 0x02070102);
1384 WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB
, 0x01030101);
1385 WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB
, 0x02060702);
1386 WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB
, 0x01060201);
1387 WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB
, 0x01040201);
1388 WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB
, 0x01040602);
1389 WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB
, 0x01030101);
1390 WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB
, 0x01020101);
1391 WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB
, 0x01040301);
1393 WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB
, 0x01050101);
1394 WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB
, 0x01020101);
1395 WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB
, 0x01200501);
1396 WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB
, 0x02070102);
1397 WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB
, 0x01020101);
1398 WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB
, 0x02020602);
1399 WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB
, 0x01070201);
1400 WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB
, 0x01020201);
1401 WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB
, 0x01020702);
1402 WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB
, 0x01040101);
1403 WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB
, 0x01010101);
1404 WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB
, 0x01010501);
1406 WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB
, 0x01010101);
1407 WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB
, 0x01010101);
1408 WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB
, 0x01010601);
1409 WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB
, 0x01010101);
1410 WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB
, 0x01010101);
1411 WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB
, 0x02020702);
1412 WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB
, 0x01010101);
1413 WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB
, 0x01010101);
1414 WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB
, 0x01020702);
1415 WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB
, 0x01050101);
1416 WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB
, 0x01010101);
1417 WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB
, 0x01010501);
1419 for (i
= 0, offset
= 0 ; i
< 10 ; i
++, offset
+= 4) {
1420 WREG32(mmMME1_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1421 WREG32(mmMME2_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1422 WREG32(mmMME3_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1423 WREG32(mmMME4_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1424 WREG32(mmMME5_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1425 WREG32(mmMME6_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1427 WREG32(mmTPC0_NRTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1428 WREG32(mmTPC1_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1429 WREG32(mmTPC2_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1430 WREG32(mmTPC3_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1431 WREG32(mmTPC4_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1432 WREG32(mmTPC5_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1433 WREG32(mmTPC6_RTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1434 WREG32(mmTPC7_NRTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1436 WREG32(mmPCI_NRTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1437 WREG32(mmDMA_NRTR_SPLIT_COEF_0
+ offset
, polynom
[i
] >> 7);
1440 for (i
= 0, offset
= 0 ; i
< 6 ; i
++, offset
+= 0x40000) {
1441 WREG32(mmMME1_RTR_SCRAMB_EN
+ offset
,
1442 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT
);
1443 WREG32(mmMME1_RTR_NON_LIN_SCRAMB
+ offset
,
1444 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT
);
1447 for (i
= 0, offset
= 0 ; i
< 8 ; i
++, offset
+= 0x40000) {
1449 * Workaround for Bug H2 #2441 :
1450 * "ST.NOP set trace event illegal opcode"
1452 WREG32(mmTPC0_CFG_TPC_INTR_MASK
+ offset
, tpc_intr_mask
);
1454 WREG32(mmTPC0_NRTR_SCRAMB_EN
+ offset
,
1455 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT
);
1456 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB
+ offset
,
1457 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT
);
1460 WREG32(mmDMA_NRTR_SCRAMB_EN
, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT
);
1461 WREG32(mmDMA_NRTR_NON_LIN_SCRAMB
,
1462 1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT
);
1464 WREG32(mmPCI_NRTR_SCRAMB_EN
, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT
);
1465 WREG32(mmPCI_NRTR_NON_LIN_SCRAMB
,
1466 1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT
);
1469 * Workaround for H2 #HW-23 bug
1470 * Set DMA max outstanding read requests to 240 on DMA CH 1.
1471 * This limitation is still large enough to not affect Gen4 bandwidth.
1472 * We need to only limit that DMA channel because the user can only read
1473 * from Host using DMA CH 1
1475 WREG32(mmDMA_CH_1_CFG0
, 0x0fff00F0);
1477 WREG32(mmTPC_PLL_CLK_RLX_0
, 0x200020);
1479 goya
->hw_cap_initialized
|= HW_CAP_GOLDEN
;
1482 static void goya_init_mme_qman(struct hl_device
*hdev
)
1484 u32 mtr_base_lo
, mtr_base_hi
;
1485 u32 so_base_lo
, so_base_hi
;
1486 u32 gic_base_lo
, gic_base_hi
;
1489 mtr_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1490 mtr_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1491 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1492 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1495 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1497 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1499 qman_base_addr
= hdev
->asic_prop
.sram_base_address
+
1500 MME_QMAN_BASE_OFFSET
;
1502 WREG32(mmMME_QM_PQ_BASE_LO
, lower_32_bits(qman_base_addr
));
1503 WREG32(mmMME_QM_PQ_BASE_HI
, upper_32_bits(qman_base_addr
));
1504 WREG32(mmMME_QM_PQ_SIZE
, ilog2(MME_QMAN_LENGTH
));
1505 WREG32(mmMME_QM_PQ_PI
, 0);
1506 WREG32(mmMME_QM_PQ_CI
, 0);
1507 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET
, 0x10C0);
1508 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET
, 0x10C4);
1509 WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET
, 0x10C8);
1510 WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET
, 0x10CC);
1512 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO
, mtr_base_lo
);
1513 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI
, mtr_base_hi
);
1514 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO
, so_base_lo
);
1515 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI
, so_base_hi
);
1517 /* QMAN CQ has 8 cache lines */
1518 WREG32(mmMME_QM_CQ_CFG1
, 0x00080008);
1520 WREG32(mmMME_QM_GLBL_ERR_ADDR_LO
, gic_base_lo
);
1521 WREG32(mmMME_QM_GLBL_ERR_ADDR_HI
, gic_base_hi
);
1523 WREG32(mmMME_QM_GLBL_ERR_WDATA
, GOYA_ASYNC_EVENT_ID_MME_QM
);
1525 WREG32(mmMME_QM_GLBL_ERR_CFG
, QMAN_MME_ERR_MSG_EN
);
1527 WREG32(mmMME_QM_GLBL_PROT
, QMAN_MME_ERR_PROT
);
1529 WREG32(mmMME_QM_GLBL_CFG0
, QMAN_MME_ENABLE
);
1532 static void goya_init_mme_cmdq(struct hl_device
*hdev
)
1534 u32 mtr_base_lo
, mtr_base_hi
;
1535 u32 so_base_lo
, so_base_hi
;
1536 u32 gic_base_lo
, gic_base_hi
;
1539 mtr_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1540 mtr_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1541 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1542 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1545 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1547 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1549 qman_base_addr
= hdev
->asic_prop
.sram_base_address
+
1550 MME_QMAN_BASE_OFFSET
;
1552 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO
, mtr_base_lo
);
1553 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI
, mtr_base_hi
);
1554 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO
, so_base_lo
);
1555 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI
, so_base_hi
);
1557 /* CMDQ CQ has 20 cache lines */
1558 WREG32(mmMME_CMDQ_CQ_CFG1
, 0x00140014);
1560 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO
, gic_base_lo
);
1561 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI
, gic_base_hi
);
1563 WREG32(mmMME_CMDQ_GLBL_ERR_WDATA
, GOYA_ASYNC_EVENT_ID_MME_CMDQ
);
1565 WREG32(mmMME_CMDQ_GLBL_ERR_CFG
, CMDQ_MME_ERR_MSG_EN
);
1567 WREG32(mmMME_CMDQ_GLBL_PROT
, CMDQ_MME_ERR_PROT
);
1569 WREG32(mmMME_CMDQ_GLBL_CFG0
, CMDQ_MME_ENABLE
);
1572 void goya_init_mme_qmans(struct hl_device
*hdev
)
1574 struct goya_device
*goya
= hdev
->asic_specific
;
1575 u32 so_base_lo
, so_base_hi
;
1577 if (goya
->hw_cap_initialized
& HW_CAP_MME
)
1580 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1581 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1583 WREG32(mmMME_SM_BASE_ADDRESS_LOW
, so_base_lo
);
1584 WREG32(mmMME_SM_BASE_ADDRESS_HIGH
, so_base_hi
);
1586 goya_init_mme_qman(hdev
);
1587 goya_init_mme_cmdq(hdev
);
1589 goya
->hw_cap_initialized
|= HW_CAP_MME
;
1592 static void goya_init_tpc_qman(struct hl_device
*hdev
, u32 base_off
, int tpc_id
)
1594 u32 mtr_base_lo
, mtr_base_hi
;
1595 u32 so_base_lo
, so_base_hi
;
1596 u32 gic_base_lo
, gic_base_hi
;
1598 u32 reg_off
= tpc_id
* (mmTPC1_QM_PQ_PI
- mmTPC0_QM_PQ_PI
);
1600 mtr_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1601 mtr_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1602 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1603 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1606 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1608 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1610 qman_base_addr
= hdev
->asic_prop
.sram_base_address
+ base_off
;
1612 WREG32(mmTPC0_QM_PQ_BASE_LO
+ reg_off
, lower_32_bits(qman_base_addr
));
1613 WREG32(mmTPC0_QM_PQ_BASE_HI
+ reg_off
, upper_32_bits(qman_base_addr
));
1614 WREG32(mmTPC0_QM_PQ_SIZE
+ reg_off
, ilog2(TPC_QMAN_LENGTH
));
1615 WREG32(mmTPC0_QM_PQ_PI
+ reg_off
, 0);
1616 WREG32(mmTPC0_QM_PQ_CI
+ reg_off
, 0);
1617 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET
+ reg_off
, 0x10C0);
1618 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET
+ reg_off
, 0x10C4);
1619 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET
+ reg_off
, 0x10C8);
1620 WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET
+ reg_off
, 0x10CC);
1622 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO
+ reg_off
, mtr_base_lo
);
1623 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI
+ reg_off
, mtr_base_hi
);
1624 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO
+ reg_off
, so_base_lo
);
1625 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI
+ reg_off
, so_base_hi
);
1627 WREG32(mmTPC0_QM_CQ_CFG1
+ reg_off
, 0x00080008);
1629 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO
+ reg_off
, gic_base_lo
);
1630 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI
+ reg_off
, gic_base_hi
);
1632 WREG32(mmTPC0_QM_GLBL_ERR_WDATA
+ reg_off
,
1633 GOYA_ASYNC_EVENT_ID_TPC0_QM
+ tpc_id
);
1635 WREG32(mmTPC0_QM_GLBL_ERR_CFG
+ reg_off
, QMAN_TPC_ERR_MSG_EN
);
1637 WREG32(mmTPC0_QM_GLBL_PROT
+ reg_off
, QMAN_TPC_ERR_PROT
);
1639 WREG32(mmTPC0_QM_GLBL_CFG0
+ reg_off
, QMAN_TPC_ENABLE
);
1642 static void goya_init_tpc_cmdq(struct hl_device
*hdev
, int tpc_id
)
1644 u32 mtr_base_lo
, mtr_base_hi
;
1645 u32 so_base_lo
, so_base_hi
;
1646 u32 gic_base_lo
, gic_base_hi
;
1647 u32 reg_off
= tpc_id
* (mmTPC1_CMDQ_CQ_CFG1
- mmTPC0_CMDQ_CQ_CFG1
);
1649 mtr_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1650 mtr_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_MON_PAY_ADDRL_0
);
1651 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1652 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1655 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1657 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
);
1659 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO
+ reg_off
, mtr_base_lo
);
1660 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI
+ reg_off
, mtr_base_hi
);
1661 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO
+ reg_off
, so_base_lo
);
1662 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI
+ reg_off
, so_base_hi
);
1664 WREG32(mmTPC0_CMDQ_CQ_CFG1
+ reg_off
, 0x00140014);
1666 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO
+ reg_off
, gic_base_lo
);
1667 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI
+ reg_off
, gic_base_hi
);
1669 WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA
+ reg_off
,
1670 GOYA_ASYNC_EVENT_ID_TPC0_CMDQ
+ tpc_id
);
1672 WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG
+ reg_off
, CMDQ_TPC_ERR_MSG_EN
);
1674 WREG32(mmTPC0_CMDQ_GLBL_PROT
+ reg_off
, CMDQ_TPC_ERR_PROT
);
1676 WREG32(mmTPC0_CMDQ_GLBL_CFG0
+ reg_off
, CMDQ_TPC_ENABLE
);
1679 void goya_init_tpc_qmans(struct hl_device
*hdev
)
1681 struct goya_device
*goya
= hdev
->asic_specific
;
1682 u32 so_base_lo
, so_base_hi
;
1683 u32 cfg_off
= mmTPC1_CFG_SM_BASE_ADDRESS_LOW
-
1684 mmTPC0_CFG_SM_BASE_ADDRESS_LOW
;
1687 if (goya
->hw_cap_initialized
& HW_CAP_TPC
)
1690 so_base_lo
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1691 so_base_hi
= upper_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
1693 for (i
= 0 ; i
< TPC_MAX_NUM
; i
++) {
1694 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW
+ i
* cfg_off
,
1696 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH
+ i
* cfg_off
,
1700 goya_init_tpc_qman(hdev
, TPC0_QMAN_BASE_OFFSET
, 0);
1701 goya_init_tpc_qman(hdev
, TPC1_QMAN_BASE_OFFSET
, 1);
1702 goya_init_tpc_qman(hdev
, TPC2_QMAN_BASE_OFFSET
, 2);
1703 goya_init_tpc_qman(hdev
, TPC3_QMAN_BASE_OFFSET
, 3);
1704 goya_init_tpc_qman(hdev
, TPC4_QMAN_BASE_OFFSET
, 4);
1705 goya_init_tpc_qman(hdev
, TPC5_QMAN_BASE_OFFSET
, 5);
1706 goya_init_tpc_qman(hdev
, TPC6_QMAN_BASE_OFFSET
, 6);
1707 goya_init_tpc_qman(hdev
, TPC7_QMAN_BASE_OFFSET
, 7);
1709 for (i
= 0 ; i
< TPC_MAX_NUM
; i
++)
1710 goya_init_tpc_cmdq(hdev
, i
);
1712 goya
->hw_cap_initialized
|= HW_CAP_TPC
;
1716 * goya_disable_internal_queues - Disable internal queues
1718 * @hdev: pointer to hl_device structure
1721 static void goya_disable_internal_queues(struct hl_device
*hdev
)
1723 WREG32(mmMME_QM_GLBL_CFG0
, 0);
1724 WREG32(mmMME_CMDQ_GLBL_CFG0
, 0);
1726 WREG32(mmTPC0_QM_GLBL_CFG0
, 0);
1727 WREG32(mmTPC0_CMDQ_GLBL_CFG0
, 0);
1729 WREG32(mmTPC1_QM_GLBL_CFG0
, 0);
1730 WREG32(mmTPC1_CMDQ_GLBL_CFG0
, 0);
1732 WREG32(mmTPC2_QM_GLBL_CFG0
, 0);
1733 WREG32(mmTPC2_CMDQ_GLBL_CFG0
, 0);
1735 WREG32(mmTPC3_QM_GLBL_CFG0
, 0);
1736 WREG32(mmTPC3_CMDQ_GLBL_CFG0
, 0);
1738 WREG32(mmTPC4_QM_GLBL_CFG0
, 0);
1739 WREG32(mmTPC4_CMDQ_GLBL_CFG0
, 0);
1741 WREG32(mmTPC5_QM_GLBL_CFG0
, 0);
1742 WREG32(mmTPC5_CMDQ_GLBL_CFG0
, 0);
1744 WREG32(mmTPC6_QM_GLBL_CFG0
, 0);
1745 WREG32(mmTPC6_CMDQ_GLBL_CFG0
, 0);
1747 WREG32(mmTPC7_QM_GLBL_CFG0
, 0);
1748 WREG32(mmTPC7_CMDQ_GLBL_CFG0
, 0);
/*
 * goya_stop_internal_queues - Stop internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_internal_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	/*
	 * Each queue (QMAN) is a separate H/W logic. That means that each
	 * QMAN can be stopped independently and failure to stop one does NOT
	 * mandate we should not try to stop other QMANs
	 */

	rc = goya_stop_queue(hdev,
			mmMME_QM_GLBL_CFG1,
			mmMME_QM_CP_STS,
			mmMME_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmMME_CMDQ_GLBL_CFG1,
			mmMME_CMDQ_CP_STS,
			mmMME_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC0_QM_GLBL_CFG1,
			mmTPC0_QM_CP_STS,
			mmTPC0_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC0_CMDQ_GLBL_CFG1,
			mmTPC0_CMDQ_CP_STS,
			mmTPC0_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_QM_GLBL_CFG1,
			mmTPC1_QM_CP_STS,
			mmTPC1_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_CMDQ_GLBL_CFG1,
			mmTPC1_CMDQ_CP_STS,
			mmTPC1_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_QM_GLBL_CFG1,
			mmTPC2_QM_CP_STS,
			mmTPC2_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_CMDQ_GLBL_CFG1,
			mmTPC2_CMDQ_CP_STS,
			mmTPC2_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_QM_GLBL_CFG1,
			mmTPC3_QM_CP_STS,
			mmTPC3_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_CMDQ_GLBL_CFG1,
			mmTPC3_CMDQ_CP_STS,
			mmTPC3_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_QM_GLBL_CFG1,
			mmTPC4_QM_CP_STS,
			mmTPC4_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_CMDQ_GLBL_CFG1,
			mmTPC4_CMDQ_CP_STS,
			mmTPC4_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_QM_GLBL_CFG1,
			mmTPC5_QM_CP_STS,
			mmTPC5_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_CMDQ_GLBL_CFG1,
			mmTPC5_CMDQ_CP_STS,
			mmTPC5_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_QM_GLBL_CFG1,
			mmTPC6_QM_CP_STS,
			mmTPC6_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_CMDQ_GLBL_CFG1,
			mmTPC6_CMDQ_CP_STS,
			mmTPC6_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_QM_GLBL_CFG1,
			mmTPC7_QM_CP_STS,
			mmTPC7_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_CMDQ_GLBL_CFG1,
			mmTPC7_CMDQ_CP_STS,
			mmTPC7_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
		retval = -EIO;
	}

	return retval;
}
static void goya_dma_stall(struct hl_device *hdev)
{
	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}

static void goya_tpc_stall(struct hl_device *hdev)
{
	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}

static void goya_mme_stall(struct hl_device *hdev)
{
	WREG32(mmMME_STALL, 0xFFFFFFFF);
}
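/*
 * MSI-X vector layout: completion queue i uses vector i and the event queue
 * uses the dedicated GOYA_EVENT_QUEUE_MSIX_IDX vector. All GOYA_MSIX_ENTRIES
 * vectors must be allocated, otherwise the setup below fails.
 */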
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
static void goya_sync_irqs(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	/* Wait for all pending IRQs to be finished */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		synchronize_irq(pci_irq_vector(hdev->pdev, i));

	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}

static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
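/*
 * Engine halt flow: stop the external and internal queues, stall the DMA,
 * TPC and MME engines, then disable the queues. On a hard reset the embedded
 * CPU is also sent to WFE and MSI-X is torn down; on a soft reset only the
 * in-flight interrupts are synchronized.
 */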
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms, cpu_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	if (hdev->pldm) {
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/*
		 * We don't know the state of the CPU, so make sure it is
		 * stopped by any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
		msleep(cpu_timeout_ms);
	}

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}
/*
 * goya_push_uboot_to_device() - Push u-boot FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy u-boot fw code from firmware file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_push_uboot_to_device(struct hl_device *hdev)
{
	char fw_name[200];
	void __iomem *dst;

	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;

	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
}

/*
 * goya_push_linux_to_device() - Push LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX fw code from firmware file to DDR BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_push_linux_to_device(struct hl_device *hdev)
{
	char fw_name[200];
	void __iomem *dst;

	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
}
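/*
 * On the pre-silicon (PLDM) platform there is no boot sequencer to load the
 * firmware, so the driver resets the CA53 cores, pushes u-boot and the Linux
 * FIT image itself, and then releases core 0 from reset.
 */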
static int goya_pldm_init_cpu(struct hl_device *hdev)
{
	u32 val, unit_rst_val;
	int rc;

	/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
	goya_init_golden_registers(hdev);

	/* Put ARM cores into reset */
	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);

	/* Reset the CA53 MACRO */
	unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);

	rc = goya_push_uboot_to_device(hdev);
	if (rc)
		return rc;

	rc = goya_push_linux_to_device(hdev);
	if (rc)
		return rc;

	WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
	WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);

	WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
		lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
	WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
		upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));

	/* Release ARM core 0 from reset */
	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
		CPU_RESET_CORE0_DEASSERT);
	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);

	return 0;
}
/*
 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string should be located by that offset.
 */
static void goya_read_device_fw_version(struct hl_device *hdev,
					enum goya_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return;
	}

	ver_off &= ~((u32)SRAM_BASE_ADDR);

	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
							VERSION_MAX_LEN);
	} else {
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
								name, ver_off);
		strcpy(dest, "unavailable");
	}
}
2234 static int goya_init_cpu(struct hl_device
*hdev
, u32 cpu_timeout
)
2236 struct goya_device
*goya
= hdev
->asic_specific
;
2240 if (!hdev
->cpu_enable
)
2243 if (goya
->hw_cap_initialized
& HW_CAP_CPU
)
2247 * Before pushing u-boot/linux to device, need to set the ddr bar to
2248 * base address of dram
2250 if (goya_set_ddr_bar_base(hdev
, DRAM_PHYS_BASE
) == U64_MAX
) {
2252 "failed to map DDR bar to DRAM base address\n");
2257 rc
= goya_pldm_init_cpu(hdev
);
2264 /* Make sure CPU boot-loader is running */
2265 rc
= hl_poll_timeout(
2267 mmPSOC_GLOBAL_CONF_WARM_REBOOT
,
2269 (status
== CPU_BOOT_STATUS_DRAM_RDY
) ||
2270 (status
== CPU_BOOT_STATUS_SRAM_AVAIL
),
2275 dev_err(hdev
->dev
, "Error in ARM u-boot!");
2277 case CPU_BOOT_STATUS_NA
:
2279 "ARM status %d - BTL did NOT run\n", status
);
2281 case CPU_BOOT_STATUS_IN_WFE
:
2283 "ARM status %d - Inside WFE loop\n", status
);
2285 case CPU_BOOT_STATUS_IN_BTL
:
2287 "ARM status %d - Stuck in BTL\n", status
);
2289 case CPU_BOOT_STATUS_IN_PREBOOT
:
2291 "ARM status %d - Stuck in Preboot\n", status
);
2293 case CPU_BOOT_STATUS_IN_SPL
:
2295 "ARM status %d - Stuck in SPL\n", status
);
2297 case CPU_BOOT_STATUS_IN_UBOOT
:
2299 "ARM status %d - Stuck in u-boot\n", status
);
2301 case CPU_BOOT_STATUS_DRAM_INIT_FAIL
:
2303 "ARM status %d - DDR initialization failed\n",
2306 case CPU_BOOT_STATUS_UBOOT_NOT_READY
:
2308 "ARM status %d - u-boot stopped by user\n",
2313 "ARM status %d - Invalid status code\n",
2320 /* Read U-Boot version now in case we will later fail */
2321 goya_read_device_fw_version(hdev
, FW_COMP_UBOOT
);
2322 goya_read_device_fw_version(hdev
, FW_COMP_PREBOOT
);
2324 if (!hdev
->fw_loading
) {
2325 dev_info(hdev
->dev
, "Skip loading FW\n");
2329 if (status
== CPU_BOOT_STATUS_SRAM_AVAIL
)
2332 rc
= goya_push_linux_to_device(hdev
);
2336 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC
, KMD_MSG_FIT_RDY
);
2338 rc
= hl_poll_timeout(
2340 mmPSOC_GLOBAL_CONF_WARM_REBOOT
,
2342 (status
== CPU_BOOT_STATUS_SRAM_AVAIL
),
2347 if (status
== CPU_BOOT_STATUS_FIT_CORRUPTED
)
2349 "ARM u-boot reports FIT image is corrupted\n");
2352 "ARM Linux failed to load, %d\n", status
);
2353 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC
, KMD_MSG_NA
);
2357 dev_info(hdev
->dev
, "Successfully loaded firmware to device\n");
2360 goya
->hw_cap_initialized
|= HW_CAP_CPU
;
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
						u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
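/*
 * Each ASID owns a hop0 page table at a fixed offset inside the MMU page
 * tables area:
 *
 *	hop0_addr = prop->mmu_pgt_addr + asid * prop->mmu_hop_table_size;
 *
 * goya_mmu_init() programs this address for every possible ASID.
 */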
int goya_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	hdev->dram_supports_virtual_memory = true;
	hdev->dram_default_page_mapping = true;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	goya->hw_cap_initialized |= HW_CAP_MMU;

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
					lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	/* Remove follower feature due to performance bug */
	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);

	WREG32(mmMMU_MMU_ENABLE, 1);
	WREG32(mmMMU_SPI_MASK, 0xF);

	return 0;

err:
	return rc;
}
2448 * goya_hw_init - Goya hardware initialization code
2450 * @hdev: pointer to hl_device structure
2452 * Returns 0 on success
2455 static int goya_hw_init(struct hl_device
*hdev
)
2457 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
2461 dev_info(hdev
->dev
, "Starting initialization of H/W\n");
2463 /* Perform read from the device to make sure device is up */
2464 val
= RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG
);
2467 * Let's mark in the H/W that we have reached this point. We check
2468 * this value in the reset_before_init function to understand whether
2469 * we need to reset the chip before doing H/W init. This register is
2470 * cleared by the H/W upon H/W reset
2472 WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS
, HL_DEVICE_HW_STATE_DIRTY
);
2474 rc
= goya_init_cpu(hdev
, GOYA_CPU_TIMEOUT_USEC
);
2476 dev_err(hdev
->dev
, "failed to initialize CPU\n");
2480 goya_tpc_mbist_workaround(hdev
);
2482 goya_init_golden_registers(hdev
);
2485 * After CPU initialization is finished, change DDR bar mapping inside
2486 * iATU to point to the start address of the MMU page tables
2488 if (goya_set_ddr_bar_base(hdev
, DRAM_PHYS_BASE
+
2489 (MMU_PAGE_TABLES_ADDR
&
2490 ~(prop
->dram_pci_bar_size
- 0x1ull
))) == U64_MAX
) {
2492 "failed to map DDR bar to MMU page tables\n");
2496 rc
= goya_mmu_init(hdev
);
2500 goya_init_security(hdev
);
2502 goya_init_dma_qmans(hdev
);
2504 goya_init_mme_qmans(hdev
);
2506 goya_init_tpc_qmans(hdev
);
2508 /* MSI-X must be enabled before CPU queues are initialized */
2509 rc
= goya_enable_msix(hdev
);
2511 goto disable_queues
;
2513 /* Perform read from the device to flush all MSI-X configuration */
2514 val
= RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG
);
2519 goya_disable_internal_queues(hdev
);
2520 goya_disable_external_queues(hdev
);
2526 * goya_hw_fini - Goya hardware tear-down code
2528 * @hdev: pointer to hl_device structure
2529 * @hard_reset: should we do hard reset to all engines or just reset the
2530 * compute/dma engines
2532 static void goya_hw_fini(struct hl_device
*hdev
, bool hard_reset
)
2534 struct goya_device
*goya
= hdev
->asic_specific
;
2535 u32 reset_timeout_ms
, status
;
2538 reset_timeout_ms
= GOYA_PLDM_RESET_TIMEOUT_MSEC
;
2540 reset_timeout_ms
= GOYA_RESET_TIMEOUT_MSEC
;
2543 goya_set_ddr_bar_base(hdev
, DRAM_PHYS_BASE
);
2544 goya_disable_clk_rlx(hdev
);
2545 goya_set_pll_refclk(hdev
);
2547 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG
, RESET_ALL
);
2549 "Issued HARD reset command, going to wait %dms\n",
2552 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG
, DMA_MME_TPC_RESET
);
2554 "Issued SOFT reset command, going to wait %dms\n",
2559 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2560 * itself is in reset. In either reset we need to wait until the reset
2563 msleep(reset_timeout_ms
);
2565 status
= RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM
);
2566 if (status
& PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK
)
2568 "Timeout while waiting for device to reset 0x%x\n",
2572 goya
->hw_cap_initialized
&= ~(HW_CAP_DMA
| HW_CAP_MME
|
2573 HW_CAP_GOLDEN
| HW_CAP_TPC
);
2574 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
,
2575 GOYA_ASYNC_EVENT_ID_SOFT_RESET
);
2579 /* Chicken bit to re-initiate boot sequencer flow */
2580 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START
,
2581 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT
);
2582 /* Move boot manager FSM to pre boot sequencer init state */
2583 WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM
,
2584 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT
);
2586 goya
->hw_cap_initialized
&= ~(HW_CAP_CPU
| HW_CAP_CPU_Q
|
2587 HW_CAP_DDR_0
| HW_CAP_DDR_1
|
2588 HW_CAP_DMA
| HW_CAP_MME
|
2589 HW_CAP_MMU
| HW_CAP_TPC_MBIST
|
2590 HW_CAP_GOLDEN
| HW_CAP_TPC
);
2591 memset(goya
->events_stat
, 0, sizeof(goya
->events_stat
));
2595 /* In case we are running inside VM and the VM is
2596 * shutting down, we need to make sure CPU boot-loader
2597 * is running before we can continue the VM shutdown.
2598 * That is because the VM will send an FLR signal that
2602 "Going to wait up to %ds for CPU boot loader\n",
2603 GOYA_CPU_TIMEOUT_USEC
/ 1000 / 1000);
2605 rc
= hl_poll_timeout(
2607 mmPSOC_GLOBAL_CONF_WARM_REBOOT
,
2609 (status
== CPU_BOOT_STATUS_DRAM_RDY
),
2611 GOYA_CPU_TIMEOUT_USEC
);
2614 "failed to wait for CPU boot loader\n");
int goya_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}
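/*
 * Command buffers are exposed to user-space as VM_PFNMAP mappings that
 * cannot be copied on fork, expanded or included in core dumps; the actual
 * mapping is done with remap_pfn_range() on the CB's physical address.
 */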
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
		u64 kaddress, phys_addr_t paddress, u32 size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}
2650 void goya_ring_doorbell(struct hl_device
*hdev
, u32 hw_queue_id
, u32 pi
)
2652 u32 db_reg_offset
, db_value
;
2654 switch (hw_queue_id
) {
2655 case GOYA_QUEUE_ID_DMA_0
:
2656 db_reg_offset
= mmDMA_QM_0_PQ_PI
;
2659 case GOYA_QUEUE_ID_DMA_1
:
2660 db_reg_offset
= mmDMA_QM_1_PQ_PI
;
2663 case GOYA_QUEUE_ID_DMA_2
:
2664 db_reg_offset
= mmDMA_QM_2_PQ_PI
;
2667 case GOYA_QUEUE_ID_DMA_3
:
2668 db_reg_offset
= mmDMA_QM_3_PQ_PI
;
2671 case GOYA_QUEUE_ID_DMA_4
:
2672 db_reg_offset
= mmDMA_QM_4_PQ_PI
;
2675 case GOYA_QUEUE_ID_CPU_PQ
:
2676 db_reg_offset
= mmCPU_IF_PF_PQ_PI
;
2679 case GOYA_QUEUE_ID_MME
:
2680 db_reg_offset
= mmMME_QM_PQ_PI
;
2683 case GOYA_QUEUE_ID_TPC0
:
2684 db_reg_offset
= mmTPC0_QM_PQ_PI
;
2687 case GOYA_QUEUE_ID_TPC1
:
2688 db_reg_offset
= mmTPC1_QM_PQ_PI
;
2691 case GOYA_QUEUE_ID_TPC2
:
2692 db_reg_offset
= mmTPC2_QM_PQ_PI
;
2695 case GOYA_QUEUE_ID_TPC3
:
2696 db_reg_offset
= mmTPC3_QM_PQ_PI
;
2699 case GOYA_QUEUE_ID_TPC4
:
2700 db_reg_offset
= mmTPC4_QM_PQ_PI
;
2703 case GOYA_QUEUE_ID_TPC5
:
2704 db_reg_offset
= mmTPC5_QM_PQ_PI
;
2707 case GOYA_QUEUE_ID_TPC6
:
2708 db_reg_offset
= mmTPC6_QM_PQ_PI
;
2711 case GOYA_QUEUE_ID_TPC7
:
2712 db_reg_offset
= mmTPC7_QM_PQ_PI
;
2716 /* Should never get here */
2717 dev_err(hdev
->dev
, "H/W queue %d is invalid. Can't set pi\n",
2724 /* ring the doorbell */
2725 WREG32(db_reg_offset
, db_value
);
2727 if (hw_queue_id
== GOYA_QUEUE_ID_CPU_PQ
)
2728 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
,
2729 GOYA_ASYNC_EVENT_ID_PI_UPDATE
);
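/*
 * Host memory addresses handed to the device are always offset by
 * HOST_PHYS_BASE, e.g. for a coherent allocation:
 *
 *	device_addr = dma_handle + HOST_PHYS_BASE;
 *
 * and the reverse shift is applied before the address is returned to the
 * DMA API. The goya_dma_* wrappers below implement this convention.
 */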
void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
{
	/* Not needed in Goya */
}

static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}
2759 void *goya_get_int_queue_base(struct hl_device
*hdev
, u32 queue_id
,
2760 dma_addr_t
*dma_handle
, u16
*queue_len
)
2765 *dma_handle
= hdev
->asic_prop
.sram_base_address
;
2767 base
= (void *) hdev
->pcie_bar
[SRAM_CFG_BAR_ID
];
2770 case GOYA_QUEUE_ID_MME
:
2771 offset
= MME_QMAN_BASE_OFFSET
;
2772 *queue_len
= MME_QMAN_LENGTH
;
2774 case GOYA_QUEUE_ID_TPC0
:
2775 offset
= TPC0_QMAN_BASE_OFFSET
;
2776 *queue_len
= TPC_QMAN_LENGTH
;
2778 case GOYA_QUEUE_ID_TPC1
:
2779 offset
= TPC1_QMAN_BASE_OFFSET
;
2780 *queue_len
= TPC_QMAN_LENGTH
;
2782 case GOYA_QUEUE_ID_TPC2
:
2783 offset
= TPC2_QMAN_BASE_OFFSET
;
2784 *queue_len
= TPC_QMAN_LENGTH
;
2786 case GOYA_QUEUE_ID_TPC3
:
2787 offset
= TPC3_QMAN_BASE_OFFSET
;
2788 *queue_len
= TPC_QMAN_LENGTH
;
2790 case GOYA_QUEUE_ID_TPC4
:
2791 offset
= TPC4_QMAN_BASE_OFFSET
;
2792 *queue_len
= TPC_QMAN_LENGTH
;
2794 case GOYA_QUEUE_ID_TPC5
:
2795 offset
= TPC5_QMAN_BASE_OFFSET
;
2796 *queue_len
= TPC_QMAN_LENGTH
;
2798 case GOYA_QUEUE_ID_TPC6
:
2799 offset
= TPC6_QMAN_BASE_OFFSET
;
2800 *queue_len
= TPC_QMAN_LENGTH
;
2802 case GOYA_QUEUE_ID_TPC7
:
2803 offset
= TPC7_QMAN_BASE_OFFSET
;
2804 *queue_len
= TPC_QMAN_LENGTH
;
2807 dev_err(hdev
->dev
, "Got invalid queue id %d\n", queue_id
);
2812 *dma_handle
+= offset
;
2817 static int goya_send_job_on_qman0(struct hl_device
*hdev
, struct hl_cs_job
*job
)
2819 struct packet_msg_prot
*fence_pkt
;
2821 dma_addr_t fence_dma_addr
;
2827 timeout
= GOYA_PLDM_QMAN0_TIMEOUT_USEC
;
2829 timeout
= HL_DEVICE_TIMEOUT_USEC
;
2831 if (!hdev
->asic_funcs
->is_device_idle(hdev
, NULL
, NULL
)) {
2832 dev_err_ratelimited(hdev
->dev
,
2833 "Can't send KMD job on QMAN0 because the device is not idle\n");
2837 fence_ptr
= hdev
->asic_funcs
->asic_dma_pool_zalloc(hdev
, 4, GFP_KERNEL
,
2841 "Failed to allocate fence memory for QMAN0\n");
2845 goya_qman0_set_security(hdev
, true);
2847 cb
= job
->patched_cb
;
2849 fence_pkt
= (struct packet_msg_prot
*) (uintptr_t) (cb
->kernel_address
+
2850 job
->job_cb_size
- sizeof(struct packet_msg_prot
));
2852 tmp
= (PACKET_MSG_PROT
<< GOYA_PKT_CTL_OPCODE_SHIFT
) |
2853 (1 << GOYA_PKT_CTL_EB_SHIFT
) |
2854 (1 << GOYA_PKT_CTL_MB_SHIFT
);
2855 fence_pkt
->ctl
= cpu_to_le32(tmp
);
2856 fence_pkt
->value
= cpu_to_le32(GOYA_QMAN0_FENCE_VAL
);
2857 fence_pkt
->addr
= cpu_to_le64(fence_dma_addr
);
2859 rc
= hl_hw_queue_send_cb_no_cmpl(hdev
, GOYA_QUEUE_ID_DMA_0
,
2860 job
->job_cb_size
, cb
->bus_address
);
2862 dev_err(hdev
->dev
, "Failed to send CB on QMAN0, %d\n", rc
);
2863 goto free_fence_ptr
;
2866 rc
= hl_poll_timeout_memory(hdev
, fence_ptr
, tmp
,
2867 (tmp
== GOYA_QMAN0_FENCE_VAL
), 1000, timeout
);
2869 hl_hw_queue_inc_ci_kernel(hdev
, GOYA_QUEUE_ID_DMA_0
);
2871 if (rc
== -ETIMEDOUT
) {
2872 dev_err(hdev
->dev
, "QMAN0 Job timeout (0x%x)\n", tmp
);
2873 goto free_fence_ptr
;
2877 hdev
->asic_funcs
->asic_dma_pool_free(hdev
, (void *) fence_ptr
,
2880 goya_qman0_set_security(hdev
, false);
2885 int goya_send_cpu_message(struct hl_device
*hdev
, u32
*msg
, u16 len
,
2886 u32 timeout
, long *result
)
2888 struct goya_device
*goya
= hdev
->asic_specific
;
2890 if (!(goya
->hw_cap_initialized
& HW_CAP_CPU_Q
)) {
2896 return hl_fw_send_cpu_message(hdev
, GOYA_QUEUE_ID_CPU_PQ
, msg
, len
,
2900 int goya_test_queue(struct hl_device
*hdev
, u32 hw_queue_id
)
2902 struct packet_msg_prot
*fence_pkt
;
2903 dma_addr_t pkt_dma_addr
;
2905 dma_addr_t fence_dma_addr
;
2909 fence_val
= GOYA_QMAN0_FENCE_VAL
;
2911 fence_ptr
= hdev
->asic_funcs
->asic_dma_pool_zalloc(hdev
, 4, GFP_KERNEL
,
2915 "Failed to allocate memory for queue testing\n");
2921 fence_pkt
= hdev
->asic_funcs
->asic_dma_pool_zalloc(hdev
,
2922 sizeof(struct packet_msg_prot
),
2923 GFP_KERNEL
, &pkt_dma_addr
);
2926 "Failed to allocate packet for queue testing\n");
2928 goto free_fence_ptr
;
2931 tmp
= (PACKET_MSG_PROT
<< GOYA_PKT_CTL_OPCODE_SHIFT
) |
2932 (1 << GOYA_PKT_CTL_EB_SHIFT
) |
2933 (1 << GOYA_PKT_CTL_MB_SHIFT
);
2934 fence_pkt
->ctl
= cpu_to_le32(tmp
);
2935 fence_pkt
->value
= cpu_to_le32(fence_val
);
2936 fence_pkt
->addr
= cpu_to_le64(fence_dma_addr
);
2938 rc
= hl_hw_queue_send_cb_no_cmpl(hdev
, hw_queue_id
,
2939 sizeof(struct packet_msg_prot
),
2943 "Failed to send fence packet\n");
2947 rc
= hl_poll_timeout_memory(hdev
, fence_ptr
, tmp
, (tmp
== fence_val
),
2948 1000, GOYA_TEST_QUEUE_WAIT_USEC
);
2950 hl_hw_queue_inc_ci_kernel(hdev
, hw_queue_id
);
2952 if (rc
== -ETIMEDOUT
) {
2954 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
2955 hw_queue_id
, (unsigned long long) fence_dma_addr
, tmp
);
2958 dev_info(hdev
->dev
, "queue test on H/W queue %d succeeded\n",
2963 hdev
->asic_funcs
->asic_dma_pool_free(hdev
, (void *) fence_pkt
,
2966 hdev
->asic_funcs
->asic_dma_pool_free(hdev
, (void *) fence_ptr
,
2971 int goya_test_cpu_queue(struct hl_device
*hdev
)
2973 struct goya_device
*goya
= hdev
->asic_specific
;
2976 * check capability here as send_cpu_message() won't update the result
2977 * value if no capability
2979 if (!(goya
->hw_cap_initialized
& HW_CAP_CPU_Q
))
2982 return hl_fw_test_cpu_queue(hdev
);
2985 int goya_test_queues(struct hl_device
*hdev
)
2987 int i
, rc
, ret_val
= 0;
2989 for (i
= 0 ; i
< NUMBER_OF_EXT_HW_QUEUES
; i
++) {
2990 rc
= goya_test_queue(hdev
, i
);
static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

	if (size > GOYA_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle)
{
	void *vaddr;

	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
			VA_CPU_ACCESSIBLE_MEM_ADDR;

	return vaddr;
}

void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
		return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address += HOST_PHYS_BASE;

	return 0;
}

static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address -= HOST_PHYS_BASE;

	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}
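/*
 * goya_get_dma_desc_list_size() computes how many LIN_DMA packets (and hence
 * how many bytes of the patched CB) are needed to cover an SG table, merging
 * physically contiguous entries as long as the combined length does not
 * exceed DMA_MAX_TRANSFER_SIZE.
 */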
3071 u32
goya_get_dma_desc_list_size(struct hl_device
*hdev
, struct sg_table
*sgt
)
3073 struct scatterlist
*sg
, *sg_next_iter
;
3074 u32 count
, dma_desc_cnt
;
3076 dma_addr_t addr
, addr_next
;
3080 for_each_sg(sgt
->sgl
, sg
, sgt
->nents
, count
) {
3082 len
= sg_dma_len(sg
);
3083 addr
= sg_dma_address(sg
);
3088 while ((count
+ 1) < sgt
->nents
) {
3089 sg_next_iter
= sg_next(sg
);
3090 len_next
= sg_dma_len(sg_next_iter
);
3091 addr_next
= sg_dma_address(sg_next_iter
);
3096 if ((addr
+ len
== addr_next
) &&
3097 (len
+ len_next
<= DMA_MAX_TRANSFER_SIZE
)) {
3109 return dma_desc_cnt
* sizeof(struct packet_lin_dma
);
3112 static int goya_pin_memory_before_cs(struct hl_device
*hdev
,
3113 struct hl_cs_parser
*parser
,
3114 struct packet_lin_dma
*user_dma_pkt
,
3115 u64 addr
, enum dma_data_direction dir
)
3117 struct hl_userptr
*userptr
;
3120 if (hl_userptr_is_pinned(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
3121 parser
->job_userptr_list
, &userptr
))
3122 goto already_pinned
;
3124 userptr
= kzalloc(sizeof(*userptr
), GFP_ATOMIC
);
3128 rc
= hl_pin_host_memory(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
3133 list_add_tail(&userptr
->job_node
, parser
->job_userptr_list
);
3135 rc
= hdev
->asic_funcs
->asic_dma_map_sg(hdev
, userptr
->sgt
->sgl
,
3136 userptr
->sgt
->nents
, dir
);
3138 dev_err(hdev
->dev
, "failed to map sgt with DMA region\n");
3142 userptr
->dma_mapped
= true;
3146 parser
->patched_cb_size
+=
3147 goya_get_dma_desc_list_size(hdev
, userptr
->sgt
);
3152 hl_unpin_host_memory(hdev
, userptr
);
3158 static int goya_validate_dma_pkt_host(struct hl_device
*hdev
,
3159 struct hl_cs_parser
*parser
,
3160 struct packet_lin_dma
*user_dma_pkt
)
3162 u64 device_memory_addr
, addr
;
3163 enum dma_data_direction dir
;
3164 enum goya_dma_direction user_dir
;
3165 bool sram_addr
= true;
3166 bool skip_host_mem_pin
= false;
3171 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
3173 user_dir
= (ctl
& GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK
) >>
3174 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT
;
3176 user_memset
= (ctl
& GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
3177 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
3180 case DMA_HOST_TO_DRAM
:
3181 dev_dbg(hdev
->dev
, "DMA direction is HOST --> DRAM\n");
3182 dir
= DMA_TO_DEVICE
;
3184 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3185 device_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3187 skip_host_mem_pin
= true;
3190 case DMA_DRAM_TO_HOST
:
3191 dev_dbg(hdev
->dev
, "DMA direction is DRAM --> HOST\n");
3192 dir
= DMA_FROM_DEVICE
;
3194 addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3195 device_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3198 case DMA_HOST_TO_SRAM
:
3199 dev_dbg(hdev
->dev
, "DMA direction is HOST --> SRAM\n");
3200 dir
= DMA_TO_DEVICE
;
3201 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3202 device_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3204 skip_host_mem_pin
= true;
3207 case DMA_SRAM_TO_HOST
:
3208 dev_dbg(hdev
->dev
, "DMA direction is SRAM --> HOST\n");
3209 dir
= DMA_FROM_DEVICE
;
3210 addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3211 device_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3214 dev_err(hdev
->dev
, "DMA direction is undefined\n");
3219 if (!hl_mem_area_inside_range(device_memory_addr
,
3220 le32_to_cpu(user_dma_pkt
->tsize
),
3221 hdev
->asic_prop
.sram_user_base_address
,
3222 hdev
->asic_prop
.sram_end_address
)) {
3225 "SRAM address 0x%llx + 0x%x is invalid\n",
3227 user_dma_pkt
->tsize
);
3231 if (!hl_mem_area_inside_range(device_memory_addr
,
3232 le32_to_cpu(user_dma_pkt
->tsize
),
3233 hdev
->asic_prop
.dram_user_base_address
,
3234 hdev
->asic_prop
.dram_end_address
)) {
3237 "DRAM address 0x%llx + 0x%x is invalid\n",
3239 user_dma_pkt
->tsize
);
3244 if (skip_host_mem_pin
)
3245 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
3247 if ((dir
== DMA_TO_DEVICE
) &&
3248 (parser
->hw_queue_id
> GOYA_QUEUE_ID_DMA_1
)) {
3250 "Can't DMA from host on queue other then 1\n");
3254 rc
= goya_pin_memory_before_cs(hdev
, parser
, user_dma_pkt
,
3261 static int goya_validate_dma_pkt_no_host(struct hl_device
*hdev
,
3262 struct hl_cs_parser
*parser
,
3263 struct packet_lin_dma
*user_dma_pkt
)
3265 u64 sram_memory_addr
, dram_memory_addr
;
3266 enum goya_dma_direction user_dir
;
3269 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
3270 user_dir
= (ctl
& GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK
) >>
3271 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT
;
3273 if (user_dir
== DMA_DRAM_TO_SRAM
) {
3274 dev_dbg(hdev
->dev
, "DMA direction is DRAM --> SRAM\n");
3275 dram_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3276 sram_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3278 dev_dbg(hdev
->dev
, "DMA direction is SRAM --> DRAM\n");
3279 sram_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3280 dram_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3283 if (!hl_mem_area_inside_range(sram_memory_addr
,
3284 le32_to_cpu(user_dma_pkt
->tsize
),
3285 hdev
->asic_prop
.sram_user_base_address
,
3286 hdev
->asic_prop
.sram_end_address
)) {
3287 dev_err(hdev
->dev
, "SRAM address 0x%llx + 0x%x is invalid\n",
3288 sram_memory_addr
, user_dma_pkt
->tsize
);
3292 if (!hl_mem_area_inside_range(dram_memory_addr
,
3293 le32_to_cpu(user_dma_pkt
->tsize
),
3294 hdev
->asic_prop
.dram_user_base_address
,
3295 hdev
->asic_prop
.dram_end_address
)) {
3296 dev_err(hdev
->dev
, "DRAM address 0x%llx + 0x%x is invalid\n",
3297 dram_memory_addr
, user_dma_pkt
->tsize
);
3301 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
3306 static int goya_validate_dma_pkt_no_mmu(struct hl_device
*hdev
,
3307 struct hl_cs_parser
*parser
,
3308 struct packet_lin_dma
*user_dma_pkt
)
3310 enum goya_dma_direction user_dir
;
3314 dev_dbg(hdev
->dev
, "DMA packet details:\n");
3315 dev_dbg(hdev
->dev
, "source == 0x%llx\n", user_dma_pkt
->src_addr
);
3316 dev_dbg(hdev
->dev
, "destination == 0x%llx\n", user_dma_pkt
->dst_addr
);
3317 dev_dbg(hdev
->dev
, "size == %u\n", user_dma_pkt
->tsize
);
3319 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
3320 user_dir
= (ctl
& GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK
) >>
3321 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT
;
3324 * Special handling for DMA with size 0. The H/W has a bug where
3325 * this can cause the QMAN DMA to get stuck, so block it here.
3327 if (user_dma_pkt
->tsize
== 0) {
3329 "Got DMA with size 0, might reset the device\n");
3333 if ((user_dir
== DMA_DRAM_TO_SRAM
) || (user_dir
== DMA_SRAM_TO_DRAM
))
3334 rc
= goya_validate_dma_pkt_no_host(hdev
, parser
, user_dma_pkt
);
3336 rc
= goya_validate_dma_pkt_host(hdev
, parser
, user_dma_pkt
);
3341 static int goya_validate_dma_pkt_mmu(struct hl_device
*hdev
,
3342 struct hl_cs_parser
*parser
,
3343 struct packet_lin_dma
*user_dma_pkt
)
3345 dev_dbg(hdev
->dev
, "DMA packet details:\n");
3346 dev_dbg(hdev
->dev
, "source == 0x%llx\n", user_dma_pkt
->src_addr
);
3347 dev_dbg(hdev
->dev
, "destination == 0x%llx\n", user_dma_pkt
->dst_addr
);
3348 dev_dbg(hdev
->dev
, "size == %u\n", user_dma_pkt
->tsize
);
3352 * We can't allow user to read from Host using QMANs other than 1.
3354 if (parser
->hw_queue_id
!= GOYA_QUEUE_ID_DMA_1
&&
3355 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt
->src_addr
),
3356 le32_to_cpu(user_dma_pkt
->tsize
),
3357 hdev
->asic_prop
.va_space_host_start_address
,
3358 hdev
->asic_prop
.va_space_host_end_address
)) {
3360 "Can't DMA from host on queue other then 1\n");
3364 if (user_dma_pkt
->tsize
== 0) {
3366 "Got DMA with size 0, might reset the device\n");
3370 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
3375 static int goya_validate_wreg32(struct hl_device
*hdev
,
3376 struct hl_cs_parser
*parser
,
3377 struct packet_wreg32
*wreg_pkt
)
3379 struct goya_device
*goya
= hdev
->asic_specific
;
3380 u32 sob_start_addr
, sob_end_addr
;
3383 reg_offset
= le32_to_cpu(wreg_pkt
->ctl
) &
3384 GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK
;
3386 dev_dbg(hdev
->dev
, "WREG32 packet details:\n");
3387 dev_dbg(hdev
->dev
, "reg_offset == 0x%x\n", reg_offset
);
3388 dev_dbg(hdev
->dev
, "value == 0x%x\n", wreg_pkt
->value
);
3390 if (reg_offset
!= (mmDMA_CH_0_WR_COMP_ADDR_LO
& 0x1FFF)) {
3391 dev_err(hdev
->dev
, "WREG32 packet with illegal address 0x%x\n",
3397 * With MMU, DMA channels are not secured, so it doesn't matter where
3398 * the WR COMP will be written to because it will go out with
3399 * non-secured property
3401 if (goya
->hw_cap_initialized
& HW_CAP_MMU
)
3404 sob_start_addr
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_0
);
3405 sob_end_addr
= lower_32_bits(CFG_BASE
+ mmSYNC_MNGR_SOB_OBJ_1023
);
3407 if ((le32_to_cpu(wreg_pkt
->value
) < sob_start_addr
) ||
3408 (le32_to_cpu(wreg_pkt
->value
) > sob_end_addr
)) {
3410 dev_err(hdev
->dev
, "WREG32 packet with illegal value 0x%x\n",
3418 static int goya_validate_cb(struct hl_device
*hdev
,
3419 struct hl_cs_parser
*parser
, bool is_mmu
)
3421 u32 cb_parsed_length
= 0;
3424 parser
->patched_cb_size
= 0;
3426 /* cb_user_size is more than 0 so loop will always be executed */
3427 while (cb_parsed_length
< parser
->user_cb_size
) {
3428 enum packet_id pkt_id
;
3432 user_pkt
= (void *) (uintptr_t)
3433 (parser
->user_cb
->kernel_address
+ cb_parsed_length
);
3435 pkt_id
= (enum packet_id
) (((*(u64
*) user_pkt
) &
3436 PACKET_HEADER_PACKET_ID_MASK
) >>
3437 PACKET_HEADER_PACKET_ID_SHIFT
);
3439 pkt_size
= goya_packet_sizes
[pkt_id
];
3440 cb_parsed_length
+= pkt_size
;
3441 if (cb_parsed_length
> parser
->user_cb_size
) {
3443 "packet 0x%x is out of CB boundary\n", pkt_id
);
3449 case PACKET_WREG_32
:
3451 * Although it is validated after copy in patch_cb(),
3452 * need to validate here as well because patch_cb() is
3453 * not called in MMU path while this function is called
3455 rc
= goya_validate_wreg32(hdev
, parser
, user_pkt
);
3458 case PACKET_WREG_BULK
:
3460 "User not allowed to use WREG_BULK\n");
3464 case PACKET_MSG_PROT
:
3466 "User not allowed to use MSG_PROT\n");
3471 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
3476 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
3480 case PACKET_LIN_DMA
:
3482 rc
= goya_validate_dma_pkt_mmu(hdev
, parser
,
3485 rc
= goya_validate_dma_pkt_no_mmu(hdev
, parser
,
3489 case PACKET_MSG_LONG
:
3490 case PACKET_MSG_SHORT
:
3493 parser
->patched_cb_size
+= pkt_size
;
3497 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
3508 * The new CB should have space at the end for two MSG_PROT packets:
3509 * 1. A packet that will act as a completion packet
3510 * 2. A packet that will generate MSI-X interrupt
3512 parser
->patched_cb_size
+= sizeof(struct packet_msg_prot
) * 2;
3517 static int goya_patch_dma_packet(struct hl_device
*hdev
,
3518 struct hl_cs_parser
*parser
,
3519 struct packet_lin_dma
*user_dma_pkt
,
3520 struct packet_lin_dma
*new_dma_pkt
,
3521 u32
*new_dma_pkt_size
)
3523 struct hl_userptr
*userptr
;
3524 struct scatterlist
*sg
, *sg_next_iter
;
3525 u32 count
, dma_desc_cnt
;
3527 dma_addr_t dma_addr
, dma_addr_next
;
3528 enum goya_dma_direction user_dir
;
3529 u64 device_memory_addr
, addr
;
3530 enum dma_data_direction dir
;
3531 struct sg_table
*sgt
;
3532 bool skip_host_mem_pin
= false;
3534 u32 user_rdcomp_mask
, user_wrcomp_mask
, ctl
;
3536 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
3538 user_dir
= (ctl
& GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK
) >>
3539 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT
;
3541 user_memset
= (ctl
& GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
3542 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
3544 if ((user_dir
== DMA_DRAM_TO_SRAM
) || (user_dir
== DMA_SRAM_TO_DRAM
) ||
3545 (user_dma_pkt
->tsize
== 0)) {
3546 memcpy(new_dma_pkt
, user_dma_pkt
, sizeof(*new_dma_pkt
));
3547 *new_dma_pkt_size
= sizeof(*new_dma_pkt
);
3551 if ((user_dir
== DMA_HOST_TO_DRAM
) || (user_dir
== DMA_HOST_TO_SRAM
)) {
3552 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3553 device_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3554 dir
= DMA_TO_DEVICE
;
3556 skip_host_mem_pin
= true;
3558 addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
3559 device_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
3560 dir
= DMA_FROM_DEVICE
;
3563 if ((!skip_host_mem_pin
) &&
3564 (hl_userptr_is_pinned(hdev
, addr
,
3565 le32_to_cpu(user_dma_pkt
->tsize
),
3566 parser
->job_userptr_list
, &userptr
) == false)) {
3567 dev_err(hdev
->dev
, "Userptr 0x%llx + 0x%x NOT mapped\n",
3568 addr
, user_dma_pkt
->tsize
);
3572 if ((user_memset
) && (dir
== DMA_TO_DEVICE
)) {
3573 memcpy(new_dma_pkt
, user_dma_pkt
, sizeof(*user_dma_pkt
));
3574 *new_dma_pkt_size
= sizeof(*user_dma_pkt
);
3578 user_rdcomp_mask
= ctl
& GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK
;
3580 user_wrcomp_mask
= ctl
& GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK
;
3585 for_each_sg(sgt
->sgl
, sg
, sgt
->nents
, count
) {
3586 len
= sg_dma_len(sg
);
3587 dma_addr
= sg_dma_address(sg
);
3592 while ((count
+ 1) < sgt
->nents
) {
3593 sg_next_iter
= sg_next(sg
);
3594 len_next
= sg_dma_len(sg_next_iter
);
3595 dma_addr_next
= sg_dma_address(sg_next_iter
);
3600 if ((dma_addr
+ len
== dma_addr_next
) &&
3601 (len
+ len_next
<= DMA_MAX_TRANSFER_SIZE
)) {
3610 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
3611 if (likely(dma_desc_cnt
))
3612 ctl
&= ~GOYA_PKT_CTL_EB_MASK
;
3613 ctl
&= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK
|
3614 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK
);
3615 new_dma_pkt
->ctl
= cpu_to_le32(ctl
);
3616 new_dma_pkt
->tsize
= cpu_to_le32((u32
) len
);
3618 if (dir
== DMA_TO_DEVICE
) {
3619 new_dma_pkt
->src_addr
= cpu_to_le64(dma_addr
);
3620 new_dma_pkt
->dst_addr
= cpu_to_le64(device_memory_addr
);
3622 new_dma_pkt
->src_addr
= cpu_to_le64(device_memory_addr
);
3623 new_dma_pkt
->dst_addr
= cpu_to_le64(dma_addr
);
3627 device_memory_addr
+= len
;
3632 if (!dma_desc_cnt
) {
3634 "Error of 0 SG entries when patching DMA packet\n");
3638 /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3640 new_dma_pkt
->ctl
|= cpu_to_le32(user_rdcomp_mask
| user_wrcomp_mask
);
3642 *new_dma_pkt_size
= dma_desc_cnt
* sizeof(struct packet_lin_dma
);
3647 static int goya_patch_cb(struct hl_device
*hdev
,
3648 struct hl_cs_parser
*parser
)
3650 u32 cb_parsed_length
= 0;
3651 u32 cb_patched_cur_length
= 0;
3654 /* cb_user_size is more than 0 so loop will always be executed */
3655 while (cb_parsed_length
< parser
->user_cb_size
) {
3656 enum packet_id pkt_id
;
3658 u32 new_pkt_size
= 0;
3659 void *user_pkt
, *kernel_pkt
;
3661 user_pkt
= (void *) (uintptr_t)
3662 (parser
->user_cb
->kernel_address
+ cb_parsed_length
);
3663 kernel_pkt
= (void *) (uintptr_t)
3664 (parser
->patched_cb
->kernel_address
+
3665 cb_patched_cur_length
);
3667 pkt_id
= (enum packet_id
) (((*(u64
*) user_pkt
) &
3668 PACKET_HEADER_PACKET_ID_MASK
) >>
3669 PACKET_HEADER_PACKET_ID_SHIFT
);
3671 pkt_size
= goya_packet_sizes
[pkt_id
];
3672 cb_parsed_length
+= pkt_size
;
3673 if (cb_parsed_length
> parser
->user_cb_size
) {
3675 "packet 0x%x is out of CB boundary\n", pkt_id
);
3681 case PACKET_LIN_DMA
:
3682 rc
= goya_patch_dma_packet(hdev
, parser
, user_pkt
,
3683 kernel_pkt
, &new_pkt_size
);
3684 cb_patched_cur_length
+= new_pkt_size
;
3687 case PACKET_WREG_32
:
3688 memcpy(kernel_pkt
, user_pkt
, pkt_size
);
3689 cb_patched_cur_length
+= pkt_size
;
3690 rc
= goya_validate_wreg32(hdev
, parser
, kernel_pkt
);
3693 case PACKET_WREG_BULK
:
3695 "User not allowed to use WREG_BULK\n");
3699 case PACKET_MSG_PROT
:
3701 "User not allowed to use MSG_PROT\n");
3706 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
3711 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
3715 case PACKET_MSG_LONG
:
3716 case PACKET_MSG_SHORT
:
3719 memcpy(kernel_pkt
, user_pkt
, pkt_size
);
3720 cb_patched_cur_length
+= pkt_size
;
3724 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
3737 static int goya_parse_cb_mmu(struct hl_device
*hdev
,
3738 struct hl_cs_parser
*parser
)
3740 u64 patched_cb_handle
;
3741 u32 patched_cb_size
;
3742 struct hl_cb
*user_cb
;
3746 * The new CB should have space at the end for two MSG_PROT pkt:
3747 * 1. A packet that will act as a completion packet
3748 * 2. A packet that will generate MSI-X interrupt
3750 parser
->patched_cb_size
= parser
->user_cb_size
+
3751 sizeof(struct packet_msg_prot
) * 2;
3753 rc
= hl_cb_create(hdev
, &hdev
->kernel_cb_mgr
,
3754 parser
->patched_cb_size
,
3755 &patched_cb_handle
, HL_KERNEL_ASID_ID
);
3759 "Failed to allocate patched CB for DMA CS %d\n",
3764 patched_cb_handle
>>= PAGE_SHIFT
;
3765 parser
->patched_cb
= hl_cb_get(hdev
, &hdev
->kernel_cb_mgr
,
3766 (u32
) patched_cb_handle
);
3767 /* hl_cb_get should never fail here so use kernel WARN */
3768 WARN(!parser
->patched_cb
, "DMA CB handle invalid 0x%x\n",
3769 (u32
) patched_cb_handle
);
3770 if (!parser
->patched_cb
) {
3776 * The check that parser->user_cb_size <= parser->user_cb->size was done
3777 * in validate_queue_index().
3779 memcpy((void *) (uintptr_t) parser
->patched_cb
->kernel_address
,
3780 (void *) (uintptr_t) parser
->user_cb
->kernel_address
,
3781 parser
->user_cb_size
);
3783 patched_cb_size
= parser
->patched_cb_size
;
3785 /* validate patched CB instead of user CB */
3786 user_cb
= parser
->user_cb
;
3787 parser
->user_cb
= parser
->patched_cb
;
3788 rc
= goya_validate_cb(hdev
, parser
, true);
3789 parser
->user_cb
= user_cb
;
3792 hl_cb_put(parser
->patched_cb
);
3796 if (patched_cb_size
!= parser
->patched_cb_size
) {
3797 dev_err(hdev
->dev
, "user CB size mismatch\n");
3798 hl_cb_put(parser
->patched_cb
);
3805 * Always call cb destroy here because we still have 1 reference
3806 * to it by calling cb_get earlier. After the job will be completed,
3807 * cb_put will release it, but here we want to remove it from the
3810 hl_cb_destroy(hdev
, &hdev
->kernel_cb_mgr
,
3811 patched_cb_handle
<< PAGE_SHIFT
);
3816 static int goya_parse_cb_no_mmu(struct hl_device
*hdev
,
3817 struct hl_cs_parser
*parser
)
3819 u64 patched_cb_handle
;
3822 rc
= goya_validate_cb(hdev
, parser
, false);
3827 rc
= hl_cb_create(hdev
, &hdev
->kernel_cb_mgr
,
3828 parser
->patched_cb_size
,
3829 &patched_cb_handle
, HL_KERNEL_ASID_ID
);
3832 "Failed to allocate patched CB for DMA CS %d\n", rc
);
3836 patched_cb_handle
>>= PAGE_SHIFT
;
3837 parser
->patched_cb
= hl_cb_get(hdev
, &hdev
->kernel_cb_mgr
,
3838 (u32
) patched_cb_handle
);
3839 /* hl_cb_get should never fail here so use kernel WARN */
3840 WARN(!parser
->patched_cb
, "DMA CB handle invalid 0x%x\n",
3841 (u32
) patched_cb_handle
);
3842 if (!parser
->patched_cb
) {
3847 rc
= goya_patch_cb(hdev
, parser
);
3850 hl_cb_put(parser
->patched_cb
);
3854 * Always call cb destroy here because we still have 1 reference
3855 * to it by calling cb_get earlier. After the job will be completed,
3856 * cb_put will release it, but here we want to remove it from the
3859 hl_cb_destroy(hdev
, &hdev
->kernel_cb_mgr
,
3860 patched_cb_handle
<< PAGE_SHIFT
);
3864 hl_userptr_delete_list(hdev
, parser
->job_userptr_list
);
3868 static int goya_parse_cb_no_ext_queue(struct hl_device
*hdev
,
3869 struct hl_cs_parser
*parser
)
3871 struct asic_fixed_properties
*asic_prop
= &hdev
->asic_prop
;
3872 struct goya_device
*goya
= hdev
->asic_specific
;
3874 if (goya
->hw_cap_initialized
& HW_CAP_MMU
)
3877 /* For internal queue jobs, just check if CB address is valid */
3878 if (hl_mem_area_inside_range(
3879 (u64
) (uintptr_t) parser
->user_cb
,
3880 parser
->user_cb_size
,
3881 asic_prop
->sram_user_base_address
,
3882 asic_prop
->sram_end_address
))
3885 if (hl_mem_area_inside_range(
3886 (u64
) (uintptr_t) parser
->user_cb
,
3887 parser
->user_cb_size
,
3888 asic_prop
->dram_user_base_address
,
3889 asic_prop
->dram_end_address
))
3893 "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
3894 parser
->user_cb
, parser
->user_cb_size
);
3899 int goya_cs_parser(struct hl_device
*hdev
, struct hl_cs_parser
*parser
)
3901 struct goya_device
*goya
= hdev
->asic_specific
;
3903 if (!parser
->ext_queue
)
3904 return goya_parse_cb_no_ext_queue(hdev
, parser
);
3906 if (goya
->hw_cap_initialized
& HW_CAP_MMU
)
3907 return goya_parse_cb_mmu(hdev
, parser
);
3909 return goya_parse_cb_no_mmu(hdev
, parser
);
3912 void goya_add_end_of_cb_packets(struct hl_device
*hdev
, u64 kernel_address
,
3913 u32 len
, u64 cq_addr
, u32 cq_val
, u32 msix_vec
)
3915 struct packet_msg_prot
*cq_pkt
;
3918 cq_pkt
= (struct packet_msg_prot
*) (uintptr_t)
3919 (kernel_address
+ len
- (sizeof(struct packet_msg_prot
) * 2));
3921 tmp
= (PACKET_MSG_PROT
<< GOYA_PKT_CTL_OPCODE_SHIFT
) |
3922 (1 << GOYA_PKT_CTL_EB_SHIFT
) |
3923 (1 << GOYA_PKT_CTL_MB_SHIFT
);
3924 cq_pkt
->ctl
= cpu_to_le32(tmp
);
3925 cq_pkt
->value
= cpu_to_le32(cq_val
);
3926 cq_pkt
->addr
= cpu_to_le64(cq_addr
);
3930 tmp
= (PACKET_MSG_PROT
<< GOYA_PKT_CTL_OPCODE_SHIFT
) |
3931 (1 << GOYA_PKT_CTL_MB_SHIFT
);
3932 cq_pkt
->ctl
= cpu_to_le32(tmp
);
3933 cq_pkt
->value
= cpu_to_le32(msix_vec
& 0x7FF);
3934 cq_pkt
->addr
= cpu_to_le64(CFG_BASE
+ mmPCIE_DBI_MSIX_DOORBELL_OFF
);
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Flush all WREG to prevent race */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}
3968 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
3971 * @hdev: pointer to hl_device structure
3972 * @addr: device or host mapped address
3973 * @val: returned value
3975 * In case of DDR address that is not mapped into the default aperture that
3976 * the DDR bar exposes, the function will configure the iATU so that the DDR
3977 * bar will be positioned at a base address that allows reading from the
3978 * required address. Configuring the iATU during normal operation can
3979 * lead to undefined behavior and therefore, should be done with extreme care
3982 static int goya_debugfs_read32(struct hl_device
*hdev
, u64 addr
, u32
*val
)
3984 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
3988 if ((addr
>= CFG_BASE
) && (addr
< CFG_BASE
+ CFG_SIZE
)) {
3989 *val
= RREG32(addr
- CFG_BASE
);
3991 } else if ((addr
>= SRAM_BASE_ADDR
) &&
3992 (addr
< SRAM_BASE_ADDR
+ SRAM_SIZE
)) {
3994 *val
= readl(hdev
->pcie_bar
[SRAM_CFG_BAR_ID
] +
3995 (addr
- SRAM_BASE_ADDR
));
3997 } else if ((addr
>= DRAM_PHYS_BASE
) &&
3998 (addr
< DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
)) {
4000 u64 bar_base_addr
= DRAM_PHYS_BASE
+
4001 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
4003 ddr_bar_addr
= goya_set_ddr_bar_base(hdev
, bar_base_addr
);
4004 if (ddr_bar_addr
!= U64_MAX
) {
4005 *val
= readl(hdev
->pcie_bar
[DDR_BAR_ID
] +
4006 (addr
- bar_base_addr
));
4008 ddr_bar_addr
= goya_set_ddr_bar_base(hdev
,
4011 if (ddr_bar_addr
== U64_MAX
)
4014 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
4015 *val
= *(u32
*) phys_to_virt(addr
- HOST_PHYS_BASE
);
4025 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4028 * @hdev: pointer to hl_device structure
4029 * @addr: device or host mapped address
4030 * @val: returned value
4032 * In case of DDR address that is not mapped into the default aperture that
4033 * the DDR bar exposes, the function will configure the iATU so that the DDR
4034 * bar will be positioned at a base address that allows writing to the
4035 * required address. Configuring the iATU during normal operation can
4036 * lead to undefined behavior and therefore, should be done with extreme care
4039 static int goya_debugfs_write32(struct hl_device
*hdev
, u64 addr
, u32 val
)
4041 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
4045 if ((addr
>= CFG_BASE
) && (addr
< CFG_BASE
+ CFG_SIZE
)) {
4046 WREG32(addr
- CFG_BASE
, val
);
4048 } else if ((addr
>= SRAM_BASE_ADDR
) &&
4049 (addr
< SRAM_BASE_ADDR
+ SRAM_SIZE
)) {
4051 writel(val
, hdev
->pcie_bar
[SRAM_CFG_BAR_ID
] +
4052 (addr
- SRAM_BASE_ADDR
));
4054 } else if ((addr
>= DRAM_PHYS_BASE
) &&
4055 (addr
< DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
)) {
4057 u64 bar_base_addr
= DRAM_PHYS_BASE
+
4058 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
4060 ddr_bar_addr
= goya_set_ddr_bar_base(hdev
, bar_base_addr
);
4061 if (ddr_bar_addr
!= U64_MAX
) {
4062 writel(val
, hdev
->pcie_bar
[DDR_BAR_ID
] +
4063 (addr
- bar_base_addr
));
4065 ddr_bar_addr
= goya_set_ddr_bar_base(hdev
,
4068 if (ddr_bar_addr
== U64_MAX
)
4071 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
4072 *(u32
*) phys_to_virt(addr
- HOST_PHYS_BASE
) = val
;
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
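/*
 * _goya_get_event_desc() returns a printf-style template for the event name
 * ("TPC%d_ecc", "DMA%d_qm", ...); goya_get_event_desc() then fills in the
 * engine index it derives from the event ID before the string is logged.
 */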
4103 static const char *_goya_get_event_desc(u16 event_type
)
4105 switch (event_type
) {
4106 case GOYA_ASYNC_EVENT_ID_PCIE_IF
:
4108 case GOYA_ASYNC_EVENT_ID_TPC0_ECC
:
4109 case GOYA_ASYNC_EVENT_ID_TPC1_ECC
:
4110 case GOYA_ASYNC_EVENT_ID_TPC2_ECC
:
4111 case GOYA_ASYNC_EVENT_ID_TPC3_ECC
:
4112 case GOYA_ASYNC_EVENT_ID_TPC4_ECC
:
4113 case GOYA_ASYNC_EVENT_ID_TPC5_ECC
:
4114 case GOYA_ASYNC_EVENT_ID_TPC6_ECC
:
4115 case GOYA_ASYNC_EVENT_ID_TPC7_ECC
:
4117 case GOYA_ASYNC_EVENT_ID_MME_ECC
:
4119 case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT
:
4120 return "MME_ecc_ext";
4121 case GOYA_ASYNC_EVENT_ID_MMU_ECC
:
4123 case GOYA_ASYNC_EVENT_ID_DMA_MACRO
:
4125 case GOYA_ASYNC_EVENT_ID_DMA_ECC
:
4127 case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC
:
4128 return "CPU_if_ecc";
4129 case GOYA_ASYNC_EVENT_ID_PSOC_MEM
:
4131 case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT
:
4132 return "PSOC_coresight";
4133 case GOYA_ASYNC_EVENT_ID_SRAM0
... GOYA_ASYNC_EVENT_ID_SRAM29
:
4135 case GOYA_ASYNC_EVENT_ID_GIC500
:
4137 case GOYA_ASYNC_EVENT_ID_PLL0
... GOYA_ASYNC_EVENT_ID_PLL6
:
4139 case GOYA_ASYNC_EVENT_ID_AXI_ECC
:
4141 case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC
:
4142 return "L2_ram_ecc";
4143 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET
:
4144 return "PSOC_gpio_05_sw_reset";
4145 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT
:
4146 return "PSOC_gpio_10_vrhot_icrit";
4147 case GOYA_ASYNC_EVENT_ID_PCIE_DEC
:
4149 case GOYA_ASYNC_EVENT_ID_TPC0_DEC
:
4150 case GOYA_ASYNC_EVENT_ID_TPC1_DEC
:
4151 case GOYA_ASYNC_EVENT_ID_TPC2_DEC
:
4152 case GOYA_ASYNC_EVENT_ID_TPC3_DEC
:
4153 case GOYA_ASYNC_EVENT_ID_TPC4_DEC
:
4154 case GOYA_ASYNC_EVENT_ID_TPC5_DEC
:
4155 case GOYA_ASYNC_EVENT_ID_TPC6_DEC
:
4156 case GOYA_ASYNC_EVENT_ID_TPC7_DEC
:
4158 case GOYA_ASYNC_EVENT_ID_MME_WACS
:
4160 case GOYA_ASYNC_EVENT_ID_MME_WACSD
:
4162 case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER
:
4163 return "CPU_axi_splitter";
4164 case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC
:
4165 return "PSOC_axi_dec";
4166 case GOYA_ASYNC_EVENT_ID_PSOC
:
4168 case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR
:
4169 case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR
:
4170 case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR
:
4171 case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR
:
4172 case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR
:
4173 case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR
:
4174 case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR
:
4175 case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR
:
4176 return "TPC%d_krn_err";
4177 case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ
... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ
:
4179 case GOYA_ASYNC_EVENT_ID_TPC0_QM
... GOYA_ASYNC_EVENT_ID_TPC7_QM
:
4181 case GOYA_ASYNC_EVENT_ID_MME_QM
:
4183 case GOYA_ASYNC_EVENT_ID_MME_CMDQ
:
4185 case GOYA_ASYNC_EVENT_ID_DMA0_QM
... GOYA_ASYNC_EVENT_ID_DMA4_QM
:
4187 case GOYA_ASYNC_EVENT_ID_DMA0_CH
... GOYA_ASYNC_EVENT_ID_DMA4_CH
:
4189 case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU
:
4190 case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU
:
4191 case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU
:
4192 case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU
:
4193 case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU
:
4194 case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU
:
4195 case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU
:
4196 case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU
:
4197 return "TPC%d_bmon_spmu";
4198 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0
... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
:
4199 return "DMA_bm_ch%d";
static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	int index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}

static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}

static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
				bool razwi)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		goya_print_razwi_info(hdev);
		goya_print_mmu_error_info(hdev);
	}
}

static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
				size_t irq_arr_size)
{
	struct armcp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	long result;
	int rc;

	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes in order to ArmCP to copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is casted to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
	memcpy(&pkt->irqs, irq_arr, irq_arr_size);

	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						ARMCP_PKT_CTL_OPCODE_SHIFT);

	rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
			HL_DEVICE_TIMEOUT_USEC, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}

static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}

static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct armcp_packet pkt;
	long result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
				ARMCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
			HL_DEVICE_TIMEOUT_USEC, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);

	return rc;
}

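/*
 * goya_handle_eqe() below dispatches events read from the event queue into
 * three groups: fatal errors (ECC, AXI, GIC500, the reset/thermal GPIOs)
 * trigger a hard reset via hl_device_reset(); engine errors (DEC, WACS,
 * KRN_ERR, QM/CMDQ, DMA channel) are printed together with the RAZWI and MMU
 * page-fault capture registers and then re-armed through the ArmCP unmask
 * packet; monitoring events (BMON_SPMU, DMA_BM) are only printed and
 * re-armed.
 */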
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	goya->events_stat[event_type]++;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		goya_print_irq_info(hdev, event_type, false);
		hl_device_reset(hdev, true, false);
		break;

	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	*size = (u32) sizeof(goya->events_stat);

	return goya->events_stat;
}

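/*
 * goya_memset_device_memory() below fills a DRAM/SRAM region by building a
 * kernel command buffer of LIN_DMA packets with the MEMSET bit set, so the
 * 64-bit src_addr field carries the fill pattern rather than a source
 * address. The transfer size is a 32-bit field, so the region is split into
 * chunks of up to 2GB, one packet per chunk, and the resulting CB is
 * submitted on DMA queue 0 through QMAN0.
 */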
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
					u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_cb_put(job->patched_cb);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
						lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}

static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}

static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}

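/*
 * The two functions below manage the MMU mappings the device CPU (ArmCP)
 * relies on once the MMU is enabled: the firmware image region in DRAM is
 * identity-mapped with 2MB pages, and the host-resident CPU accessible DMA
 * region is mapped at VA_CPU_ACCESSIBLE_MEM_ADDR, either with a single 2MB
 * page when the DMA address happens to be 2MB aligned or with 4KB pages
 * otherwise. The CPU_IF ARUSER/AWUSER override registers are then set to the
 * kernel ASID, presumably so that device CPU traffic is translated with the
 * kernel context.
 */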
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}

void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}

static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		WARN(1, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}

static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc)
		dev_notice_ratelimited(hdev->dev,
			"Timeout when waiting for MMU cache invalidation\n");
}

static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines
	 * with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* producer index is 8 bits wide */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc)
		dev_notice_ratelimited(hdev->dev,
			"Timeout when waiting for MMU cache invalidation\n");
}

int goya_send_heartbeat(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

int goya_armcp_info_get(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 dram_size;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_armcp_info_get(hdev);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->armcp_info.dram_size);
	if (dram_size) {
		if ((!is_power_of_2(dram_size)) ||
				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
				dram_size);
			dram_size = DRAM_PHYS_DEFAULT_SIZE;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	return 0;
}

static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}

static void goya_hw_queues_lock(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}

static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.flush_pq_write = goya_flush_pq_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.send_heartbeat = goya_send_heartbeat,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.get_hw_state = goya_get_hw_state,
	.pci_bars_map = goya_pci_bars_map,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.init_iatu = goya_init_iatu,
	.halt_coresight = goya_halt_coresight
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @*hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}