]>
Commit | Line | Data |
---|---|---|
f544afac AL |
1 | /* |
2 | * Copyright 2021 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | #include "amdgpu.h" | |
23 | #include "amdgpu_amdkfd.h" | |
f544afac | 24 | #include "amdgpu_amdkfd_gfx_v9.h" |
036e348f | 25 | #include "amdgpu_amdkfd_aldebaran.h" |
f544afac AL |
26 | #include "gc/gc_9_4_3_offset.h" |
27 | #include "gc/gc_9_4_3_sh_mask.h" | |
28 | #include "athub/athub_1_8_0_offset.h" | |
29 | #include "athub/athub_1_8_0_sh_mask.h" | |
a0587999 AL |
30 | #include "oss/osssys_4_4_2_offset.h" |
31 | #include "oss/osssys_4_4_2_sh_mask.h" | |
f544afac AL |
32 | #include "v9_structs.h" |
33 | #include "soc15.h" | |
a805889a MJ |
34 | #include "sdma/sdma_4_4_2_offset.h" |
35 | #include "sdma/sdma_4_4_2_sh_mask.h" | |
036e348f | 36 | #include <uapi/linux/kfd_ioctl.h> |
a805889a MJ |
37 | |
/* View an opaque MQD buffer as a v9 SDMA MQD; the pointer is not validated. */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v9_sdma_mqd *sdma_mqd = mqd;

	return sdma_mqd;
}
42 | ||
43 | static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, | |
44 | unsigned int engine_id, | |
45 | unsigned int queue_id) | |
46 | { | |
f8b34a05 LL |
47 | uint32_t sdma_engine_reg_base = |
48 | SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id), | |
49 | regSDMA_RLC0_RB_CNTL) - | |
50 | regSDMA_RLC0_RB_CNTL; | |
a805889a MJ |
51 | uint32_t retval = sdma_engine_reg_base + |
52 | queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL); | |
53 | ||
54 | pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, | |
55 | queue_id, retval); | |
56 | return retval; | |
57 | } | |
58 | ||
d522458e | 59 | static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, |
a805889a MJ |
60 | uint32_t __user *wptr, struct mm_struct *mm) |
61 | { | |
62 | struct v9_sdma_mqd *m; | |
63 | uint32_t sdma_rlc_reg_offset; | |
64 | unsigned long end_jiffies; | |
65 | uint32_t data; | |
66 | uint64_t data64; | |
67 | uint64_t __user *wptr64 = (uint64_t __user *)wptr; | |
68 | ||
69 | m = get_sdma_mqd(mqd); | |
70 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
71 | m->sdma_queue_id); | |
72 | ||
73 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, | |
74 | m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)); | |
75 | ||
76 | end_jiffies = msecs_to_jiffies(2000) + jiffies; | |
77 | while (true) { | |
78 | data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS); | |
79 | if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK) | |
80 | break; | |
81 | if (time_after(jiffies, end_jiffies)) { | |
82 | pr_err("SDMA RLC not idle in %s\n", __func__); | |
83 | return -ETIME; | |
84 | } | |
85 | usleep_range(500, 1000); | |
86 | } | |
87 | ||
88 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET, | |
89 | m->sdmax_rlcx_doorbell_offset); | |
90 | ||
91 | data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL, | |
92 | ENABLE, 1); | |
93 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data); | |
94 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR, | |
95 | m->sdmax_rlcx_rb_rptr); | |
96 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI, | |
97 | m->sdmax_rlcx_rb_rptr_hi); | |
98 | ||
99 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1); | |
100 | if (read_user_wptr(mm, wptr64, data64)) { | |
101 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR, | |
102 | lower_32_bits(data64)); | |
103 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI, | |
104 | upper_32_bits(data64)); | |
105 | } else { | |
106 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR, | |
107 | m->sdmax_rlcx_rb_rptr); | |
108 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI, | |
109 | m->sdmax_rlcx_rb_rptr_hi); | |
110 | } | |
111 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0); | |
112 | ||
113 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); | |
114 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI, | |
115 | m->sdmax_rlcx_rb_base_hi); | |
116 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO, | |
117 | m->sdmax_rlcx_rb_rptr_addr_lo); | |
118 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI, | |
119 | m->sdmax_rlcx_rb_rptr_addr_hi); | |
120 | ||
121 | data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL, | |
122 | RB_ENABLE, 1); | |
123 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data); | |
124 | ||
125 | return 0; | |
126 | } | |
127 | ||
d522458e | 128 | static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev, |
a805889a MJ |
129 | uint32_t engine_id, uint32_t queue_id, |
130 | uint32_t (**dump)[2], uint32_t *n_regs) | |
131 | { | |
132 | uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, | |
133 | engine_id, queue_id); | |
134 | uint32_t i = 0, reg; | |
135 | #undef HQD_N_REGS | |
136 | #define HQD_N_REGS (19+6+7+12) | |
137 | #define DUMP_REG(addr) do { \ | |
138 | if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ | |
139 | break; \ | |
140 | (*dump)[i][0] = (addr) << 2; \ | |
141 | (*dump)[i++][1] = RREG32(addr); \ | |
142 | } while (0) | |
143 | ||
144 | *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); | |
145 | if (*dump == NULL) | |
146 | return -ENOMEM; | |
147 | ||
148 | for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++) | |
149 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
150 | for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++) | |
151 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
152 | for (reg = regSDMA_RLC0_IB_SUB_REMAIN; | |
153 | reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++) | |
154 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
155 | for (reg = regSDMA_RLC0_MIDCMD_DATA0; | |
156 | reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++) | |
157 | DUMP_REG(sdma_rlc_reg_offset + reg); | |
158 | ||
159 | WARN_ON_ONCE(i != HQD_N_REGS); | |
160 | *n_regs = i; | |
161 | ||
162 | return 0; | |
163 | } | |
164 | ||
d522458e | 165 | static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) |
a805889a MJ |
166 | { |
167 | struct v9_sdma_mqd *m; | |
168 | uint32_t sdma_rlc_reg_offset; | |
169 | uint32_t sdma_rlc_rb_cntl; | |
170 | ||
171 | m = get_sdma_mqd(mqd); | |
172 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
173 | m->sdma_queue_id); | |
174 | ||
175 | sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL); | |
176 | ||
177 | if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK) | |
178 | return true; | |
179 | ||
180 | return false; | |
181 | } | |
182 | ||
d522458e | 183 | static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, |
a805889a MJ |
184 | unsigned int utimeout) |
185 | { | |
186 | struct v9_sdma_mqd *m; | |
187 | uint32_t sdma_rlc_reg_offset; | |
188 | uint32_t temp; | |
189 | unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; | |
190 | ||
191 | m = get_sdma_mqd(mqd); | |
192 | sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, | |
193 | m->sdma_queue_id); | |
194 | ||
195 | temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL); | |
196 | temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK; | |
197 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp); | |
198 | ||
199 | while (true) { | |
200 | temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS); | |
201 | if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK) | |
202 | break; | |
203 | if (time_after(jiffies, end_jiffies)) { | |
204 | pr_err("SDMA RLC not idle in %s\n", __func__); | |
205 | return -ETIME; | |
206 | } | |
207 | usleep_range(500, 1000); | |
208 | } | |
209 | ||
210 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0); | |
211 | WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, | |
212 | RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) | | |
213 | SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK); | |
214 | ||
215 | m->sdmax_rlcx_rb_rptr = | |
216 | RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR); | |
217 | m->sdmax_rlcx_rb_rptr_hi = | |
218 | RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI); | |
219 | ||
220 | return 0; | |
221 | } | |
f544afac AL |
222 | |
223 | static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev, | |
a0587999 | 224 | u32 pasid, unsigned int vmid, uint32_t xcc_inst) |
f544afac AL |
225 | { |
226 | unsigned long timeout; | |
a0587999 | 227 | unsigned int reg; |
02ee3b02 | 228 | unsigned int phy_inst = GET_INST(GC, xcc_inst); |
a0587999 | 229 | /* Every two XCCs share one AID */ |
02ee3b02 | 230 | unsigned int aid = phy_inst / 2; |
f544afac AL |
231 | |
232 | /* | |
233 | * We have to assume that there is no outstanding mapping. | |
234 | * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because | |
235 | * a mapping is in progress or because a mapping finished | |
236 | * and the SW cleared it. | |
237 | * So the protocol is to always wait & clear. | |
238 | */ | |
239 | uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | | |
240 | ATC_VMID0_PASID_MAPPING__VALID_MASK; | |
241 | ||
a0587999 | 242 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, |
f544afac AL |
243 | regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping); |
244 | ||
245 | timeout = jiffies + msecs_to_jiffies(10); | |
a0587999 | 246 | while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0, |
f544afac AL |
247 | regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & |
248 | (1U << vmid))) { | |
249 | if (time_after(jiffies, timeout)) { | |
250 | pr_err("Fail to program VMID-PASID mapping\n"); | |
251 | return -ETIME; | |
252 | } | |
253 | cpu_relax(); | |
254 | } | |
255 | ||
a0587999 | 256 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, |
f544afac AL |
257 | regATC_VMID_PASID_MAPPING_UPDATE_STATUS), |
258 | 1U << vmid); | |
259 | ||
a0587999 AL |
260 | reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX)); |
261 | /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs, | |
262 | * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1" | |
263 | * programs _LUT for XCC and "aid * 4" for AID where the XCC connects | |
264 | * to. | |
265 | */ | |
266 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), | |
02ee3b02 | 267 | aid * 4 + (phy_inst % 2) + 1); |
a0587999 | 268 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, |
f544afac | 269 | pasid_mapping); |
a0587999 AL |
270 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), |
271 | aid * 4); | |
272 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid, | |
f544afac | 273 | pasid_mapping); |
a0587999 | 274 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg); |
f544afac AL |
275 | |
276 | return 0; | |
277 | } | |
278 | ||
/* View an opaque MQD buffer as a v9 compute MQD; the pointer is not validated. */
static inline struct v9_mqd *get_mqd(void *mqd)
{
	struct v9_mqd *compute_mqd = mqd;

	return compute_mqd;
}
283 | ||
284 | static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, | |
285 | uint32_t pipe_id, uint32_t queue_id, | |
286 | uint32_t __user *wptr, uint32_t wptr_shift, | |
e2069a7b | 287 | uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) |
f544afac AL |
288 | { |
289 | struct v9_mqd *m; | |
290 | uint32_t *mqd_hqd; | |
291 | uint32_t reg, hqd_base, hqd_end, data; | |
292 | ||
293 | m = get_mqd(mqd); | |
294 | ||
e2069a7b | 295 | kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); |
f544afac AL |
296 | |
297 | /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */ | |
298 | mqd_hqd = &m->cp_mqd_base_addr_lo; | |
659a4ab8 LL |
299 | hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR); |
300 | hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); | |
f544afac AL |
301 | |
302 | for (reg = hqd_base; reg <= hqd_end; reg++) | |
85150626 | 303 | WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); |
f544afac AL |
304 | |
305 | ||
306 | /* Activate doorbell logic before triggering WPTR poll. */ | |
307 | data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, | |
308 | CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | |
659a4ab8 | 309 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), |
f544afac AL |
310 | data); |
311 | ||
312 | if (wptr) { | |
313 | /* Don't read wptr with get_user because the user | |
314 | * context may not be accessible (if this function | |
315 | * runs in a work queue). Instead trigger a one-shot | |
316 | * polling read from memory in the CP. This assumes | |
317 | * that wptr is GPU-accessible in the queue's VMID via | |
318 | * ATC or SVM. WPTR==RPTR before starting the poll so | |
319 | * the CP starts fetching new commands from the right | |
320 | * place. | |
321 | * | |
322 | * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit | |
323 | * tricky. Assume that the queue didn't overflow. The | |
324 | * number of valid bits in the 32-bit RPTR depends on | |
325 | * the queue size. The remaining bits are taken from | |
326 | * the saved 64-bit WPTR. If the WPTR wrapped, add the | |
327 | * queue size. | |
328 | */ | |
329 | uint32_t queue_size = | |
330 | 2 << REG_GET_FIELD(m->cp_hqd_pq_control, | |
331 | CP_HQD_PQ_CONTROL, QUEUE_SIZE); | |
332 | uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); | |
333 | ||
334 | if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) | |
335 | guessed_wptr += queue_size; | |
336 | guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); | |
337 | guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; | |
338 | ||
659a4ab8 | 339 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), |
f544afac | 340 | lower_32_bits(guessed_wptr)); |
659a4ab8 | 341 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), |
f544afac | 342 | upper_32_bits(guessed_wptr)); |
659a4ab8 | 343 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), |
f544afac | 344 | lower_32_bits((uintptr_t)wptr)); |
659a4ab8 | 345 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), |
f544afac AL |
346 | regCP_HQD_PQ_WPTR_POLL_ADDR_HI), |
347 | upper_32_bits((uintptr_t)wptr)); | |
659a4ab8 | 348 | WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), |
f544afac AL |
349 | (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, |
350 | queue_id)); | |
351 | } | |
352 | ||
353 | /* Start the EOP fetcher */ | |
659a4ab8 | 354 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), |
f544afac AL |
355 | REG_SET_FIELD(m->cp_hqd_eop_rptr, |
356 | CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); | |
357 | ||
358 | data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); | |
659a4ab8 | 359 | WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); |
f544afac | 360 | |
e2069a7b | 361 | kgd_gfx_v9_release_queue(adev, inst); |
f544afac AL |
362 | |
363 | return 0; | |
364 | } | |
365 | ||
036e348f EH |
366 | /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ |
367 | static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev, | |
368 | bool keep_trap_enabled, | |
369 | uint32_t vmid) | |
370 | { | |
371 | uint32_t data = 0; | |
372 | ||
373 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); | |
374 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); | |
375 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); | |
376 | ||
377 | return data; | |
378 | } | |
379 | ||
380 | static int kgd_gfx_v9_4_3_validate_trap_override_request( | |
381 | struct amdgpu_device *adev, | |
382 | uint32_t trap_override, | |
383 | uint32_t *trap_mask_supported) | |
384 | { | |
385 | *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | | |
386 | KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | | |
387 | KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | | |
388 | KFD_DBG_TRAP_MASK_FP_OVERFLOW | | |
389 | KFD_DBG_TRAP_MASK_FP_UNDERFLOW | | |
390 | KFD_DBG_TRAP_MASK_FP_INEXACT | | |
391 | KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | | |
392 | KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | | |
393 | KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION | | |
394 | KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | | |
395 | KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; | |
396 | ||
397 | if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && | |
398 | trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) | |
399 | return -EPERM; | |
400 | ||
401 | return 0; | |
402 | } | |
403 | ||
404 | static uint32_t trap_mask_map_sw_to_hw(uint32_t mask) | |
405 | { | |
406 | uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0; | |
407 | uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0; | |
408 | uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID | | |
409 | KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | | |
410 | KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | | |
411 | KFD_DBG_TRAP_MASK_FP_OVERFLOW | | |
412 | KFD_DBG_TRAP_MASK_FP_UNDERFLOW | | |
413 | KFD_DBG_TRAP_MASK_FP_INEXACT | | |
414 | KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | | |
415 | KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | | |
416 | KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION); | |
417 | uint32_t ret; | |
418 | ||
419 | ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en); | |
420 | ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start); | |
421 | ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end); | |
422 | ||
423 | return ret; | |
424 | } | |
425 | ||
426 | static uint32_t trap_mask_map_hw_to_sw(uint32_t mask) | |
427 | { | |
428 | uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); | |
429 | ||
430 | if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START)) | |
431 | ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START; | |
432 | ||
433 | if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END)) | |
434 | ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; | |
435 | ||
436 | return ret; | |
437 | } | |
438 | ||
439 | /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ | |
440 | static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override( | |
441 | struct amdgpu_device *adev, | |
442 | uint32_t vmid, | |
443 | uint32_t trap_override, | |
444 | uint32_t trap_mask_bits, | |
445 | uint32_t trap_mask_request, | |
446 | uint32_t *trap_mask_prev, | |
447 | uint32_t kfd_dbg_trap_cntl_prev) | |
448 | ||
449 | { | |
450 | uint32_t data = 0; | |
451 | ||
452 | *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev); | |
453 | ||
454 | data = (trap_mask_bits & trap_mask_request) | | |
455 | (*trap_mask_prev & ~trap_mask_request); | |
456 | data = trap_mask_map_sw_to_hw(data); | |
457 | ||
458 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); | |
459 | data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); | |
460 | ||
461 | return data; | |
462 | } | |
463 | ||
464 | #define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) | |
465 | static uint32_t kgd_gfx_v9_4_3_set_address_watch( | |
466 | struct amdgpu_device *adev, | |
467 | uint64_t watch_address, | |
468 | uint32_t watch_address_mask, | |
469 | uint32_t watch_id, | |
470 | uint32_t watch_mode, | |
471 | uint32_t debug_vmid, | |
472 | uint32_t inst) | |
473 | { | |
474 | uint32_t watch_address_high; | |
475 | uint32_t watch_address_low; | |
476 | uint32_t watch_address_cntl; | |
477 | ||
478 | watch_address_cntl = 0; | |
479 | watch_address_low = lower_32_bits(watch_address); | |
480 | watch_address_high = upper_32_bits(watch_address) & 0xffff; | |
481 | ||
482 | watch_address_cntl = REG_SET_FIELD(watch_address_cntl, | |
483 | TCP_WATCH0_CNTL, | |
484 | MODE, | |
485 | watch_mode); | |
486 | ||
487 | watch_address_cntl = REG_SET_FIELD(watch_address_cntl, | |
488 | TCP_WATCH0_CNTL, | |
489 | MASK, | |
490 | watch_address_mask >> 7); | |
491 | ||
492 | watch_address_cntl = REG_SET_FIELD(watch_address_cntl, | |
493 | TCP_WATCH0_CNTL, | |
494 | VALID, | |
495 | 1); | |
496 | ||
497 | WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), | |
498 | regTCP_WATCH0_ADDR_H) + | |
499 | (watch_id * TCP_WATCH_STRIDE)), | |
500 | watch_address_high); | |
501 | ||
502 | WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), | |
503 | regTCP_WATCH0_ADDR_L) + | |
504 | (watch_id * TCP_WATCH_STRIDE)), | |
505 | watch_address_low); | |
506 | ||
507 | return watch_address_cntl; | |
508 | } | |
509 | ||
/*
 * Address watch clear hook. Performs no register access; both arguments are
 * ignored and the returned value is always 0.
 */
static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
					uint32_t watch_id)
{
	const uint32_t cleared_cntl = 0;

	return cleared_cntl;
}
515 | ||
f544afac AL |
516 | const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { |
517 | .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, | |
518 | .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, | |
519 | .init_interrupts = kgd_gfx_v9_init_interrupts, | |
520 | .hqd_load = kgd_gfx_v9_4_3_hqd_load, | |
521 | .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, | |
a805889a | 522 | .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load, |
f544afac | 523 | .hqd_dump = kgd_gfx_v9_hqd_dump, |
a805889a | 524 | .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump, |
f544afac | 525 | .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, |
a805889a | 526 | .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied, |
f544afac | 527 | .hqd_destroy = kgd_gfx_v9_hqd_destroy, |
a805889a | 528 | .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy, |
f544afac AL |
529 | .wave_control_execute = kgd_gfx_v9_wave_control_execute, |
530 | .get_atc_vmid_pasid_mapping_info = | |
531 | kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, | |
532 | .set_vm_context_page_table_base = | |
533 | kgd_gfx_v9_set_vm_context_page_table_base, | |
c4cde735 | 534 | .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, |
f544afac | 535 | .program_trap_handler_settings = |
036e348f EH |
536 | kgd_gfx_v9_program_trap_handler_settings, |
537 | .build_grace_period_packet_info = | |
538 | kgd_gfx_v9_build_grace_period_packet_info, | |
539 | .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, | |
540 | .enable_debug_trap = kgd_aldebaran_enable_debug_trap, | |
541 | .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap, | |
542 | .validate_trap_override_request = | |
543 | kgd_gfx_v9_4_3_validate_trap_override_request, | |
544 | .set_wave_launch_trap_override = | |
545 | kgd_gfx_v9_4_3_set_wave_launch_trap_override, | |
546 | .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, | |
547 | .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, | |
548 | .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch | |
f544afac | 549 | }; |