drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3 */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
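/*
 * Peripheral Authentication Service (PAS) id for the GPU, passed to the
 * SCM calls below that load and resume the zap shader.
 */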
19 #define GPU_PAS_ID 13
20
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 uint32_t wptr;
26 unsigned long flags;
27
28 spin_lock_irqsave(&ring->lock, flags);
29
30 /* Copy the shadow to the actual register */
31 ring->cur = ring->next;
32
33 /* Make sure to wrap wptr if we need to */
34 wptr = get_wptr(ring);
35
36 spin_unlock_irqrestore(&ring->lock, flags);
37
38 /* Make sure everything is posted before making a decision */
39 mb();
40
41 /* Update HW if this is the current ring and we are not in preempt */
42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45
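/*
 * "sudo" submit path (CONFIG_DRM_MSM_GPU_SUDO): instead of executing the
 * userspace command buffers as indirect buffers, copy their contents
 * straight into the kernel ringbuffer. The kernel does not append the
 * usual CACHE_FLUSH_TS fence write on this path, so the fence is written
 * and retirement is kicked off manually after waiting for the ring to idle.
 */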
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 struct msm_file_private *ctx)
48 {
49 struct msm_drm_private *priv = gpu->dev->dev_private;
50 struct msm_ringbuffer *ring = submit->ring;
51 struct msm_gem_object *obj;
52 uint32_t *ptr, dwords;
53 unsigned int i, j;
54
55 for (i = 0; i < submit->nr_cmds; i++) {
56 switch (submit->cmd[i].type) {
57 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 break;
59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 if (priv->lastctx == ctx)
61 break;
62 /* fall-thru */
63 case MSM_SUBMIT_CMD_BUF:
64 /* copy commands into RB: */
65 obj = submit->bos[submit->cmd[i].idx].obj;
66 dwords = submit->cmd[i].size;
67
68 ptr = msm_gem_get_vaddr(&obj->base);
69
70 /* _get_vaddr() shouldn't fail at this point,
71 * since we've already mapped it once in
72 * submit_reloc()
73 */
74 if (WARN_ON(!ptr))
75 return;
76
77 for (j = 0; j < dwords; j++) {
78 /* normally the OUT_PKTn() would wait
79 * for space for the packet. But since
80 * we just OUT_RING() the whole thing,
81 * need to call adreno_wait_ring()
82 * ourselves:
83 */
84 adreno_wait_ring(ring, 1);
85 OUT_RING(ring, ptr[j]);
86 }
87
88 msm_gem_put_vaddr(&obj->base);
89
90 break;
91 }
92 }
93
94 a5xx_flush(gpu, ring);
95 a5xx_preempt_trigger(gpu);
96
97 /* we might not necessarily have a cmd from userspace to
98 * trigger an event to know that submit has completed, so
99 * do this manually:
100 */
101 a5xx_idle(gpu, ring);
102 ring->memptrs->fence = submit->seqno;
103 msm_gpu_retire(gpu);
104 }
105
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 struct msm_file_private *ctx)
108 {
109 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 struct msm_drm_private *priv = gpu->dev->dev_private;
112 struct msm_ringbuffer *ring = submit->ring;
113 unsigned int i, ibs = 0;
114
115 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 priv->lastctx = NULL;
117 a5xx_submit_in_rb(gpu, submit, ctx);
118 return;
119 }
120
121 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 OUT_RING(ring, 0x02);
123
124 /* Turn off protected mode to write to special registers */
125 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 OUT_RING(ring, 0);
127
128 /* Set the save preemption record for the ring/command */
129 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132
133 /* Turn back on protected mode */
134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 OUT_RING(ring, 1);
136
137 /* Enable local preemption for finegrain preemption */
138 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
139 OUT_RING(ring, 0x02);
140
141 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 OUT_RING(ring, 0x02);
144
145 /* Submit the commands */
146 for (i = 0; i < submit->nr_cmds; i++) {
147 switch (submit->cmd[i].type) {
148 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 break;
150 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 if (priv->lastctx == ctx)
152 break;
153 /* fall-thru */
154 case MSM_SUBMIT_CMD_BUF:
155 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 OUT_RING(ring, submit->cmd[i].size);
159 ibs++;
160 break;
161 }
162 }
163
164 /*
165 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 * are done rendering - otherwise a lucky preemption would start
167 * replaying from the last checkpoint
168 */
169 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 OUT_RING(ring, 0);
171 OUT_RING(ring, 0);
172 OUT_RING(ring, 0);
173 OUT_RING(ring, 0);
174 OUT_RING(ring, 0);
175
176 /* Turn off IB level preemptions */
177 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 OUT_RING(ring, 0x01);
179
180 /* Write the fence to the scratch register */
181 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 OUT_RING(ring, submit->seqno);
183
184 /*
185 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 * timestamp is written to the memory and then triggers the interrupt
187 */
188 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192 OUT_RING(ring, submit->seqno);
193
194 /* Yield the floor on command completion */
195 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
196 /*
197 * If dword[2:1] are non zero, they specify an address for the CP to
198 * write the value of dword[3] to on preemption complete. Write 0 to
199 * skip the write
200 */
201 OUT_RING(ring, 0x00);
202 OUT_RING(ring, 0x00);
203 /* Data value - not used if the address above is 0 */
204 OUT_RING(ring, 0x01);
205 /* Set bit 0 to trigger an interrupt on preempt complete */
206 OUT_RING(ring, 0x01);
207
208 a5xx_flush(gpu, ring);
209
210 /* Check to see if we need to start preemption */
211 a5xx_preempt_trigger(gpu);
212 }
213
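/*
 * Per-block hardware clock gating settings (clock control, hysteresis and
 * delay values) written out by a5xx_set_hwcg() below.
 */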
214 static const struct {
215 u32 offset;
216 u32 value;
217 } a5xx_hwcg[] = {
218 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
219 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
220 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
221 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
222 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
224 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
225 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
226 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
227 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
228 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
229 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
230 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
231 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
232 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
233 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
234 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
235 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
236 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
237 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
238 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
240 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
241 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
242 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
244 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
245 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
246 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
247 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
248 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
249 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
250 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
251 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
252 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
253 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
254 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
255 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
256 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
257 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
258 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
259 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
260 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
261 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
262 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
264 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
265 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
266 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
268 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
269 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
270 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
271 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
272 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
273 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
274 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
275 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
276 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
277 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
278 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
279 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
280 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
283 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
284 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
286 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
287 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
288 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
289 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
290 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
292 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
293 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
294 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
295 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
297 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
298 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
299 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
300 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
301 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
302 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
303 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
304 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
305 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
306 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
307 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
308 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
309 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
310 };
311
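/*
 * Enable or disable hardware clock gating. Called with true from
 * a5xx_hw_init() (and from a5xx_pm_resume() on A510), and briefly with
 * false in a5xx_gpu_state_get() while the registers are being read.
 */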
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
313 {
314 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 unsigned int i;
316
317 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318 gpu_write(gpu, a5xx_hwcg[i].offset,
319 state ? a5xx_hwcg[i].value : 0);
320
321 if (adreno_is_a540(adreno_gpu)) {
322 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324 }
325
326 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
328 }
329
330 static int a5xx_me_init(struct msm_gpu *gpu)
331 {
332 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333 struct msm_ringbuffer *ring = gpu->rb[0];
334
335 OUT_PKT7(ring, CP_ME_INIT, 8);
336
337 OUT_RING(ring, 0x0000002F);
338
339 /* Enable multiple hardware contexts */
340 OUT_RING(ring, 0x00000003);
341
342 /* Enable error detection */
343 OUT_RING(ring, 0x20000000);
344
345 /* Don't enable header dump */
346 OUT_RING(ring, 0x00000000);
347 OUT_RING(ring, 0x00000000);
348
349 /* Specify workarounds for various microcode issues */
350 if (adreno_is_a530(adreno_gpu)) {
351 /* Workaround for token end syncs
352 * Force a WFI after every direct-render 3D mode draw and every
353 * 2D mode 3 draw
354 */
355 OUT_RING(ring, 0x0000000B);
356 } else if (adreno_is_a510(adreno_gpu)) {
357 /* Workaround for token and syncs */
358 OUT_RING(ring, 0x00000001);
359 } else {
360 /* No workarounds enabled */
361 OUT_RING(ring, 0x00000000);
362 }
363
364 OUT_RING(ring, 0x00000000);
365 OUT_RING(ring, 0x00000000);
366
367 gpu->funcs->flush(gpu, ring);
368 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
369 }
370
371 static int a5xx_preempt_start(struct msm_gpu *gpu)
372 {
373 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
375 struct msm_ringbuffer *ring = gpu->rb[0];
376
377 if (gpu->nr_rings == 1)
378 return 0;
379
380 /* Turn off protected mode to write to special registers */
381 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
382 OUT_RING(ring, 0);
383
384 /* Set the save preemption record for the ring/command */
385 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
386 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
387 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388
389 /* Turn back on protected mode */
390 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
391 OUT_RING(ring, 1);
392
393 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
394 OUT_RING(ring, 0x00);
395
396 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
397 OUT_RING(ring, 0x01);
398
399 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
400 OUT_RING(ring, 0x01);
401
402 /* Yield the floor on command completion */
403 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
404 OUT_RING(ring, 0x00);
405 OUT_RING(ring, 0x00);
406 OUT_RING(ring, 0x01);
407 OUT_RING(ring, 0x01);
408
409 gpu->funcs->flush(gpu, ring);
410
411 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
412 }
413
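/*
 * Load the PM4 and PFP microcode into GEM buffers (kept around for later
 * re-initialization) and point the CP instruction base registers at them.
 */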
414 static int a5xx_ucode_init(struct msm_gpu *gpu)
415 {
416 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
417 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
418 int ret;
419
420 if (!a5xx_gpu->pm4_bo) {
421 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
422 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
423
424
425 if (IS_ERR(a5xx_gpu->pm4_bo)) {
426 ret = PTR_ERR(a5xx_gpu->pm4_bo);
427 a5xx_gpu->pm4_bo = NULL;
428 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
429 ret);
430 return ret;
431 }
432
433 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
434 }
435
436 if (!a5xx_gpu->pfp_bo) {
437 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
438 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
439
440 if (IS_ERR(a5xx_gpu->pfp_bo)) {
441 ret = PTR_ERR(a5xx_gpu->pfp_bo);
442 a5xx_gpu->pfp_bo = NULL;
443 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
444 ret);
445 return ret;
446 }
447
448 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
449 }
450
451 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
452 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
453
454 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
455 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
456
457 return 0;
458 }
459
460 #define SCM_GPU_ZAP_SHADER_RESUME 0
461
462 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
463 {
464 int ret;
465
466 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
467 if (ret)
468 DRM_ERROR("%s: zap-shader resume failed: %d\n",
469 gpu->name, ret);
470
471 return ret;
472 }
473
474 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
475 {
476 static bool loaded;
477 int ret;
478
479 /*
480 * If the zap shader is already loaded into memory we just need to kick
481 * the remote processor to reinitialize it
482 */
483 if (loaded)
484 return a5xx_zap_shader_resume(gpu);
485
486 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
487
488 loaded = !ret;
489 return ret;
490 }
491
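/*
 * Interrupts unmasked in RBBM_INT_0_MASK during hw_init; each of these is
 * handled (or at least reported) in a5xx_irq() below.
 */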
492 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
493 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
494 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
495 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
496 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
497 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
498 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
499 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
500 A5XX_RBBM_INT_0_MASK_CP_SW | \
501 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
502 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
503 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
504
505 static int a5xx_hw_init(struct msm_gpu *gpu)
506 {
507 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
508 int ret;
509
510 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
511
512 if (adreno_is_a540(adreno_gpu))
513 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
514
515 /* Make all blocks contribute to the GPU BUSY perf counter */
516 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
517
518 /* Enable RBBM error reporting bits */
519 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
520
521 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
522 /*
523 * Mask out the activity signals from RB1-3 to avoid false
524 * positives
525 */
526
527 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
528 0xF0000000);
529 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
530 0xFFFFFFFF);
531 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
532 0xFFFFFFFF);
533 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
534 0xFFFFFFFF);
535 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
536 0xFFFFFFFF);
537 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
538 0xFFFFFFFF);
539 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
540 0xFFFFFFFF);
541 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
542 0xFFFFFFFF);
543 }
544
545 /* Enable fault detection */
546 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
547 (1 << 30) | 0xFFFF);
548
549 /* Turn on performance counters */
550 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
551
552 /* Select CP0 to always count cycles */
553 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
554
555 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
556 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
557
558 /* Increase VFD cache access so LRZ and other data gets evicted less */
559 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
560
561 /* Disable L2 bypass in the UCHE */
562 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
563 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
564 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
565 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
566
567 /* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem) */
568 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
569 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
570 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
571 0x00100000 + adreno_gpu->gmem - 1);
572 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
573
574 if (adreno_is_a510(adreno_gpu)) {
575 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
576 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
577 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
578 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
579 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
580 (0x200 << 11 | 0x200 << 22));
581 } else {
582 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
583 if (adreno_is_a530(adreno_gpu))
584 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
585 if (adreno_is_a540(adreno_gpu))
586 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
587 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
588 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
589 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
590 (0x400 << 11 | 0x300 << 22));
591 }
592
593 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
594 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
595
596 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
597
598 /* Enable USE_RETENTION_FLOPS */
599 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
600
601 /* Enable ME/PFP split notification */
602 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
603
604 /*
605 * In A5x, the CCU can send the context_done event for a particular
606 * context to the UCHE, which ultimately reaches the CP even while a
607 * valid transaction for that context is still inside the CCU. This can
608 * let the CP program config registers, causing the "valid transaction"
609 * inside the CCU to be interpreted differently, which can cause a GPU
610 * fault. This bug is fixed in the latest A510 revision. To enable the
611 * fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0; the default of
612 * 1 leaves the fix disabled. On older A510 revisions this bit is unused.
613 */
614 if (adreno_is_a510(adreno_gpu))
615 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
616
617 /* Enable HWCG */
618 a5xx_set_hwcg(gpu, true);
619
620 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
621
622 /* Set the highest bank bit */
623 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
624 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
625 if (adreno_is_a540(adreno_gpu))
626 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
627
628 /* Protect registers from the CP */
629 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
630
631 /* RBBM */
632 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
633 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
634 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
635 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
636 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
637 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
638
639 /* Content protect */
640 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
641 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
642 16));
643 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
644 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
645
646 /* CP */
647 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
648 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
649 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
650 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
651
652 /* RB */
653 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
654 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
655
656 /* VPC */
657 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
658 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
659
660 /* UCHE */
661 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
662
663 if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
664 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
665 ADRENO_PROTECT_RW(0x10000, 0x8000));
666
667 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
668 /*
669 * Disable the trusted memory range - we don't actually support secure
670 * memory rendering at this point in time and we don't want to block off
671 * part of the virtual memory space.
672 */
673 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
674 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
675 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
676
677 /* Put the GPU into 64 bit by default */
678 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
679 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
680 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
681 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
682 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
683 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
684 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
685 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
686 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
687 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
688 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
689 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
690
691 /*
692 * VPC corner case with local memory load kill leads to corrupt
693 * internal state. Normal Disable does not work for all a5x chips.
694 * So do the following setting to disable it.
695 */
696 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
697 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
698 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
699 }
700
701 ret = adreno_hw_init(gpu);
702 if (ret)
703 return ret;
704
705 a5xx_preempt_hw_init(gpu);
706
707 if (!adreno_is_a510(adreno_gpu))
708 a5xx_gpmu_ucode_init(gpu);
709
710 ret = a5xx_ucode_init(gpu);
711 if (ret)
712 return ret;
713
714 /* Enable (unmask) the interrupts we handle in a5xx_irq() */
715 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
716
717 /* Clear ME_HALT to start the micro engine */
718 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
719 ret = a5xx_me_init(gpu);
720 if (ret)
721 return ret;
722
723 ret = a5xx_power_init(gpu);
724 if (ret)
725 return ret;
726
727 /*
728 * Send a pipeline event stat to get misbehaving counters to start
729 * ticking correctly
730 */
731 if (adreno_is_a530(adreno_gpu)) {
732 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
733 OUT_RING(gpu->rb[0], 0x0F);
734
735 gpu->funcs->flush(gpu, gpu->rb[0]);
736 if (!a5xx_idle(gpu, gpu->rb[0]))
737 return -EINVAL;
738 }
739
740 /*
741 * If the chip we are using supports loading a zap shader, try to load
742 * it into the secure world. If successful we can use the CP to switch
743 * out of secure mode. If not, we have no recourse but to try to switch
744 * ourselves out manually. If we guessed wrong then access to the
745 * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
746 * violation will soon follow.
747 */
748 ret = a5xx_zap_shader_init(gpu);
749 if (!ret) {
750 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
751 OUT_RING(gpu->rb[0], 0x00000000);
752
753 gpu->funcs->flush(gpu, gpu->rb[0]);
754 if (!a5xx_idle(gpu, gpu->rb[0]))
755 return -EINVAL;
756 } else if (ret == -ENODEV) {
757 /*
758 * This device does not use a zap shader (but print a warning
759 * just in case someone got their dt wrong.. hopefully they
760 * have a debug UART to realize the error of their ways...
761 * if you mess this up you are about to crash horribly)
762 */
763 dev_warn_once(gpu->dev->dev,
764 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
765 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
766 } else {
767 return ret;
768 }
769
770 /* Last step - yield the ringbuffer */
771 a5xx_preempt_start(gpu);
772
773 return 0;
774 }
775
776 static void a5xx_recover(struct msm_gpu *gpu)
777 {
778 int i;
779
780 adreno_dump_info(gpu);
781
782 for (i = 0; i < 8; i++) {
783 printk("CP_SCRATCH_REG%d: %u\n", i,
784 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
785 }
786
787 if (hang_debug)
788 a5xx_dump(gpu);
789
790 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
791 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
792 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
793 adreno_recover(gpu);
794 }
795
796 static void a5xx_destroy(struct msm_gpu *gpu)
797 {
798 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
799 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
800
801 DBG("%s", gpu->name);
802
803 a5xx_preempt_fini(gpu);
804
805 if (a5xx_gpu->pm4_bo) {
806 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
807 drm_gem_object_put(a5xx_gpu->pm4_bo);
808 }
809
810 if (a5xx_gpu->pfp_bo) {
811 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
812 drm_gem_object_put(a5xx_gpu->pfp_bo);
813 }
814
815 if (a5xx_gpu->gpmu_bo) {
816 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
817 drm_gem_object_put(a5xx_gpu->gpmu_bo);
818 }
819
820 adreno_gpu_cleanup(adreno_gpu);
821 kfree(a5xx_gpu);
822 }
823
824 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
825 {
826 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
827 return false;
828
829 /*
830 * Nearly every abnormality ends up pausing the GPU and triggering a
831 * fault so we can safely just watch for this one interrupt to fire
832 */
833 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
834 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
835 }
836
837 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
838 {
839 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
840 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
841
842 if (ring != a5xx_gpu->cur_ring) {
843 WARN(1, "Tried to idle a non-current ringbuffer\n");
844 return false;
845 }
846
847 /* wait for CP to drain ringbuffer: */
848 if (!adreno_idle(gpu, ring))
849 return false;
850
851 if (spin_until(_a5xx_check_idle(gpu))) {
852 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
853 gpu->name, __builtin_return_address(0),
854 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
855 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
856 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
857 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
858 return false;
859 }
860
861 return true;
862 }
863
864 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
865 {
866 struct msm_gpu *gpu = arg;
867 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
868 iova, flags,
869 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
870 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
871 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
872 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
873
874 return -EFAULT;
875 }
876
877 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
878 {
879 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
880
881 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
882 u32 val;
883
884 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
885
886 /*
887 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
888 * read it twice
889 */
890
891 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
892 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
893
894 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
895 val);
896 }
897
898 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
899 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
900 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
901
902 if (status & A5XX_CP_INT_CP_DMA_ERROR)
903 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
904
905 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
906 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
907
908 dev_err_ratelimited(gpu->dev->dev,
909 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
910 val & (1 << 24) ? "WRITE" : "READ",
911 (val & 0xFFFFF) >> 2, val);
912 }
913
914 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
915 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
916 const char *access[16] = { "reserved", "reserved",
917 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
918 "", "", "me read", "me write", "", "", "crashdump read",
919 "crashdump write" };
920
921 dev_err_ratelimited(gpu->dev->dev,
922 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
923 status & 0xFFFFF, access[(status >> 24) & 0xF],
924 (status & (1 << 31)), status);
925 }
926 }
927
928 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
929 {
930 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
931 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
932
933 dev_err_ratelimited(gpu->dev->dev,
934 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
935 val & (1 << 28) ? "WRITE" : "READ",
936 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
937 (val >> 24) & 0xF);
938
939 /* Clear the error */
940 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
941
942 /* Clear the interrupt */
943 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
944 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
945 }
946
947 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
948 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
949
950 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
951 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
952 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
953
954 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
955 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
956 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
957
958 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
959 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
960 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
961
962 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
963 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
964
965 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
966 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
967 }
968
969 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
970 {
971 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
972
973 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
974
975 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
976 addr);
977 }
978
979 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
980 {
981 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
982 }
983
984 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
985 {
986 struct drm_device *dev = gpu->dev;
987 struct msm_drm_private *priv = dev->dev_private;
988 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
989
990 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
991 ring ? ring->id : -1, ring ? ring->seqno : 0,
992 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
993 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
994 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
995 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
996 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
997 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
998 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
999
1000 /* Turn off the hangcheck timer to keep it from bothering us */
1001 del_timer(&gpu->hangcheck_timer);
1002
1003 queue_work(priv->wq, &gpu->recover_work);
1004 }
1005
1006 #define RBBM_ERROR_MASK \
1007 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1008 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1009 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1010 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1011 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1012 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1013
1014 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1015 {
1016 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1017
1018 /*
1019 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1020 * before the source is cleared the interrupt will storm.
1021 */
1022 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1023 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1024
1025 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1026 if (status & RBBM_ERROR_MASK)
1027 a5xx_rbbm_err_irq(gpu, status);
1028
1029 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1030 a5xx_cp_err_irq(gpu);
1031
1032 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1033 a5xx_fault_detect_irq(gpu);
1034
1035 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1036 a5xx_uche_err_irq(gpu);
1037
1038 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1039 a5xx_gpmu_err_irq(gpu);
1040
1041 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1042 a5xx_preempt_trigger(gpu);
1043 msm_gpu_retire(gpu);
1044 }
1045
1046 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1047 a5xx_preempt_irq(gpu);
1048
1049 return IRQ_HANDLED;
1050 }
1051
1052 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1053 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1054 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1055 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1056 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1057 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1058 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1059 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1060 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1061 };
1062
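/*
 * Register ranges dumped by the common adreno snapshot code: inclusive
 * {start, end} pairs, terminated by ~0.
 */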
1063 static const u32 a5xx_registers[] = {
1064 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1065 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1066 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1067 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1068 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1069 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1070 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1071 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1072 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1073 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1074 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1075 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1076 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1077 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1078 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1079 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1080 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1081 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1082 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1083 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1084 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1085 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1086 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1087 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1088 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1089 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1090 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1091 0xAC60, 0xAC60, ~0,
1092 };
1093
1094 static void a5xx_dump(struct msm_gpu *gpu)
1095 {
1096 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
1097 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1098 adreno_dump(gpu);
1099 }
1100
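/*
 * Power-up sequence: bring up core power and clocks via msm_gpu_pm_resume(),
 * then (on everything except A510) turn on the RBCCU and SP power domains
 * through the GPMU registers and poll the corresponding PWR_CLK_STATUS bits
 * until they come up.
 */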
1101 static int a5xx_pm_resume(struct msm_gpu *gpu)
1102 {
1103 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1104 int ret;
1105
1106 /* Turn on the core power */
1107 ret = msm_gpu_pm_resume(gpu);
1108 if (ret)
1109 return ret;
1110
1111 if (adreno_is_a510(adreno_gpu)) {
1112 /* Halt the sp_input_clk at HM level */
1113 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1114 a5xx_set_hwcg(gpu, true);
1115 /* Turn on sp_input_clk at HM level */
1116 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1117 return 0;
1118 }
1119
1120 /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1121 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1122
1123 /* Wait 3 usecs before polling */
1124 udelay(3);
1125
1126 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1127 (1 << 20), (1 << 20));
1128 if (ret) {
1129 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1130 gpu->name,
1131 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1132 return ret;
1133 }
1134
1135 /* Turn on the SP domain */
1136 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1137 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1138 (1 << 20), (1 << 20));
1139 if (ret)
1140 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1141 gpu->name);
1142
1143 return ret;
1144 }
1145
1146 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1147 {
1148 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1149 u32 mask = 0xf;
1150
1151 /* A510 has 3 XIN ports in VBIF */
1152 if (adreno_is_a510(adreno_gpu))
1153 mask = 0x7;
1154
1155 /* Clear the VBIF pipe before shutting down */
1156 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1157 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1158 mask) == mask);
1159
1160 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1161
1162 /*
1163 * Reset the VBIF before power collapse to avoid issue with FIFO
1164 * entries
1165 */
1166 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1167 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1168
1169 return msm_gpu_pm_suspend(gpu);
1170 }
1171
1172 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1173 {
1174 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1175 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1176
1177 return 0;
1178 }
1179
1180 struct a5xx_crashdumper {
1181 void *ptr;
1182 struct drm_gem_object *bo;
1183 u64 iova;
1184 };
1185
1186 struct a5xx_gpu_state {
1187 struct msm_gpu_state base;
1188 u32 *hlsqregs;
1189 };
1190
1191 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1192 struct a5xx_crashdumper *dumper)
1193 {
1194 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1195 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1196 &dumper->bo, &dumper->iova);
1197
1198 if (!IS_ERR(dumper->ptr))
1199 msm_gem_object_set_name(dumper->bo, "crashdump");
1200
1201 return PTR_ERR_OR_ZERO(dumper->ptr);
1202 }
1203
1204 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1205 struct a5xx_crashdumper *dumper)
1206 {
1207 u32 val;
1208
1209 if (IS_ERR_OR_NULL(dumper->ptr))
1210 return -EINVAL;
1211
1212 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1213 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1214
1215 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1216
1217 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1218 val & 0x04, 100, 10000);
1219 }
1220
1221 /*
1222 * This is a list of the registers that need to be read through the HLSQ
1223 * aperture by the crashdumper. They are not normally accessible from
1224 * the CPU on a secure platform.
1225 */
1226 static const struct {
1227 u32 type;
1228 u32 regoffset;
1229 u32 count;
1230 } a5xx_hlsq_aperture_regs[] = {
1231 { 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
1232 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
1233 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
1234 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
1235 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
1236 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
1237 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
1238 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
1239 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1240 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1241 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
1242 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
1243 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
1244 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
1245 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
1246 };
1247
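/*
 * The crashdump script built below is a sequence of 64-bit word pairs, as
 * used here: { value, (register << 44) | (1 << 21) | 1 } writes 'value' to
 * 'register' (selecting an HLSQ_DBG_READ_SEL bank), while
 * { target iova, (register << 44) | count } copies 'count' dwords from the
 * HLSQ_DBG_AHB_READ_APERTURE into GPU memory at 'target iova'. Two zero
 * words terminate the script.
 */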
1248 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1249 struct a5xx_gpu_state *a5xx_state)
1250 {
1251 struct a5xx_crashdumper dumper = { 0 };
1252 u32 offset, count = 0;
1253 u64 *ptr;
1254 int i;
1255
1256 if (a5xx_crashdumper_init(gpu, &dumper))
1257 return;
1258
1259 /* The script will be written at offset 0 */
1260 ptr = dumper.ptr;
1261
1262 /* Start writing the data at offset 256k */
1263 offset = dumper.iova + (256 * SZ_1K);
1264
1265 /* Count how many additional registers to get from the HLSQ aperture */
1266 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1267 count += a5xx_hlsq_aperture_regs[i].count;
1268
1269 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1270 if (!a5xx_state->hlsqregs)
1271 return;
1272
1273 /* Build the crashdump script */
1274 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1275 u32 type = a5xx_hlsq_aperture_regs[i].type;
1276 u32 c = a5xx_hlsq_aperture_regs[i].count;
1277
1278 /* Write the register to select the desired bank */
1279 *ptr++ = ((u64) type << 8);
1280 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1281 (1 << 21) | 1;
1282
1283 *ptr++ = offset;
1284 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1285 | c;
1286
1287 offset += c * sizeof(u32);
1288 }
1289
1290 /* Write two zeros to close off the script */
1291 *ptr++ = 0;
1292 *ptr++ = 0;
1293
1294 if (a5xx_crashdumper_run(gpu, &dumper)) {
1295 kfree(a5xx_state->hlsqregs);
1296 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1297 return;
1298 }
1299
1300 /* Copy the data from the crashdumper to the state */
1301 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1302 count * sizeof(u32));
1303
1304 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305 }
1306
1307 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1308 {
1309 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1310 GFP_KERNEL);
1311
1312 if (!a5xx_state)
1313 return ERR_PTR(-ENOMEM);
1314
1315 /* Temporarily disable hardware clock gating before reading the hw */
1316 a5xx_set_hwcg(gpu, false);
1317
1318 /* First get the generic state from the adreno core */
1319 adreno_gpu_state_get(gpu, &(a5xx_state->base));
1320
1321 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1322
1323 /* Get the HLSQ regs with the help of the crashdumper */
1324 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1325
1326 a5xx_set_hwcg(gpu, true);
1327
1328 return &a5xx_state->base;
1329 }
1330
1331 static void a5xx_gpu_state_destroy(struct kref *kref)
1332 {
1333 struct msm_gpu_state *state = container_of(kref,
1334 struct msm_gpu_state, ref);
1335 struct a5xx_gpu_state *a5xx_state = container_of(state,
1336 struct a5xx_gpu_state, base);
1337
1338 kfree(a5xx_state->hlsqregs);
1339
1340 adreno_gpu_state_destroy(state);
1341 kfree(a5xx_state);
1342 }
1343
1344 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1345 {
1346 if (IS_ERR_OR_NULL(state))
1347 return 1;
1348
1349 return kref_put(&state->ref, a5xx_gpu_state_destroy);
1350 }
1351
1352
1353 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1354 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1355 struct drm_printer *p)
1356 {
1357 int i, j;
1358 u32 pos = 0;
1359 struct a5xx_gpu_state *a5xx_state = container_of(state,
1360 struct a5xx_gpu_state, base);
1361
1362 if (IS_ERR_OR_NULL(state))
1363 return;
1364
1365 adreno_show(gpu, state, p);
1366
1367 /* Dump the additional a5xx HLSQ registers */
1368 if (!a5xx_state->hlsqregs)
1369 return;
1370
1371 drm_printf(p, "registers-hlsq:\n");
1372
1373 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1374 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1375 u32 c = a5xx_hlsq_aperture_regs[i].count;
1376
1377 for (j = 0; j < c; j++, pos++, o++) {
1378 /*
1379 * To keep the crashdump simple we pull the entire range
1380 * for each register type but not all of the registers
1381 * in the range are valid. Fortunately invalid registers
1382 * stick out like a sore thumb with a value of
1383 * 0xdeadbeef
1384 */
1385 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1386 continue;
1387
1388 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
1389 o << 2, a5xx_state->hlsqregs[pos]);
1390 }
1391 }
1392 }
1393 #endif
1394
1395 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1396 {
1397 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1398 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1399
1400 return a5xx_gpu->cur_ring;
1401 }
1402
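/*
 * devfreq busy accounting: returns the number of microseconds the GPU was
 * busy since the previous call, computed as the busy-cycle delta from RBBM
 * perf counter 0 (programmed to the busy countable in hw_init) divided by
 * the core clock rate in MHz.
 */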
1403 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1404 {
1405 u64 busy_cycles, busy_time;
1406
1407 /* Only read the gpu busy if the hardware is already active */
1408 if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1409 return 0;
1410
1411 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1412 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1413
1414 busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1415 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1416
1417 gpu->devfreq.busy_cycles = busy_cycles;
1418
1419 pm_runtime_put(&gpu->pdev->dev);
1420
1421 if (WARN_ON(busy_time > ~0LU))
1422 return ~0LU;
1423
1424 return (unsigned long)busy_time;
1425 }
1426
1427 static const struct adreno_gpu_funcs funcs = {
1428 .base = {
1429 .get_param = adreno_get_param,
1430 .hw_init = a5xx_hw_init,
1431 .pm_suspend = a5xx_pm_suspend,
1432 .pm_resume = a5xx_pm_resume,
1433 .recover = a5xx_recover,
1434 .submit = a5xx_submit,
1435 .flush = a5xx_flush,
1436 .active_ring = a5xx_active_ring,
1437 .irq = a5xx_irq,
1438 .destroy = a5xx_destroy,
1439 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1440 .show = a5xx_show,
1441 #endif
1442 #if defined(CONFIG_DEBUG_FS)
1443 .debugfs_init = a5xx_debugfs_init,
1444 #endif
1445 .gpu_busy = a5xx_gpu_busy,
1446 .gpu_state_get = a5xx_gpu_state_get,
1447 .gpu_state_put = a5xx_gpu_state_put,
1448 .create_address_space = adreno_iommu_create_address_space,
1449 },
1450 .get_timestamp = a5xx_get_timestamp,
1451 };
1452
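/*
 * Read the GPU speed bin from an nvmem cell (typically a qfprom fuse
 * referenced by the GPU device-tree node, e.g. a hypothetical
 * nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin";) and use
 * it to select the matching opp-supported-hw bitmask. If no cell is found,
 * fall back to a value outside the real bins so only the default
 * frequencies are enabled.
 */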
1453 static void check_speed_bin(struct device *dev)
1454 {
1455 struct nvmem_cell *cell;
1456 u32 val;
1457
1458 /*
1459 * If the OPP table specifies an opp-supported-hw property then we have
1460 * to set something with dev_pm_opp_set_supported_hw() or the table
1461 * doesn't get populated, so pick an arbitrary value that should
1462 * ensure the default frequencies are selected but not conflict with
1463 * any actual bins.
1464 */
1465 val = 0x80;
1466
1467 cell = nvmem_cell_get(dev, "speed_bin");
1468
1469 if (!IS_ERR(cell)) {
1470 void *buf = nvmem_cell_read(cell, NULL);
1471
1472 if (!IS_ERR(buf)) {
1473 u8 bin = *((u8 *) buf);
1474
1475 val = (1 << bin);
1476 kfree(buf);
1477 }
1478
1479 nvmem_cell_put(cell);
1480 }
1481
1482 dev_pm_opp_set_supported_hw(dev, &val, 1);
1483 }
1484
1485 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1486 {
1487 struct msm_drm_private *priv = dev->dev_private;
1488 struct platform_device *pdev = priv->gpu_pdev;
1489 struct a5xx_gpu *a5xx_gpu = NULL;
1490 struct adreno_gpu *adreno_gpu;
1491 struct msm_gpu *gpu;
1492 int ret;
1493
1494 if (!pdev) {
1495 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1496 return ERR_PTR(-ENXIO);
1497 }
1498
1499 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1500 if (!a5xx_gpu)
1501 return ERR_PTR(-ENOMEM);
1502
1503 adreno_gpu = &a5xx_gpu->base;
1504 gpu = &adreno_gpu->base;
1505
1506 adreno_gpu->registers = a5xx_registers;
1507 adreno_gpu->reg_offsets = a5xx_register_offsets;
1508
1509 a5xx_gpu->lm_leakage = 0x4E001A;
1510
1511 check_speed_bin(&pdev->dev);
1512
1513 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1514 if (ret) {
1515 a5xx_destroy(&(a5xx_gpu->base.base));
1516 return ERR_PTR(ret);
1517 }
1518
1519 if (gpu->aspace)
1520 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1521
1522 /* Set up the preemption specific bits and pieces for each ringbuffer */
1523 a5xx_preempt_init(gpu);
1524
1525 return gpu;
1526 }