/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
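
/*
 * Firmware images are fetched from userspace via request_firmware() in
 * cik_init_microcode(); one set of ucode files per CIK ASIC.
 */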
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
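/*
 * PCIE registers are reached through an index/data pair: the register
 * offset goes into PCIE_INDEX and the payload moves through PCIE_DATA.
 * The dummy reads below flush the posted write to the index register
 * before the data access takes place.
 */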
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

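/*
 * RLC save/restore register lists; "spectre" is the GFX core used in
 * Kaveri (see cik_init_golden_registers()).  Format inferred from the
 * entries: each register is encoded as a selector in the upper 16 bits
 * (0x0e00 appears to broadcast, 0x4e00-0xbe00 address individual
 * instances) with the register's dword offset in the lower 16 bits,
 * usually followed by a zero placeholder; a bare count such as 0x3 or
 * 0x5 introduces a run of offsets without placeholders.
 */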
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

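/* "kalindi" is the GFX core used in Kabini; same list format as above. */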
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

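/*
 * "Golden" register tables: {offset, mask, value} triplets applied by
 * radeon_program_register_sequence().  A mask of 0xffffffff writes the
 * value verbatim; anything else is a read-modify-write.
 */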
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

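/* mgcg/cgcg: medium- and coarse-grain clock gating setup */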
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

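/*
 * The doorbell aperture is a BAR-mapped region the driver uses to kick
 * rings (e.g. compute wptr updates); it is accessed a dword at a time.
 */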
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

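/*
 * Each pair below is written as an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} sequence by ci_mc_load_microcode() before the
 * MC ucode upload.
 */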
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination. Callers typically reselect
 * (0, 0, 0, 0) when done to restore the default instance.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* running is zero in this branch, so the blackout
		 * save/restore below is effectively dead code
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
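/*
 * Each GB_TILE_MODE entry packs the array mode, pipe configuration,
 * tile split and micro tile mode into one dword; the macrotile table
 * that follows packs bank width/height, macro aspect and bank count.
 */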
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1935 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1937 break;
1938 case 4:
1939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 TILE_SPLIT(split_equal_to_row_size));
1943 break;
1944 case 5:
1945 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1946 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1947 break;
1948 case 6:
1949 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1951 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1953 break;
1954 case 7:
1955 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1957 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1958 TILE_SPLIT(split_equal_to_row_size));
1959 break;
1960 case 8:
1961 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1962 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1963 break;
1964 case 9:
1965 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1966 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1967 break;
1968 case 10:
1969 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1971 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1972 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1973 break;
1974 case 11:
1975 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1976 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1977 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1979 break;
1980 case 12:
1981 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1982 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1983 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1985 break;
1986 case 13:
1987 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1988 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1989 break;
1990 case 14:
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1995 break;
1996 case 16:
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2001 break;
2002 case 17:
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2007 break;
2008 case 27:
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2011 break;
2012 case 28:
2013 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2015 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2017 break;
2018 case 29:
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2023 break;
2024 case 30:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2029 break;
2030 default:
2031 gb_tile_moden = 0;
2032 break;
2033 }
39aee490 2034 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2035 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2036 }
2037 } else if (num_rbs < 4) {
2038 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2039 switch (reg_offset) {
2040 case 0:
2041 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2043 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2044 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2045 break;
2046 case 1:
2047 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2049 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2051 break;
2052 case 2:
2053 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2056 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2057 break;
2058 case 3:
2059 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2061 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2063 break;
2064 case 4:
2065 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2067 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2068 TILE_SPLIT(split_equal_to_row_size));
2069 break;
2070 case 5:
2071 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2073 break;
2074 case 6:
2075 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2078 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2079 break;
2080 case 7:
2081 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2083 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2084 TILE_SPLIT(split_equal_to_row_size));
2085 break;
2086 case 8:
2087 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2088 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2089 break;
2090 case 9:
2091 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2093 break;
2094 case 10:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 break;
2100 case 11:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105 break;
2106 case 12:
2107 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2109 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2111 break;
2112 case 13:
2113 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2115 break;
2116 case 14:
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2121 break;
2122 case 16:
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127 break;
2128 case 17:
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2133 break;
2134 case 27:
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2137 break;
2138 case 28:
2139 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143 break;
2144 case 29:
2145 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 break;
2150 case 30:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 break;
2156 default:
2157 gb_tile_moden = 0;
2158 break;
2159 }
39aee490 2160 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2161 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2162 }
2163 }
2164 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2165 switch (reg_offset) {
2166 case 0:
2167 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2170 NUM_BANKS(ADDR_SURF_16_BANK));
2171 break;
2172 case 1:
2173 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176 NUM_BANKS(ADDR_SURF_16_BANK));
2177 break;
2178 case 2:
2179 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2182 NUM_BANKS(ADDR_SURF_16_BANK));
2183 break;
2184 case 3:
2185 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2188 NUM_BANKS(ADDR_SURF_16_BANK));
2189 break;
2190 case 4:
2191 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2194 NUM_BANKS(ADDR_SURF_16_BANK));
2195 break;
2196 case 5:
2197 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200 NUM_BANKS(ADDR_SURF_8_BANK));
2201 break;
2202 case 6:
2203 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 NUM_BANKS(ADDR_SURF_4_BANK));
2207 break;
2208 case 8:
2209 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2212 NUM_BANKS(ADDR_SURF_16_BANK));
2213 break;
2214 case 9:
2215 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2218 NUM_BANKS(ADDR_SURF_16_BANK));
2219 break;
2220 case 10:
2221 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2224 NUM_BANKS(ADDR_SURF_16_BANK));
2225 break;
2226 case 11:
2227 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2230 NUM_BANKS(ADDR_SURF_16_BANK));
2231 break;
2232 case 12:
2233 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2236 NUM_BANKS(ADDR_SURF_16_BANK));
2237 break;
2238 case 13:
2239 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242 NUM_BANKS(ADDR_SURF_8_BANK));
2243 break;
2244 case 14:
2245 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2248 NUM_BANKS(ADDR_SURF_4_BANK));
2249 break;
2250 default:
2251 gb_tile_moden = 0;
2252 break;
2253 }
2254 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2255 }
2256 } else if (num_pipe_configs == 2) {
2257 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2258 switch (reg_offset) {
2259 case 0:
2260 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2262 PIPE_CONFIG(ADDR_SURF_P2) |
2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2264 break;
2265 case 1:
2266 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2268 PIPE_CONFIG(ADDR_SURF_P2) |
2269 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2270 break;
2271 case 2:
2272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2274 PIPE_CONFIG(ADDR_SURF_P2) |
2275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2276 break;
2277 case 3:
2278 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2280 PIPE_CONFIG(ADDR_SURF_P2) |
2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2282 break;
2283 case 4:
2284 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2286 PIPE_CONFIG(ADDR_SURF_P2) |
2287 TILE_SPLIT(split_equal_to_row_size));
2288 break;
2289 case 5:
2290 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2292 break;
2293 case 6:
2294 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2296 PIPE_CONFIG(ADDR_SURF_P2) |
2297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2298 break;
2299 case 7:
2300 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 TILE_SPLIT(split_equal_to_row_size));
2304 break;
2305 case 8:
2306 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2307 break;
2308 case 9:
2309 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2311 break;
2312 case 10:
2313 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317 break;
2318 case 11:
2319 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2323 break;
2324 case 12:
2325 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 break;
2330 case 13:
2331 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2333 break;
2334 case 14:
2335 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339 break;
2340 case 16:
2341 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 break;
2346 case 17:
2347 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2349 PIPE_CONFIG(ADDR_SURF_P2) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351 break;
2352 case 27:
2353 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2355 break;
2356 case 28:
2357 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2359 PIPE_CONFIG(ADDR_SURF_P2) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 break;
2362 case 29:
2363 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2365 PIPE_CONFIG(ADDR_SURF_P2) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 break;
2368 case 30:
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P2) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 break;
2374 default:
2375 gb_tile_moden = 0;
2376 break;
2377 }
39aee490 2378 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2379 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2380 }
2381 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2382 switch (reg_offset) {
2383 case 0:
2384 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2387 NUM_BANKS(ADDR_SURF_16_BANK));
2388 break;
2389 case 1:
2390 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393 NUM_BANKS(ADDR_SURF_16_BANK));
2394 break;
2395 case 2:
2396 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399 NUM_BANKS(ADDR_SURF_16_BANK));
2400 break;
2401 case 3:
2402 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405 NUM_BANKS(ADDR_SURF_16_BANK));
2406 break;
2407 case 4:
2408 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2411 NUM_BANKS(ADDR_SURF_16_BANK));
2412 break;
2413 case 5:
2414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417 NUM_BANKS(ADDR_SURF_16_BANK));
2418 break;
2419 case 6:
2420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 NUM_BANKS(ADDR_SURF_8_BANK));
2424 break;
2425 case 8:
2426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2429 NUM_BANKS(ADDR_SURF_16_BANK));
2430 break;
2431 case 9:
2432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2435 NUM_BANKS(ADDR_SURF_16_BANK));
2436 break;
2437 case 10:
2438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2441 NUM_BANKS(ADDR_SURF_16_BANK));
2442 break;
2443 case 11:
2444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2447 NUM_BANKS(ADDR_SURF_16_BANK));
2448 break;
2449 case 12:
2450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2453 NUM_BANKS(ADDR_SURF_16_BANK));
2454 break;
2455 case 13:
2456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459 NUM_BANKS(ADDR_SURF_16_BANK));
2460 break;
2461 case 14:
2462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 NUM_BANKS(ADDR_SURF_8_BANK));
2466 break;
2467 default:
2468 gb_tile_moden = 0;
2469 break;
2470 }
2471 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2472 }
2473 } else
2474 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2475}
2476
2477/**
2478 * cik_select_se_sh - select which SE, SH to address
2479 *
2480 * @rdev: radeon_device pointer
2481 * @se_num: shader engine to address
2482 * @sh_num: sh block to address
2483 *
2484 * Select which SE, SH combinations to address. Certain
2485 * registers are instanced per SE or SH. 0xffffffff means
2486 * broadcast to all SEs or SHs (CIK).
2487 */
2488static void cik_select_se_sh(struct radeon_device *rdev,
2489 u32 se_num, u32 sh_num)
2490{
2491 u32 data = INSTANCE_BROADCAST_WRITES;
2492
2493 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
b0fe3d39 2494 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2495 else if (se_num == 0xffffffff)
2496 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2497 else if (sh_num == 0xffffffff)
2498 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2499 else
2500 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2501 WREG32(GRBM_GFX_INDEX, data);
2502}
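
/* Typical usage (as in cik_setup_rb below): select one SE/SH pair,
 * access the instanced registers, then restore broadcast mode with
 * cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) so that subsequent
 * register writes reach every instance again.
 */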
2503
2504/**
2505 * cik_create_bitmask - create a bitmask
2506 *
2507 * @bit_width: length of the mask
2508 *
2509 * create a variable length bit mask (CIK).
2510 * Returns the bitmask.
2511 */
2512static u32 cik_create_bitmask(u32 bit_width)
2513{
2514 u32 i, mask = 0;
2515
2516 for (i = 0; i < bit_width; i++) {
2517 mask <<= 1;
2518 mask |= 1;
2519 }
2520 return mask;
2521}
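
/* For bit_width < 32 this is equivalent to the closed form
 * (1U << bit_width) - 1; the loop form also handles bit_width == 32
 * (yielding 0xffffffff) without relying on an undefined 32-bit shift.
 */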
2522
2523/**
2524 * cik_get_rb_disabled - calculate the bitmask of disabled RBs
2525 *
2526 * @rdev: radeon_device pointer
2527 * @max_rb_num: max RBs (render backends) for the asic
2528 * @se_num: number of SEs (shader engines) for the asic
2529 * @sh_per_se: number of SH blocks per SE for the asic
2530 *
2531 * Calculates the bitmask of disabled RBs (CIK).
2532 * Returns the disabled RB bitmask.
2533 */
2534static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2535 u32 max_rb_num, u32 se_num,
2536 u32 sh_per_se)
2537{
2538 u32 data, mask;
2539
2540 data = RREG32(CC_RB_BACKEND_DISABLE);
2541 if (data & 1)
2542 data &= BACKEND_DISABLE_MASK;
2543 else
2544 data = 0;
2545 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2546
2547 data >>= BACKEND_DISABLE_SHIFT;
2548
2549 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2550
2551 return data & mask;
2552}
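
/* Illustrative arithmetic (hypothetical values, not a specific asic):
 * with max_rb_num = 4, se_num = 2 and sh_per_se = 1, the mask covers
 * 4 / 2 / 1 = 2 RBs per SH, i.e. cik_create_bitmask(2) = 0x3.
 */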
2553
2554/**
2555 * cik_setup_rb - setup the RBs on the asic
2556 *
2557 * @rdev: radeon_device pointer
2558 * @se_num: number of SEs (shader engines) for the asic
2559 * @sh_per_se: number of SH blocks per SE for the asic
2560 * @max_rb_num: max RBs (render backends) for the asic
2561 *
2562 * Configures per-SE/SH RB registers (CIK).
2563 */
2564static void cik_setup_rb(struct radeon_device *rdev,
2565 u32 se_num, u32 sh_per_se,
2566 u32 max_rb_num)
2567{
2568 int i, j;
2569 u32 data, mask;
2570 u32 disabled_rbs = 0;
2571 u32 enabled_rbs = 0;
2572
2573 for (i = 0; i < se_num; i++) {
2574 for (j = 0; j < sh_per_se; j++) {
2575 cik_select_se_sh(rdev, i, j);
2576 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2577 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2578 }
2579 }
2580 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2581
2582 mask = 1;
2583 for (i = 0; i < max_rb_num; i++) {
2584 if (!(disabled_rbs & mask))
2585 enabled_rbs |= mask;
2586 mask <<= 1;
2587 }
2588
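	/* The loop below consumes enabled_rbs two bits at a time (one 2-bit
	 * RB group per SH) and packs the corresponding RASTER_CONFIG RB_MAP
	 * selection into the matching 2-bit field of the register value.
	 */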
2589 for (i = 0; i < se_num; i++) {
2590 cik_select_se_sh(rdev, i, 0xffffffff);
2591 data = 0;
2592 for (j = 0; j < sh_per_se; j++) {
2593 switch (enabled_rbs & 3) {
2594 case 1:
2595 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2596 break;
2597 case 2:
2598 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2599 break;
2600 case 3:
2601 default:
2602 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2603 break;
2604 }
2605 enabled_rbs >>= 2;
2606 }
2607 WREG32(PA_SC_RASTER_CONFIG, data);
2608 }
2609 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2610}
2611
2612/**
2613 * cik_gpu_init - setup the 3D engine
2614 *
2615 * @rdev: radeon_device pointer
2616 *
2617 * Configures the 3D engine and tiling configuration
2618 * registers so that the 3D engine is usable.
2619 */
2620static void cik_gpu_init(struct radeon_device *rdev)
2621{
2622 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2623 u32 mc_shared_chmap, mc_arb_ramcfg;
2624 u32 hdp_host_path_cntl;
2625 u32 tmp;
2626 int i, j;
2627
2628 switch (rdev->family) {
2629 case CHIP_BONAIRE:
2630 rdev->config.cik.max_shader_engines = 2;
2631 rdev->config.cik.max_tile_pipes = 4;
2632 rdev->config.cik.max_cu_per_sh = 7;
2633 rdev->config.cik.max_sh_per_se = 1;
2634 rdev->config.cik.max_backends_per_se = 2;
2635 rdev->config.cik.max_texture_channel_caches = 4;
2636 rdev->config.cik.max_gprs = 256;
2637 rdev->config.cik.max_gs_threads = 32;
2638 rdev->config.cik.max_hw_contexts = 8;
2639
2640 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2641 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2642 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2643 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2644 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2645 break;
2646 case CHIP_KAVERI:
2647 /* TODO */
2648 break;
2649 case CHIP_KABINI:
2650 default:
2651 rdev->config.cik.max_shader_engines = 1;
2652 rdev->config.cik.max_tile_pipes = 2;
2653 rdev->config.cik.max_cu_per_sh = 2;
2654 rdev->config.cik.max_sh_per_se = 1;
2655 rdev->config.cik.max_backends_per_se = 1;
2656 rdev->config.cik.max_texture_channel_caches = 2;
2657 rdev->config.cik.max_gprs = 256;
2658 rdev->config.cik.max_gs_threads = 16;
2659 rdev->config.cik.max_hw_contexts = 8;
2660
2661 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2662 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2663 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2664 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2665 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2666 break;
2667 }
2668
2669 /* Initialize HDP */
2670 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2671 WREG32((0x2c14 + j), 0x00000000);
2672 WREG32((0x2c18 + j), 0x00000000);
2673 WREG32((0x2c1c + j), 0x00000000);
2674 WREG32((0x2c20 + j), 0x00000000);
2675 WREG32((0x2c24 + j), 0x00000000);
2676 }
2677
2678 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2679
2680 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2681
2682 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2683 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2684
2685 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2686 rdev->config.cik.mem_max_burst_length_bytes = 256;
2687 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2688 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2689 if (rdev->config.cik.mem_row_size_in_kb > 4)
2690 rdev->config.cik.mem_row_size_in_kb = 4;
2691 /* XXX use MC settings? */
2692 rdev->config.cik.shader_engine_tile_size = 32;
2693 rdev->config.cik.num_gpus = 1;
2694 rdev->config.cik.multi_gpu_tile_size = 64;
2695
2696 /* fix up row size */
2697 gb_addr_config &= ~ROW_SIZE_MASK;
2698 switch (rdev->config.cik.mem_row_size_in_kb) {
2699 case 1:
2700 default:
2701 gb_addr_config |= ROW_SIZE(0);
2702 break;
2703 case 2:
2704 gb_addr_config |= ROW_SIZE(1);
2705 break;
2706 case 4:
2707 gb_addr_config |= ROW_SIZE(2);
2708 break;
2709 }
2710
2711 /* setup tiling info dword. gb_addr_config is not adequate since it does
2712 * not have bank info, so create a custom tiling dword.
2713 * bits 3:0 num_pipes
2714 * bits 7:4 num_banks
2715 * bits 11:8 group_size
2716 * bits 15:12 row_size
2717 */
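	/* Worked example (illustrative values, not a specific asic): with
	 * 4 tile pipes the switch below sets bits 3:0 to 2; a nonzero
	 * NOOFBANK field then sets bit 4, giving tile_config = 0x12 before
	 * the group_size (bits 11:8) and row_size (bits 15:12) fields are
	 * OR'ed in.
	 */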
2718 rdev->config.cik.tile_config = 0;
2719 switch (rdev->config.cik.num_tile_pipes) {
2720 case 1:
2721 rdev->config.cik.tile_config |= (0 << 0);
2722 break;
2723 case 2:
2724 rdev->config.cik.tile_config |= (1 << 0);
2725 break;
2726 case 4:
2727 rdev->config.cik.tile_config |= (2 << 0);
2728 break;
2729 case 8:
2730 default:
2731 /* XXX what about 12? */
2732 rdev->config.cik.tile_config |= (3 << 0);
2733 break;
2734 }
2735 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2736 rdev->config.cik.tile_config |= 1 << 4;
2737 else
2738 rdev->config.cik.tile_config |= 0 << 4;
2739 rdev->config.cik.tile_config |=
2740 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2741 rdev->config.cik.tile_config |=
2742 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2743
2744 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2745 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2746 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2747 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2748 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2749 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2750 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2751 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2752
2753 cik_tiling_mode_table_init(rdev);
2754
2755 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2756 rdev->config.cik.max_sh_per_se,
2757 rdev->config.cik.max_backends_per_se);
2758
2759 /* set HW defaults for 3D engine */
2760 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2761
2762 WREG32(SX_DEBUG_1, 0x20);
2763
2764 WREG32(TA_CNTL_AUX, 0x00010000);
2765
2766 tmp = RREG32(SPI_CONFIG_CNTL);
2767 tmp |= 0x03000000;
2768 WREG32(SPI_CONFIG_CNTL, tmp);
2769
2770 WREG32(SQ_CONFIG, 1);
2771
2772 WREG32(DB_DEBUG, 0);
2773
2774 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2775 tmp |= 0x00000400;
2776 WREG32(DB_DEBUG2, tmp);
2777
2778 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2779 tmp |= 0x00020200;
2780 WREG32(DB_DEBUG3, tmp);
2781
2782 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2783 tmp |= 0x00018208;
2784 WREG32(CB_HW_CONTROL, tmp);
2785
2786 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2787
2788 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2789 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2790 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2791 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2792
2793 WREG32(VGT_NUM_INSTANCES, 1);
2794
2795 WREG32(CP_PERFMON_CNTL, 0);
2796
2797 WREG32(SQ_CONFIG, 0);
2798
2799 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2800 FORCE_EOV_MAX_REZ_CNT(255)));
2801
2802 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2803 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2804
2805 WREG32(VGT_GS_VERTEX_REUSE, 16);
2806 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2807
2808 tmp = RREG32(HDP_MISC_CNTL);
2809 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2810 WREG32(HDP_MISC_CNTL, tmp);
2811
2812 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2813 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2814
2815 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2816 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2817
2818 udelay(50);
2819}
2820
2821/*
2822 * GPU scratch registers helpers function.
2823 */
2824/**
2825 * cik_scratch_init - setup driver info for CP scratch regs
2826 *
2827 * @rdev: radeon_device pointer
2828 *
2829 * Set up the number and offset of the CP scratch registers.
2830 * NOTE: use of CP scratch registers is a legacy interface and
2831 * is not used by default on newer asics (r6xx+). On newer asics,
2832 * memory buffers are used for fences rather than scratch regs.
2833 */
2834static void cik_scratch_init(struct radeon_device *rdev)
2835{
2836 int i;
2837
2838 rdev->scratch.num_reg = 7;
2839 rdev->scratch.reg_base = SCRATCH_REG0;
2840 for (i = 0; i < rdev->scratch.num_reg; i++) {
2841 rdev->scratch.free[i] = true;
2842 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2843 }
2844}
2845
2846/**
2847 * cik_ring_test - basic gfx ring test
2848 *
2849 * @rdev: radeon_device pointer
2850 * @ring: radeon_ring structure holding ring information
2851 *
2852 * Allocate a scratch register and write to it using the gfx ring (CIK).
2853 * Provides a basic gfx ring test to verify that the ring is working.
2854 * Used by cik_cp_gfx_resume().
2855 * Returns 0 on success, error on failure.
2856 */
2857int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2858{
2859 uint32_t scratch;
2860 uint32_t tmp = 0;
2861 unsigned i;
2862 int r;
2863
2864 r = radeon_scratch_get(rdev, &scratch);
2865 if (r) {
2866 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2867 return r;
2868 }
2869 WREG32(scratch, 0xCAFEDEAD);
2870 r = radeon_ring_lock(rdev, ring, 3);
2871 if (r) {
2872 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2873 radeon_scratch_free(rdev, scratch);
2874 return r;
2875 }
2876 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2877 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2878 radeon_ring_write(ring, 0xDEADBEEF);
2879 radeon_ring_unlock_commit(rdev, ring);
963e81f9 2880
2881 for (i = 0; i < rdev->usec_timeout; i++) {
2882 tmp = RREG32(scratch);
2883 if (tmp == 0xDEADBEEF)
2884 break;
2885 DRM_UDELAY(1);
2886 }
2887 if (i < rdev->usec_timeout) {
2888 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2889 } else {
2890 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2891 ring->idx, scratch, tmp);
2892 r = -EINVAL;
2893 }
2894 radeon_scratch_free(rdev, scratch);
2895 return r;
2896}
2897
2cae3bc3 2898/**
b07fdd38 2899 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2900 *
2901 * @rdev: radeon_device pointer
2902 * @fence: radeon fence object
2903 *
2904 * Emits a fence sequence number on the gfx ring and flushes
2905 * GPU caches.
2906 */
2907void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2908 struct radeon_fence *fence)
2909{
2910 struct radeon_ring *ring = &rdev->ring[fence->ring];
2911 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2912
2913 /* EVENT_WRITE_EOP - flush caches, send int */
2914 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2915 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2916 EOP_TC_ACTION_EN |
2917 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2918 EVENT_INDEX(5)));
2919 radeon_ring_write(ring, addr & 0xfffffffc);
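	/* DATA_SEL(1) selects a 32-bit fence value and INT_SEL(2) requests
	 * an interrupt once the write is confirmed; this appears to follow
	 * the EVENT_WRITE_EOP conventions used on earlier radeon asics.
	 */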
2920 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2921 radeon_ring_write(ring, fence->seq);
2922 radeon_ring_write(ring, 0);
2923 /* HDP flush */
2924 /* We should be using the new WAIT_REG_MEM special op packet here
2925 * but it causes the CP to hang
2926 */
2927 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2928 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2929 WRITE_DATA_DST_SEL(0)));
2930 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2931 radeon_ring_write(ring, 0);
2932 radeon_ring_write(ring, 0);
2933}
2934
2935/**
2936 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2937 *
2938 * @rdev: radeon_device pointer
2939 * @fence: radeon fence object
2940 *
2941 * Emits a fence sequence number on the compute ring and flushes
2942 * GPU caches.
2943 */
2944void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2945 struct radeon_fence *fence)
2946{
2947 struct radeon_ring *ring = &rdev->ring[fence->ring];
2948 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2949
2950 /* RELEASE_MEM - flush caches, send int */
2951 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2952 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2953 EOP_TC_ACTION_EN |
2954 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2955 EVENT_INDEX(5)));
2956 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2957 radeon_ring_write(ring, addr & 0xfffffffc);
2958 radeon_ring_write(ring, upper_32_bits(addr));
2959 radeon_ring_write(ring, fence->seq);
2960 radeon_ring_write(ring, 0);
2961 /* HDP flush */
2962 /* We should be using the new WAIT_REG_MEM special op packet here
2963 * but it causes the CP to hang
2964 */
2965 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2966 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2967 WRITE_DATA_DST_SEL(0)));
2968 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2969 radeon_ring_write(ring, 0);
2970 radeon_ring_write(ring, 0);
2971}
2972
2973void cik_semaphore_ring_emit(struct radeon_device *rdev,
2974 struct radeon_ring *ring,
2975 struct radeon_semaphore *semaphore,
2976 bool emit_wait)
2977{
2978 uint64_t addr = semaphore->gpu_addr;
2979 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2980
2981 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2982 radeon_ring_write(ring, addr & 0xffffffff);
2983 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2984}
2985
2986/*
2987 * IB stuff
2988 */
2989/**
2990 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2991 *
2992 * @rdev: radeon_device pointer
2993 * @ib: radeon indirect buffer object
2994 *
2995 * Emits a DE (drawing engine) or CE (constant engine) IB
2996 * on the gfx ring. IBs are usually generated by userspace
2997 * acceleration drivers and submitted to the kernel for
2998 * scheduling on the ring. This function schedules the IB
2999 * on the gfx ring for execution by the GPU.
3000 */
3001void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3002{
3003 struct radeon_ring *ring = &rdev->ring[ib->ring];
3004 u32 header, control = INDIRECT_BUFFER_VALID;
3005
3006 if (ib->is_const_ib) {
3007 /* set switch buffer packet before const IB */
3008 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3009 radeon_ring_write(ring, 0);
3010
3011 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3012 } else {
3013 u32 next_rptr;
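		/* next_rptr accounts for the rptr update packet emitted below
		 * (3 dwords for the SET_UCONFIG_REG form, 5 for the WRITE_DATA
		 * form) plus the 4-dword INDIRECT_BUFFER packet that follows.
		 */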
3014 if (ring->rptr_save_reg) {
3015 next_rptr = ring->wptr + 3 + 4;
3016 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3017 radeon_ring_write(ring, ((ring->rptr_save_reg -
3018 PACKET3_SET_UCONFIG_REG_START) >> 2));
3019 radeon_ring_write(ring, next_rptr);
3020 } else if (rdev->wb.enabled) {
3021 next_rptr = ring->wptr + 5 + 4;
3022 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3023 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3024 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3025 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3026 radeon_ring_write(ring, next_rptr);
3027 }
3028
3029 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3030 }
3031
3032 control |= ib->length_dw |
3033 (ib->vm ? (ib->vm->id << 24) : 0);
3034
3035 radeon_ring_write(ring, header);
3036 radeon_ring_write(ring,
3037#ifdef __BIG_ENDIAN
3038 (2 << 0) |
3039#endif
3040 (ib->gpu_addr & 0xFFFFFFFC));
3041 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3042 radeon_ring_write(ring, control);
3043}
3044
3045/**
3046 * cik_ib_test - basic gfx ring IB test
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ring: radeon_ring structure holding ring information
3050 *
3051 * Allocate an IB and execute it on the gfx ring (CIK).
3052 * Provides a basic gfx ring test to verify that IBs are working.
3053 * Returns 0 on success, error on failure.
3054 */
3055int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3056{
3057 struct radeon_ib ib;
3058 uint32_t scratch;
3059 uint32_t tmp = 0;
3060 unsigned i;
3061 int r;
3062
3063 r = radeon_scratch_get(rdev, &scratch);
3064 if (r) {
3065 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3066 return r;
3067 }
3068 WREG32(scratch, 0xCAFEDEAD);
3069 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3070 if (r) {
3071 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3072 return r;
3073 }
3074 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3075 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3076 ib.ptr[2] = 0xDEADBEEF;
3077 ib.length_dw = 3;
3078 r = radeon_ib_schedule(rdev, &ib, NULL);
3079 if (r) {
3080 radeon_scratch_free(rdev, scratch);
3081 radeon_ib_free(rdev, &ib);
3082 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3083 return r;
3084 }
3085 r = radeon_fence_wait(ib.fence, false);
3086 if (r) {
3087 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3088 return r;
3089 }
3090 for (i = 0; i < rdev->usec_timeout; i++) {
3091 tmp = RREG32(scratch);
3092 if (tmp == 0xDEADBEEF)
3093 break;
3094 DRM_UDELAY(1);
3095 }
3096 if (i < rdev->usec_timeout) {
3097 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3098 } else {
3099 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3100 scratch, tmp);
3101 r = -EINVAL;
3102 }
3103 radeon_scratch_free(rdev, scratch);
3104 radeon_ib_free(rdev, &ib);
3105 return r;
3106}
3107
3108/*
3109 * CP.
3110 * On CIK, gfx and compute now have independent command processors.
3111 *
3112 * GFX
3113 * Gfx consists of a single ring and can process both gfx jobs and
3114 * compute jobs. The gfx CP consists of three microengines (ME):
3115 * PFP - Pre-Fetch Parser
3116 * ME - Micro Engine
3117 * CE - Constant Engine
3118 * The PFP and ME make up what is considered the Drawing Engine (DE).
3119 * The CE is an asynchronous engine used for updating buffer descriptors
3120 * used by the DE so that they can be loaded into cache in parallel
3121 * while the DE is processing state update packets.
3122 *
3123 * Compute
3124 * The compute CP consists of two microengines (ME):
3125 * MEC1 - Compute MicroEngine 1
3126 * MEC2 - Compute MicroEngine 2
3127 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3128 * The queues are exposed to userspace and are programmed directly
3129 * by the compute runtime.
3130 */
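/* With the counts above, a two-MEC part (Kaveri) exposes up to
 * 2 * 4 * 8 = 64 compute queues, while single-MEC parts
 * (Bonaire/Kabini) expose 32; see cik_mec_init below.
 */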
3131/**
3132 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3133 *
3134 * @rdev: radeon_device pointer
3135 * @enable: enable or disable the MEs
3136 *
3137 * Halts or unhalts the gfx MEs.
3138 */
3139static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3140{
3141 if (enable)
3142 WREG32(CP_ME_CNTL, 0);
3143 else {
3144 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3145 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3146 }
3147 udelay(50);
3148}
3149
3150/**
3151 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3152 *
3153 * @rdev: radeon_device pointer
3154 *
3155 * Loads the gfx PFP, ME, and CE ucode.
3156 * Returns 0 for success, -EINVAL if the ucode is not available.
3157 */
3158static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3159{
3160 const __be32 *fw_data;
3161 int i;
3162
3163 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3164 return -EINVAL;
3165
3166 cik_cp_gfx_enable(rdev, false);
3167
3168 /* PFP */
3169 fw_data = (const __be32 *)rdev->pfp_fw->data;
3170 WREG32(CP_PFP_UCODE_ADDR, 0);
3171 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3172 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3173 WREG32(CP_PFP_UCODE_ADDR, 0);
3174
3175 /* CE */
3176 fw_data = (const __be32 *)rdev->ce_fw->data;
3177 WREG32(CP_CE_UCODE_ADDR, 0);
3178 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3179 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3180 WREG32(CP_CE_UCODE_ADDR, 0);
3181
3182 /* ME */
3183 fw_data = (const __be32 *)rdev->me_fw->data;
3184 WREG32(CP_ME_RAM_WADDR, 0);
3185 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3186 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3187 WREG32(CP_ME_RAM_WADDR, 0);
3188
3189 WREG32(CP_PFP_UCODE_ADDR, 0);
3190 WREG32(CP_CE_UCODE_ADDR, 0);
3191 WREG32(CP_ME_RAM_WADDR, 0);
3192 WREG32(CP_ME_RAM_RADDR, 0);
3193 return 0;
3194}
3195
3196/**
3197 * cik_cp_gfx_start - start the gfx ring
3198 *
3199 * @rdev: radeon_device pointer
3200 *
3201 * Enables the ring and loads the clear state context and other
3202 * packets required to init the ring.
3203 * Returns 0 for success, error for failure.
3204 */
3205static int cik_cp_gfx_start(struct radeon_device *rdev)
3206{
3207 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3208 int r, i;
3209
3210 /* init the CP */
3211 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3212 WREG32(CP_ENDIAN_SWAP, 0);
3213 WREG32(CP_DEVICE_ID, 1);
3214
3215 cik_cp_gfx_enable(rdev, true);
3216
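	/* 17 fixed dwords surround the clear state emitted below:
	 * SET_BASE (4) + two PREAMBLE_CNTLs (2 + 2) + CONTEXT_CONTROL (3)
	 * + CLEAR_STATE (2) + SET_CONTEXT_REG (4), hence
	 * cik_default_size + 17.
	 */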
3217 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3218 if (r) {
3219 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3220 return r;
3221 }
3222
3223 /* init the CE partitions. CE only used for gfx on CIK */
3224 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3225 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3226 radeon_ring_write(ring, 0xc000);
3227 radeon_ring_write(ring, 0xc000);
3228
3229 /* setup clear context state */
3230 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3231 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3232
3233 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3234 radeon_ring_write(ring, 0x80000000);
3235 radeon_ring_write(ring, 0x80000000);
3236
3237 for (i = 0; i < cik_default_size; i++)
3238 radeon_ring_write(ring, cik_default_state[i]);
3239
3240 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3241 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3242
3243 /* set clear context state */
3244 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3245 radeon_ring_write(ring, 0);
3246
3247 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3248 radeon_ring_write(ring, 0x00000316);
3249 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3250 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3251
3252 radeon_ring_unlock_commit(rdev, ring);
3253
3254 return 0;
3255}
3256
3257/**
3258 * cik_cp_gfx_fini - stop the gfx ring
3259 *
3260 * @rdev: radeon_device pointer
3261 *
3262 * Stop the gfx ring and tear down the driver ring
3263 * info.
3264 */
3265static void cik_cp_gfx_fini(struct radeon_device *rdev)
3266{
3267 cik_cp_gfx_enable(rdev, false);
3268 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3269}
3270
3271/**
3272 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3273 *
3274 * @rdev: radeon_device pointer
3275 *
3276 * Program the location and size of the gfx ring buffer
3277 * and test it to make sure it's working.
3278 * Returns 0 for success, error for failure.
3279 */
3280static int cik_cp_gfx_resume(struct radeon_device *rdev)
3281{
3282 struct radeon_ring *ring;
3283 u32 tmp;
3284 u32 rb_bufsz;
3285 u64 rb_addr;
3286 int r;
3287
3288 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3289 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3290
3291 /* Set the write pointer delay */
3292 WREG32(CP_RB_WPTR_DELAY, 0);
3293
3294 /* set the RB to use vmid 0 */
3295 WREG32(CP_RB_VMID, 0);
3296
3297 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3298
3299 /* ring 0 - compute and gfx */
3300 /* Set ring buffer size */
3301 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3302 rb_bufsz = drm_order(ring->ring_size / 8);
3303 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3304#ifdef __BIG_ENDIAN
3305 tmp |= BUF_SWAP_32BIT;
3306#endif
3307 WREG32(CP_RB0_CNTL, tmp);
3308
3309 /* Initialize the ring buffer's read and write pointers */
3310 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3311 ring->wptr = 0;
3312 WREG32(CP_RB0_WPTR, ring->wptr);
3313
3314 /* set the wb address whether it's enabled or not */
3315 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3316 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3317
3318 /* scratch register shadowing is no longer supported */
3319 WREG32(SCRATCH_UMSK, 0);
3320
3321 if (!rdev->wb.enabled)
3322 tmp |= RB_NO_UPDATE;
3323
3324 mdelay(1);
3325 WREG32(CP_RB0_CNTL, tmp);
3326
3327 rb_addr = ring->gpu_addr >> 8;
3328 WREG32(CP_RB0_BASE, rb_addr);
3329 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3330
3331 ring->rptr = RREG32(CP_RB0_RPTR);
3332
3333 /* start the ring */
3334 cik_cp_gfx_start(rdev);
3335 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3336 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3337 if (r) {
3338 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3339 return r;
3340 }
3341 return 0;
3342}
3343
3344u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3345 struct radeon_ring *ring)
3346{
3347 u32 rptr;
3348
3349
3350
3351 if (rdev->wb.enabled) {
3352 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3353 } else {
f61d5b46 3354 mutex_lock(&rdev->srbm_mutex);
3355 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3356 rptr = RREG32(CP_HQD_PQ_RPTR);
3357 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3358 mutex_unlock(&rdev->srbm_mutex);
3359 }
3360 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3361
3362 return rptr;
3363}
3364
3365u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3366 struct radeon_ring *ring)
3367{
3368 u32 wptr;
3369
3370 if (rdev->wb.enabled) {
3371 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3372 } else {
f61d5b46 3373 mutex_lock(&rdev->srbm_mutex);
3374 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3375 wptr = RREG32(CP_HQD_PQ_WPTR);
3376 cik_srbm_select(rdev, 0, 0, 0, 0);
f61d5b46 3377 mutex_unlock(&rdev->srbm_mutex);
3378 }
3379 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3380
3381 return wptr;
3382}
3383
3384void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3385 struct radeon_ring *ring)
3386{
3387 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3388
3389 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3390 WDOORBELL32(ring->doorbell_offset, wptr);
3391}
3392
3393/**
3394 * cik_cp_compute_enable - enable/disable the compute CP MEs
3395 *
3396 * @rdev: radeon_device pointer
3397 * @enable: enable or disable the MEs
3398 *
3399 * Halts or unhalts the compute MEs.
3400 */
3401static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3402{
3403 if (enable)
3404 WREG32(CP_MEC_CNTL, 0);
3405 else
3406 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3407 udelay(50);
3408}
3409
3410/**
3411 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3412 *
3413 * @rdev: radeon_device pointer
3414 *
3415 * Loads the compute MEC1&2 ucode.
3416 * Returns 0 for success, -EINVAL if the ucode is not available.
3417 */
3418static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3419{
3420 const __be32 *fw_data;
3421 int i;
3422
3423 if (!rdev->mec_fw)
3424 return -EINVAL;
3425
3426 cik_cp_compute_enable(rdev, false);
3427
3428 /* MEC1 */
3429 fw_data = (const __be32 *)rdev->mec_fw->data;
3430 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3431 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3432 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3433 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3434
3435 if (rdev->family == CHIP_KAVERI) {
3436 /* MEC2 */
3437 fw_data = (const __be32 *)rdev->mec_fw->data;
3438 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3439 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3440 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3441 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3442 }
3443
3444 return 0;
3445}
3446
3447/**
3448 * cik_cp_compute_start - start the compute queues
3449 *
3450 * @rdev: radeon_device pointer
3451 *
3452 * Enable the compute queues.
3453 * Returns 0 for success, error for failure.
3454 */
3455static int cik_cp_compute_start(struct radeon_device *rdev)
3456{
3457 cik_cp_compute_enable(rdev, true);
3458
3459 return 0;
3460}
3461
3462/**
3463 * cik_cp_compute_fini - stop the compute queues
3464 *
3465 * @rdev: radeon_device pointer
3466 *
3467 * Stop the compute queues and tear down the driver queue
3468 * info.
3469 */
3470static void cik_cp_compute_fini(struct radeon_device *rdev)
3471{
3472 int i, idx, r;
3473
841cf442 3474 cik_cp_compute_enable(rdev, false);
3475
3476 for (i = 0; i < 2; i++) {
3477 if (i == 0)
3478 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3479 else
3480 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3481
3482 if (rdev->ring[idx].mqd_obj) {
3483 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3484 if (unlikely(r != 0))
3485 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3486
3487 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3488 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3489
3490 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3491 rdev->ring[idx].mqd_obj = NULL;
3492 }
3493 }
3494}
3495
3496static void cik_mec_fini(struct radeon_device *rdev)
3497{
3498 int r;
3499
3500 if (rdev->mec.hpd_eop_obj) {
3501 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3502 if (unlikely(r != 0))
3503 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3504 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3505 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3506
3507 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3508 rdev->mec.hpd_eop_obj = NULL;
3509 }
3510}
3511
3512#define MEC_HPD_SIZE 2048
3513
3514static int cik_mec_init(struct radeon_device *rdev)
3515{
3516 int r;
3517 u32 *hpd;
3518
3519 /*
3520 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3521 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3522 */
3523 if (rdev->family == CHIP_KAVERI)
3524 rdev->mec.num_mec = 2;
3525 else
3526 rdev->mec.num_mec = 1;
3527 rdev->mec.num_pipe = 4;
3528 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3529
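	/* EOP buffer sizing below follows num_mec * num_pipe * MEC_HPD_SIZE
	 * * 2, i.e. 2 * 4 * 2048 * 2 = 32 KiB on Kaveri and 16 KiB on
	 * single-MEC parts.
	 */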
3530 if (rdev->mec.hpd_eop_obj == NULL) {
3531 r = radeon_bo_create(rdev,
3532 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3533 PAGE_SIZE, true,
3534 RADEON_GEM_DOMAIN_GTT, NULL,
3535 &rdev->mec.hpd_eop_obj);
3536 if (r) {
3537 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3538 return r;
3539 }
3540 }
3541
3542 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3543 if (unlikely(r != 0)) {
3544 cik_mec_fini(rdev);
3545 return r;
3546 }
3547 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3548 &rdev->mec.hpd_eop_gpu_addr);
3549 if (r) {
3550 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3551 cik_mec_fini(rdev);
3552 return r;
3553 }
3554 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3555 if (r) {
3556 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3557 cik_mec_fini(rdev);
3558 return r;
3559 }
3560
3561 /* clear memory. Not sure if this is required or not */
3562 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3563
3564 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3565 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3566
3567 return 0;
3568}
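/*
 * Sizing sketch (illustrative): with MEC_HPD_SIZE = 2048 the HPD EOP bo
 * created above is num_mec * num_pipe * 2048 * 2 bytes, i.e. 32 KiB on
 * KV (2 MEC) and 16 KiB on CI/KB (1 MEC). That is one 2 * MEC_HPD_SIZE
 * slice per pipe, matching the (i * MEC_HPD_SIZE * 2) stride used when
 * programming CP_HPD_EOP_BASE_ADDR in cik_cp_compute_resume() below.
 */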
3569
3570struct hqd_registers
3571{
3572 u32 cp_mqd_base_addr;
3573 u32 cp_mqd_base_addr_hi;
3574 u32 cp_hqd_active;
3575 u32 cp_hqd_vmid;
3576 u32 cp_hqd_persistent_state;
3577 u32 cp_hqd_pipe_priority;
3578 u32 cp_hqd_queue_priority;
3579 u32 cp_hqd_quantum;
3580 u32 cp_hqd_pq_base;
3581 u32 cp_hqd_pq_base_hi;
3582 u32 cp_hqd_pq_rptr;
3583 u32 cp_hqd_pq_rptr_report_addr;
3584 u32 cp_hqd_pq_rptr_report_addr_hi;
3585 u32 cp_hqd_pq_wptr_poll_addr;
3586 u32 cp_hqd_pq_wptr_poll_addr_hi;
3587 u32 cp_hqd_pq_doorbell_control;
3588 u32 cp_hqd_pq_wptr;
3589 u32 cp_hqd_pq_control;
3590 u32 cp_hqd_ib_base_addr;
3591 u32 cp_hqd_ib_base_addr_hi;
3592 u32 cp_hqd_ib_rptr;
3593 u32 cp_hqd_ib_control;
3594 u32 cp_hqd_iq_timer;
3595 u32 cp_hqd_iq_rptr;
3596 u32 cp_hqd_dequeue_request;
3597 u32 cp_hqd_dma_offload;
3598 u32 cp_hqd_sema_cmd;
3599 u32 cp_hqd_msg_type;
3600 u32 cp_hqd_atomic0_preop_lo;
3601 u32 cp_hqd_atomic0_preop_hi;
3602 u32 cp_hqd_atomic1_preop_lo;
3603 u32 cp_hqd_atomic1_preop_hi;
3604 u32 cp_hqd_hq_scheduler0;
3605 u32 cp_hqd_hq_scheduler1;
3606 u32 cp_mqd_control;
3607};
3608
3609struct bonaire_mqd
3610{
3611 u32 header;
3612 u32 dispatch_initiator;
3613 u32 dimensions[3];
3614 u32 start_idx[3];
3615 u32 num_threads[3];
3616 u32 pipeline_stat_enable;
3617 u32 perf_counter_enable;
3618 u32 pgm[2];
3619 u32 tba[2];
3620 u32 tma[2];
3621 u32 pgm_rsrc[2];
3622 u32 vmid;
3623 u32 resource_limits;
3624 u32 static_thread_mgmt01[2];
3625 u32 tmp_ring_size;
3626 u32 static_thread_mgmt23[2];
3627 u32 restart[3];
3628 u32 thread_trace_enable;
3629 u32 reserved1;
3630 u32 user_data[16];
3631 u32 vgtcs_invoke_count[2];
3632 struct hqd_registers queue_state;
3633 u32 dequeue_cntr;
3634 u32 interrupt_queue[64];
3635};
3636
3637/**
3638 * cik_cp_compute_resume - setup the compute queue registers
3639 *
3640 * @rdev: radeon_device pointer
3641 *
3642 * Program the compute queues and test them to make sure they
3643 * are working.
3644 * Returns 0 for success, error for failure.
3645 */
3646static int cik_cp_compute_resume(struct radeon_device *rdev)
3647{
3648 int r, i, j, idx;
3649 u32 tmp;
3650 bool use_doorbell = true;
3651 u64 hqd_gpu_addr;
3652 u64 mqd_gpu_addr;
3653 u64 eop_gpu_addr;
3654 u64 wb_gpu_addr;
3655 u32 *buf;
3656 struct bonaire_mqd *mqd;
3657
3658 r = cik_cp_compute_start(rdev);
3659 if (r)
3660 return r;
3661
3662 /* fix up chicken bits */
3663 tmp = RREG32(CP_CPF_DEBUG);
3664 tmp |= (1 << 23);
3665 WREG32(CP_CPF_DEBUG, tmp);
3666
3667 /* init the pipes */
3668 mutex_lock(&rdev->srbm_mutex);
3669 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3670 int me = (i < 4) ? 1 : 2;
3671 int pipe = (i < 4) ? i : (i - 4);
3672
3673 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3674
3675 cik_srbm_select(rdev, me, pipe, 0, 0);
3676
3677 /* write the EOP addr */
3678 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3679 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3680
3681 /* set the VMID assigned */
3682 WREG32(CP_HPD_EOP_VMID, 0);
3683
3684 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
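/* e.g. drm_order(MEC_HPD_SIZE / 8) = drm_order(256) = 8, so the hw
 * uses 2^(8+1) = 512 dwords = 2048 bytes = MEC_HPD_SIZE
 */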
3685 tmp = RREG32(CP_HPD_EOP_CONTROL);
3686 tmp &= ~EOP_SIZE_MASK;
3687 tmp |= drm_order(MEC_HPD_SIZE / 8);
3688 WREG32(CP_HPD_EOP_CONTROL, tmp);
3689 }
3690 cik_srbm_select(rdev, 0, 0, 0, 0);
3691 mutex_unlock(&rdev->srbm_mutex);
3692
3693 /* init the queues. Just two for now. */
3694 for (i = 0; i < 2; i++) {
3695 if (i == 0)
3696 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3697 else
3698 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3699
3700 if (rdev->ring[idx].mqd_obj == NULL) {
3701 r = radeon_bo_create(rdev,
3702 sizeof(struct bonaire_mqd),
3703 PAGE_SIZE, true,
3704 RADEON_GEM_DOMAIN_GTT, NULL,
3705 &rdev->ring[idx].mqd_obj);
3706 if (r) {
3707 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3708 return r;
3709 }
3710 }
3711
3712 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3713 if (unlikely(r != 0)) {
3714 cik_cp_compute_fini(rdev);
3715 return r;
3716 }
3717 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3718 &mqd_gpu_addr);
3719 if (r) {
3720 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3721 cik_cp_compute_fini(rdev);
3722 return r;
3723 }
3724 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3725 if (r) {
3726 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3727 cik_cp_compute_fini(rdev);
3728 return r;
3729 }
3730
3731 /* doorbell offset */
3732 rdev->ring[idx].doorbell_offset =
3733 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
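/* byte offset into the per-ring doorbell page; programmed below as a
 * dword index via DOORBELL_OFFSET(doorbell_offset / 4)
 */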
3734
3735 /* init the mqd struct */
3736 memset(buf, 0, sizeof(struct bonaire_mqd));
3737
3738 mqd = (struct bonaire_mqd *)buf;
3739 mqd->header = 0xC0310800;
3740 mqd->static_thread_mgmt01[0] = 0xffffffff;
3741 mqd->static_thread_mgmt01[1] = 0xffffffff;
3742 mqd->static_thread_mgmt23[0] = 0xffffffff;
3743 mqd->static_thread_mgmt23[1] = 0xffffffff;
3744
3745 mutex_lock(&rdev->srbm_mutex);
3746 cik_srbm_select(rdev, rdev->ring[idx].me,
3747 rdev->ring[idx].pipe,
3748 rdev->ring[idx].queue, 0);
3749
3750 /* disable wptr polling */
3751 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3752 tmp &= ~WPTR_POLL_EN;
3753 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3754
3755 /* enable doorbell? */
3756 mqd->queue_state.cp_hqd_pq_doorbell_control =
3757 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3758 if (use_doorbell)
3759 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3760 else
3761 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3762 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3763 mqd->queue_state.cp_hqd_pq_doorbell_control);
3764
3765 /* disable the queue if it's active */
3766 mqd->queue_state.cp_hqd_dequeue_request = 0;
3767 mqd->queue_state.cp_hqd_pq_rptr = 0;
3768 mqd->queue_state.cp_hqd_pq_wptr = 0;
3769 if (RREG32(CP_HQD_ACTIVE) & 1) {
3770 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3771 for (j = 0; j < rdev->usec_timeout; j++) {
3772 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3773 break;
3774 udelay(1);
3775 }
3776 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3777 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3778 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3779 }
3780
3781 /* set the pointer to the MQD */
3782 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3783 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3784 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3785 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3786 /* set MQD vmid to 0 */
3787 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3788 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3789 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3790
3791 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3792 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3793 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3794 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3795 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3796 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3797
3798 /* set up the HQD, this is similar to CP_RB0_CNTL */
3799 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3800 mqd->queue_state.cp_hqd_pq_control &=
3801 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3802
3803 mqd->queue_state.cp_hqd_pq_control |=
3804 drm_order(rdev->ring[idx].ring_size / 8);
3805 mqd->queue_state.cp_hqd_pq_control |=
3806 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
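/* assuming RADEON_GPU_PAGE_SIZE = 4096: drm_order(4096 / 8) = 9 lands
 * in the RPTR_BLOCK_SIZE field via the << 8
 */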
3807#ifdef __BIG_ENDIAN
3808 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3809#endif
3810 mqd->queue_state.cp_hqd_pq_control &=
3811 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3812 mqd->queue_state.cp_hqd_pq_control |=
3813 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3814 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3815
3816 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3817 if (i == 0)
3818 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3819 else
3820 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3821 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3822 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3823 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3824 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3825 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3826
3827 /* set the wb address whether it's enabled or not */
3828 if (i == 0)
3829 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3830 else
3831 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3832 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3833 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3834 upper_32_bits(wb_gpu_addr) & 0xffff;
3835 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3836 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3837 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3838 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3839
3840 /* enable the doorbell if requested */
3841 if (use_doorbell) {
3842 mqd->queue_state.cp_hqd_pq_doorbell_control =
3843 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3844 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3845 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3846 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3847 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3848 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3849 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3850
3851 } else {
3852 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3853 }
3854 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3855 mqd->queue_state.cp_hqd_pq_doorbell_control);
3856
3857 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3858 rdev->ring[idx].wptr = 0;
3859 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3860 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3861 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3862 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3863
3864 /* set the vmid for the queue */
3865 mqd->queue_state.cp_hqd_vmid = 0;
3866 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3867
3868 /* activate the queue */
3869 mqd->queue_state.cp_hqd_active = 1;
3870 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3871
3872 cik_srbm_select(rdev, 0, 0, 0, 0);
3873 mutex_unlock(&rdev->srbm_mutex);
3874
3875 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3876 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3877
3878 rdev->ring[idx].ready = true;
3879 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3880 if (r)
3881 rdev->ring[idx].ready = false;
3882 }
3883
3884 return 0;
3885}
3886
3887static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3888{
3889 cik_cp_gfx_enable(rdev, enable);
3890 cik_cp_compute_enable(rdev, enable);
3891}
3892
3893static int cik_cp_load_microcode(struct radeon_device *rdev)
3894{
3895 int r;
3896
3897 r = cik_cp_gfx_load_microcode(rdev);
3898 if (r)
3899 return r;
3900 r = cik_cp_compute_load_microcode(rdev);
3901 if (r)
3902 return r;
3903
3904 return 0;
3905}
3906
3907static void cik_cp_fini(struct radeon_device *rdev)
3908{
3909 cik_cp_gfx_fini(rdev);
3910 cik_cp_compute_fini(rdev);
3911}
3912
3913static int cik_cp_resume(struct radeon_device *rdev)
3914{
3915 int r;
3916
3917 /* Reset all cp blocks */
3918 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3919 RREG32(GRBM_SOFT_RESET);
3920 mdelay(15);
3921 WREG32(GRBM_SOFT_RESET, 0);
3922 RREG32(GRBM_SOFT_RESET);
3923
3924 r = cik_cp_load_microcode(rdev);
3925 if (r)
3926 return r;
3927
3928 r = cik_cp_gfx_resume(rdev);
3929 if (r)
3930 return r;
3931 r = cik_cp_compute_resume(rdev);
3932 if (r)
3933 return r;
3934
3935 return 0;
3936}
3937
3938/*
3939 * sDMA - System DMA
3940 * Starting with CIK, the GPU has new asynchronous
3941 * DMA engines. These engines are used for compute
3942 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3943 * and each one supports 1 ring buffer used for gfx
3944 * and 2 queues used for compute.
3945 *
3946 * The programming model is very similar to the CP
3947 * (ring buffer, IBs, etc.), but sDMA has its own
3948 * packet format that is different from the PM4 format
3949 * used by the CP. sDMA supports copying data, writing
3950 * embedded data, solid fills, and a number of other
3951 * things. It also has support for tiling/detiling of
3952 * buffers.
3953 */
3954/**
3955 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3956 *
3957 * @rdev: radeon_device pointer
3958 * @ib: IB object to schedule
3959 *
3960 * Schedule an IB in the DMA ring (CIK).
3961 */
3962void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3963 struct radeon_ib *ib)
3964{
3965 struct radeon_ring *ring = &rdev->ring[ib->ring];
3966 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3967
3968 if (rdev->wb.enabled) {
3969 u32 next_rptr = ring->wptr + 5;
3970 while ((next_rptr & 7) != 4)
3971 next_rptr++;
3972 next_rptr += 4;
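/* next_rptr is the ring position just past the aligned 4 DW
 * INDIRECT_BUFFER packet emitted below
 */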
3973 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3974 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3975 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3976 radeon_ring_write(ring, 1); /* number of DWs to follow */
3977 radeon_ring_write(ring, next_rptr);
3978 }
3979
3980 /* IB packet must end on an 8 DW boundary */
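/* the INDIRECT_BUFFER packet below is 4 DWs, so padding with NOPs until
 * (wptr & 7) == 4 makes it end on an 8 DW boundary
 */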
3981 while ((ring->wptr & 7) != 4)
3982 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3983 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3984 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3985 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3986 radeon_ring_write(ring, ib->length_dw);
3987
3988}
3989
3990/**
3991 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3992 *
3993 * @rdev: radeon_device pointer
3994 * @fence: radeon fence object
3995 *
3996 * Add a DMA fence packet to the ring to write
3997 * the fence seq number and a DMA trap packet to generate
3998 * an interrupt if needed (CIK).
3999 */
4000void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4001 struct radeon_fence *fence)
4002{
4003 struct radeon_ring *ring = &rdev->ring[fence->ring];
4004 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4005 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4006 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4007 u32 ref_and_mask;
4008
4009 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4010 ref_and_mask = SDMA0;
4011 else
4012 ref_and_mask = SDMA1;
4013
4014 /* write the fence */
4015 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4016 radeon_ring_write(ring, addr & 0xffffffff);
4017 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4018 radeon_ring_write(ring, fence->seq);
4019 /* generate an interrupt */
4020 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4021 /* flush HDP */
4022 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4023 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4024 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4025 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4026 radeon_ring_write(ring, ref_and_mask); /* MASK */
4027 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4028}
4029
4030/**
4031 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4032 *
4033 * @rdev: radeon_device pointer
4034 * @ring: radeon_ring structure holding ring information
4035 * @semaphore: radeon semaphore object
4036 * @emit_wait: wait or signal semaphore
4037 *
4038 * Add a DMA semaphore packet to the ring to wait on or signal
4039 * other rings (CIK).
4040 */
4041void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4042 struct radeon_ring *ring,
4043 struct radeon_semaphore *semaphore,
4044 bool emit_wait)
4045{
4046 u64 addr = semaphore->gpu_addr;
4047 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4048
4049 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4050 radeon_ring_write(ring, addr & 0xfffffff8);
4051 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4052}
4053
4054/**
4055 * cik_sdma_gfx_stop - stop the gfx async dma engines
4056 *
4057 * @rdev: radeon_device pointer
4058 *
4059 * Stop the gfx async dma ring buffers (CIK).
4060 */
4061static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4062{
4063 u32 rb_cntl, reg_offset;
4064 int i;
4065
4066 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4067
4068 for (i = 0; i < 2; i++) {
4069 if (i == 0)
4070 reg_offset = SDMA0_REGISTER_OFFSET;
4071 else
4072 reg_offset = SDMA1_REGISTER_OFFSET;
4073 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4074 rb_cntl &= ~SDMA_RB_ENABLE;
4075 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4076 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4077 }
4078}
4079
4080/**
4081 * cik_sdma_rlc_stop - stop the compute async dma engines
4082 *
4083 * @rdev: radeon_device pointer
4084 *
4085 * Stop the compute async dma queues (CIK).
4086 */
4087static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4088{
4089 /* XXX todo */
4090}
4091
4092/**
4093 * cik_sdma_enable - halt or unhalt the async dma engines
4094 *
4095 * @rdev: radeon_device pointer
4096 * @enable: enable/disable the DMA MEs.
4097 *
4098 * Halt or unhalt the async dma engines (CIK).
4099 */
4100static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4101{
4102 u32 me_cntl, reg_offset;
4103 int i;
4104
4105 for (i = 0; i < 2; i++) {
4106 if (i == 0)
4107 reg_offset = SDMA0_REGISTER_OFFSET;
4108 else
4109 reg_offset = SDMA1_REGISTER_OFFSET;
4110 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4111 if (enable)
4112 me_cntl &= ~SDMA_HALT;
4113 else
4114 me_cntl |= SDMA_HALT;
4115 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4116 }
4117}
4118
4119/**
4120 * cik_sdma_gfx_resume - setup and start the async dma engines
4121 *
4122 * @rdev: radeon_device pointer
4123 *
4124 * Set up the gfx DMA ring buffers and enable them (CIK).
4125 * Returns 0 for success, error for failure.
4126 */
4127static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4128{
4129 struct radeon_ring *ring;
4130 u32 rb_cntl, ib_cntl;
4131 u32 rb_bufsz;
4132 u32 reg_offset, wb_offset;
4133 int i, r;
4134
4135 for (i = 0; i < 2; i++) {
4136 if (i == 0) {
4137 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4138 reg_offset = SDMA0_REGISTER_OFFSET;
4139 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4140 } else {
4141 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4142 reg_offset = SDMA1_REGISTER_OFFSET;
4143 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4144 }
4145
4146 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4147 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4148
4149 /* Set ring buffer size in dwords */
4150 rb_bufsz = drm_order(ring->ring_size / 4);
4151 rb_cntl = rb_bufsz << 1;
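/* ring_size is in bytes, /4 converts to dwords; the ring size field
 * sits just above the RB enable bit, hence the << 1
 */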
4152#ifdef __BIG_ENDIAN
4153 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4154#endif
4155 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4156
4157 /* Initialize the ring buffer's read and write pointers */
4158 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4159 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4160
4161 /* set the wb address whether it's enabled or not */
4162 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4163 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4164 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4165 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4166
4167 if (rdev->wb.enabled)
4168 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4169
4170 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4171 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4172
4173 ring->wptr = 0;
4174 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4175
4176 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4177
4178 /* enable DMA RB */
4179 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4180
4181 ib_cntl = SDMA_IB_ENABLE;
4182#ifdef __BIG_ENDIAN
4183 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4184#endif
4185 /* enable DMA IBs */
4186 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4187
4188 ring->ready = true;
4189
4190 r = radeon_ring_test(rdev, ring->idx, ring);
4191 if (r) {
4192 ring->ready = false;
4193 return r;
4194 }
4195 }
4196
4197 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4198
4199 return 0;
4200}
4201
4202/**
4203 * cik_sdma_rlc_resume - setup and start the async dma engines
4204 *
4205 * @rdev: radeon_device pointer
4206 *
4207 * Set up the compute DMA queues and enable them (CIK).
4208 * Returns 0 for success, error for failure.
4209 */
4210static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4211{
4212 /* XXX todo */
4213 return 0;
4214}
4215
4216/**
4217 * cik_sdma_load_microcode - load the sDMA ME ucode
4218 *
4219 * @rdev: radeon_device pointer
4220 *
4221 * Loads the sDMA0/1 ucode.
4222 * Returns 0 for success, -EINVAL if the ucode is not available.
4223 */
4224static int cik_sdma_load_microcode(struct radeon_device *rdev)
4225{
4226 const __be32 *fw_data;
4227 int i;
4228
4229 if (!rdev->sdma_fw)
4230 return -EINVAL;
4231
4232 /* stop the gfx rings and rlc compute queues */
4233 cik_sdma_gfx_stop(rdev);
4234 cik_sdma_rlc_stop(rdev);
4235
4236 /* halt the MEs */
4237 cik_sdma_enable(rdev, false);
4238
4239 /* sdma0 */
4240 fw_data = (const __be32 *)rdev->sdma_fw->data;
4241 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4242 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4243 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4244 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4245
4246 /* sdma1 */
4247 fw_data = (const __be32 *)rdev->sdma_fw->data;
4248 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4249 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4250 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4251 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4252
4253 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4254 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4255 return 0;
4256}
4257
4258/**
4259 * cik_sdma_resume - setup and start the async dma engines
4260 *
4261 * @rdev: radeon_device pointer
4262 *
4263 * Set up the DMA engines and enable them (CIK).
4264 * Returns 0 for success, error for failure.
4265 */
4266static int cik_sdma_resume(struct radeon_device *rdev)
4267{
4268 int r;
4269
4270 /* Reset dma */
4271 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4272 RREG32(SRBM_SOFT_RESET);
4273 udelay(50);
4274 WREG32(SRBM_SOFT_RESET, 0);
4275 RREG32(SRBM_SOFT_RESET);
4276
4277 r = cik_sdma_load_microcode(rdev);
4278 if (r)
4279 return r;
4280
4281 /* unhalt the MEs */
4282 cik_sdma_enable(rdev, true);
4283
4284 /* start the gfx rings and rlc compute queues */
4285 r = cik_sdma_gfx_resume(rdev);
4286 if (r)
4287 return r;
4288 r = cik_sdma_rlc_resume(rdev);
4289 if (r)
4290 return r;
4291
4292 return 0;
4293}
4294
4295/**
4296 * cik_sdma_fini - tear down the async dma engines
4297 *
4298 * @rdev: radeon_device pointer
4299 *
4300 * Stop the async dma engines and free the rings (CIK).
4301 */
4302static void cik_sdma_fini(struct radeon_device *rdev)
4303{
4304 /* stop the gfx rings and rlc compute queues */
4305 cik_sdma_gfx_stop(rdev);
4306 cik_sdma_rlc_stop(rdev);
4307 /* halt the MEs */
4308 cik_sdma_enable(rdev, false);
4309 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4310 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4311 /* XXX - compute dma queue tear down */
4312}
4313
4314/**
4315 * cik_copy_dma - copy pages using the DMA engine
4316 *
4317 * @rdev: radeon_device pointer
4318 * @src_offset: src GPU address
4319 * @dst_offset: dst GPU address
4320 * @num_gpu_pages: number of GPU pages to xfer
4321 * @fence: radeon fence object
4322 *
4323 * Copy GPU pages using the DMA engine (CIK).
4324 * Used by the radeon ttm implementation to move pages if
4325 * registered as the asic copy callback.
4326 */
4327int cik_copy_dma(struct radeon_device *rdev,
4328 uint64_t src_offset, uint64_t dst_offset,
4329 unsigned num_gpu_pages,
4330 struct radeon_fence **fence)
4331{
4332 struct radeon_semaphore *sem = NULL;
4333 int ring_index = rdev->asic->copy.dma_ring_index;
4334 struct radeon_ring *ring = &rdev->ring[ring_index];
4335 u32 size_in_bytes, cur_size_in_bytes;
4336 int i, num_loops;
4337 int r = 0;
4338
4339 r = radeon_semaphore_create(rdev, &sem);
4340 if (r) {
4341 DRM_ERROR("radeon: moving bo (%d).\n", r);
4342 return r;
4343 }
4344
4345 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4346 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
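/* each COPY_LINEAR packet below is 7 DWs and moves at most 0x1fffff
 * bytes; the extra 14 DWs budget the optional semaphore sync and the
 * fence emit
 */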
4347 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4348 if (r) {
4349 DRM_ERROR("radeon: moving bo (%d).\n", r);
4350 radeon_semaphore_free(rdev, &sem, NULL);
4351 return r;
4352 }
4353
4354 if (radeon_fence_need_sync(*fence, ring->idx)) {
4355 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4356 ring->idx);
4357 radeon_fence_note_sync(*fence, ring->idx);
4358 } else {
4359 radeon_semaphore_free(rdev, &sem, NULL);
4360 }
4361
4362 for (i = 0; i < num_loops; i++) {
4363 cur_size_in_bytes = size_in_bytes;
4364 if (cur_size_in_bytes > 0x1fffff)
4365 cur_size_in_bytes = 0x1fffff;
4366 size_in_bytes -= cur_size_in_bytes;
4367 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4368 radeon_ring_write(ring, cur_size_in_bytes);
4369 radeon_ring_write(ring, 0); /* src/dst endian swap */
4370 radeon_ring_write(ring, src_offset & 0xffffffff);
4371 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4372 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4373 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4374 src_offset += cur_size_in_bytes;
4375 dst_offset += cur_size_in_bytes;
4376 }
4377
4378 r = radeon_fence_emit(rdev, fence, ring->idx);
4379 if (r) {
4380 radeon_ring_unlock_undo(rdev, ring);
4381 return r;
4382 }
4383
4384 radeon_ring_unlock_commit(rdev, ring);
4385 radeon_semaphore_free(rdev, &sem, *fence);
4386
4387 return r;
4388}
4389
4390/**
4391 * cik_sdma_ring_test - simple async dma engine test
4392 *
4393 * @rdev: radeon_device pointer
4394 * @ring: radeon_ring structure holding ring information
4395 *
4396 * Test the DMA engine by using it to write a
4397 * value to memory (CIK).
4398 * Returns 0 for success, error for failure.
4399 */
4400int cik_sdma_ring_test(struct radeon_device *rdev,
4401 struct radeon_ring *ring)
4402{
4403 unsigned i;
4404 int r;
4405 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4406 u32 tmp;
4407
4408 if (!ptr) {
4409 DRM_ERROR("invalid vram scratch pointer\n");
4410 return -EINVAL;
4411 }
4412
4413 tmp = 0xCAFEDEAD;
4414 writel(tmp, ptr);
4415
4416 r = radeon_ring_lock(rdev, ring, 4);
4417 if (r) {
4418 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4419 return r;
4420 }
4421 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4422 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4423 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4424 radeon_ring_write(ring, 1); /* number of DWs to follow */
4425 radeon_ring_write(ring, 0xDEADBEEF);
4426 radeon_ring_unlock_commit(rdev, ring);
4427
4428 for (i = 0; i < rdev->usec_timeout; i++) {
4429 tmp = readl(ptr);
4430 if (tmp == 0xDEADBEEF)
4431 break;
4432 DRM_UDELAY(1);
4433 }
4434
4435 if (i < rdev->usec_timeout) {
4436 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4437 } else {
4438 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4439 ring->idx, tmp);
4440 r = -EINVAL;
4441 }
4442 return r;
4443}
4444
4445/**
4446 * cik_sdma_ib_test - test an IB on the DMA engine
4447 *
4448 * @rdev: radeon_device pointer
4449 * @ring: radeon_ring structure holding ring information
4450 *
4451 * Test a simple IB in the DMA ring (CIK).
4452 * Returns 0 on success, error on failure.
4453 */
4454int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4455{
4456 struct radeon_ib ib;
4457 unsigned i;
4458 int r;
4459 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4460 u32 tmp = 0;
4461
4462 if (!ptr) {
4463 DRM_ERROR("invalid vram scratch pointer\n");
4464 return -EINVAL;
4465 }
4466
4467 tmp = 0xCAFEDEAD;
4468 writel(tmp, ptr);
4469
4470 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4471 if (r) {
4472 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4473 return r;
4474 }
4475
4476 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4477 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4478 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4479 ib.ptr[3] = 1;
4480 ib.ptr[4] = 0xDEADBEEF;
4481 ib.length_dw = 5;
4482
4483 r = radeon_ib_schedule(rdev, &ib, NULL);
4484 if (r) {
4485 radeon_ib_free(rdev, &ib);
4486 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4487 return r;
4488 }
4489 r = radeon_fence_wait(ib.fence, false);
4490 if (r) {
4491 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4492 return r;
4493 }
4494 for (i = 0; i < rdev->usec_timeout; i++) {
4495 tmp = readl(ptr);
4496 if (tmp == 0xDEADBEEF)
4497 break;
4498 DRM_UDELAY(1);
4499 }
4500 if (i < rdev->usec_timeout) {
4501 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4502 } else {
4503 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4504 r = -EINVAL;
4505 }
4506 radeon_ib_free(rdev, &ib);
4507 return r;
4508}
4509
4510
4511static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4512{
4513 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4514 RREG32(GRBM_STATUS));
4515 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4516 RREG32(GRBM_STATUS2));
4517 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4518 RREG32(GRBM_STATUS_SE0));
4519 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4520 RREG32(GRBM_STATUS_SE1));
4521 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4522 RREG32(GRBM_STATUS_SE2));
4523 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4524 RREG32(GRBM_STATUS_SE3));
4525 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4526 RREG32(SRBM_STATUS));
4527 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4528 RREG32(SRBM_STATUS2));
4529 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4530 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4531 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4532 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4533 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4534 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4535 RREG32(CP_STALLED_STAT1));
4536 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4537 RREG32(CP_STALLED_STAT2));
4538 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4539 RREG32(CP_STALLED_STAT3));
4540 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4541 RREG32(CP_CPF_BUSY_STAT));
4542 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4543 RREG32(CP_CPF_STALLED_STAT1));
4544 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4545 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4546 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4547 RREG32(CP_CPC_STALLED_STAT1));
4548 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4549}
4550
4551/**
4552 * cik_gpu_check_soft_reset - check which blocks are busy
4553 *
4554 * @rdev: radeon_device pointer
4555 *
4556 * Check which blocks are busy and return the relevant reset
4557 * mask to be used by cik_gpu_soft_reset().
4558 * Returns a mask of the blocks to be reset.
4559 */
4560static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4561{
4562 u32 reset_mask = 0;
4563 u32 tmp;
4564
4565 /* GRBM_STATUS */
4566 tmp = RREG32(GRBM_STATUS);
4567 if (tmp & (PA_BUSY | SC_BUSY |
4568 BCI_BUSY | SX_BUSY |
4569 TA_BUSY | VGT_BUSY |
4570 DB_BUSY | CB_BUSY |
4571 GDS_BUSY | SPI_BUSY |
4572 IA_BUSY | IA_BUSY_NO_DMA))
4573 reset_mask |= RADEON_RESET_GFX;
4574
4575 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4576 reset_mask |= RADEON_RESET_CP;
4577
4578 /* GRBM_STATUS2 */
4579 tmp = RREG32(GRBM_STATUS2);
4580 if (tmp & RLC_BUSY)
4581 reset_mask |= RADEON_RESET_RLC;
4582
4583 /* SDMA0_STATUS_REG */
4584 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4585 if (!(tmp & SDMA_IDLE))
4586 reset_mask |= RADEON_RESET_DMA;
4587
4588 /* SDMA1_STATUS_REG */
4589 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4590 if (!(tmp & SDMA_IDLE))
4591 reset_mask |= RADEON_RESET_DMA1;
4592
4593 /* SRBM_STATUS2 */
4594 tmp = RREG32(SRBM_STATUS2);
4595 if (tmp & SDMA_BUSY)
4596 reset_mask |= RADEON_RESET_DMA;
4597
4598 if (tmp & SDMA1_BUSY)
4599 reset_mask |= RADEON_RESET_DMA1;
4600
4601 /* SRBM_STATUS */
4602 tmp = RREG32(SRBM_STATUS);
4603
4604 if (tmp & IH_BUSY)
4605 reset_mask |= RADEON_RESET_IH;
4606
4607 if (tmp & SEM_BUSY)
4608 reset_mask |= RADEON_RESET_SEM;
4609
4610 if (tmp & GRBM_RQ_PENDING)
4611 reset_mask |= RADEON_RESET_GRBM;
4612
4613 if (tmp & VMC_BUSY)
4614 reset_mask |= RADEON_RESET_VMC;
4615
4616 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4617 MCC_BUSY | MCD_BUSY))
4618 reset_mask |= RADEON_RESET_MC;
4619
4620 if (evergreen_is_display_hung(rdev))
4621 reset_mask |= RADEON_RESET_DISPLAY;
4622
4623 /* Skip MC reset as it's most likely not hung, just busy */
4624 if (reset_mask & RADEON_RESET_MC) {
4625 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4626 reset_mask &= ~RADEON_RESET_MC;
4627 }
4628
4629 return reset_mask;
4630}
4631
4632/**
4633 * cik_gpu_soft_reset - soft reset GPU
4634 *
4635 * @rdev: radeon_device pointer
4636 * @reset_mask: mask of which blocks to reset
4637 *
4638 * Soft reset the blocks specified in @reset_mask.
4639 */
4640static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4641{
4642 struct evergreen_mc_save save;
4643 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4644 u32 tmp;
4645
4646 if (reset_mask == 0)
4647 return;
4648
4649 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4650
4651 cik_print_gpu_status_regs(rdev);
4652 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4653 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4654 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4655 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4656
4657 /* stop the rlc */
4658 cik_rlc_stop(rdev);
4659
4660 /* Disable GFX parsing/prefetching */
4661 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4662
4663 /* Disable MEC parsing/prefetching */
4664 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4665
4666 if (reset_mask & RADEON_RESET_DMA) {
4667 /* sdma0 */
4668 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4669 tmp |= SDMA_HALT;
4670 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4671 }
4672 if (reset_mask & RADEON_RESET_DMA1) {
4673 /* sdma1 */
4674 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4675 tmp |= SDMA_HALT;
4676 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4677 }
4678
4679 evergreen_mc_stop(rdev, &save);
4680 if (evergreen_mc_wait_for_idle(rdev)) {
4681 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4682 }
4683
4684 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4685 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4686
4687 if (reset_mask & RADEON_RESET_CP) {
4688 grbm_soft_reset |= SOFT_RESET_CP;
4689
4690 srbm_soft_reset |= SOFT_RESET_GRBM;
4691 }
4692
4693 if (reset_mask & RADEON_RESET_DMA)
4694 srbm_soft_reset |= SOFT_RESET_SDMA;
4695
4696 if (reset_mask & RADEON_RESET_DMA1)
4697 srbm_soft_reset |= SOFT_RESET_SDMA1;
4698
4699 if (reset_mask & RADEON_RESET_DISPLAY)
4700 srbm_soft_reset |= SOFT_RESET_DC;
4701
4702 if (reset_mask & RADEON_RESET_RLC)
4703 grbm_soft_reset |= SOFT_RESET_RLC;
4704
4705 if (reset_mask & RADEON_RESET_SEM)
4706 srbm_soft_reset |= SOFT_RESET_SEM;
4707
4708 if (reset_mask & RADEON_RESET_IH)
4709 srbm_soft_reset |= SOFT_RESET_IH;
4710
4711 if (reset_mask & RADEON_RESET_GRBM)
4712 srbm_soft_reset |= SOFT_RESET_GRBM;
4713
4714 if (reset_mask & RADEON_RESET_VMC)
4715 srbm_soft_reset |= SOFT_RESET_VMC;
4716
4717 if (!(rdev->flags & RADEON_IS_IGP)) {
4718 if (reset_mask & RADEON_RESET_MC)
4719 srbm_soft_reset |= SOFT_RESET_MC;
4720 }
4721
4722 if (grbm_soft_reset) {
4723 tmp = RREG32(GRBM_SOFT_RESET);
4724 tmp |= grbm_soft_reset;
4725 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4726 WREG32(GRBM_SOFT_RESET, tmp);
4727 tmp = RREG32(GRBM_SOFT_RESET);
4728
4729 udelay(50);
4730
4731 tmp &= ~grbm_soft_reset;
4732 WREG32(GRBM_SOFT_RESET, tmp);
4733 tmp = RREG32(GRBM_SOFT_RESET);
4734 }
4735
4736 if (srbm_soft_reset) {
4737 tmp = RREG32(SRBM_SOFT_RESET);
4738 tmp |= srbm_soft_reset;
4739 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4740 WREG32(SRBM_SOFT_RESET, tmp);
4741 tmp = RREG32(SRBM_SOFT_RESET);
4742
4743 udelay(50);
4744
4745 tmp &= ~srbm_soft_reset;
4746 WREG32(SRBM_SOFT_RESET, tmp);
4747 tmp = RREG32(SRBM_SOFT_RESET);
4748 }
4749
4750 /* Wait a little for things to settle down */
4751 udelay(50);
4752
4753 evergreen_mc_resume(rdev, &save);
4754 udelay(50);
4755
4756 cik_print_gpu_status_regs(rdev);
4757}
4758
4759/**
4760 * cik_asic_reset - soft reset GPU
4761 *
4762 * @rdev: radeon_device pointer
4763 *
4764 * Look up which blocks are hung and attempt
4765 * to reset them.
4766 * Returns 0 for success.
4767 */
4768int cik_asic_reset(struct radeon_device *rdev)
4769{
4770 u32 reset_mask;
4771
4772 reset_mask = cik_gpu_check_soft_reset(rdev);
4773
4774 if (reset_mask)
4775 r600_set_bios_scratch_engine_hung(rdev, true);
4776
4777 cik_gpu_soft_reset(rdev, reset_mask);
4778
4779 reset_mask = cik_gpu_check_soft_reset(rdev);
4780
4781 if (!reset_mask)
4782 r600_set_bios_scratch_engine_hung(rdev, false);
4783
4784 return 0;
4785}
4786
4787/**
4788 * cik_gfx_is_lockup - check if the 3D engine is locked up
4789 *
4790 * @rdev: radeon_device pointer
4791 * @ring: radeon_ring structure holding ring information
4792 *
4793 * Check if the 3D engine is locked up (CIK).
4794 * Returns true if the engine is locked, false if not.
4795 */
4796bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4797{
4798 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4799
4800 if (!(reset_mask & (RADEON_RESET_GFX |
4801 RADEON_RESET_COMPUTE |
4802 RADEON_RESET_CP))) {
4803 radeon_ring_lockup_update(ring);
4804 return false;
4805 }
4806 /* force CP activities */
4807 radeon_ring_force_activity(rdev, ring);
4808 return radeon_ring_test_lockup(rdev, ring);
4809}
4810
4811/**
4812 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4813 *
4814 * @rdev: radeon_device pointer
4815 * @ring: radeon_ring structure holding ring information
4816 *
4817 * Check if the async DMA engine is locked up (CIK).
4818 * Returns true if the engine appears to be locked up, false if not.
4819 */
4820bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4821{
4822 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4823 u32 mask;
4824
4825 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4826 mask = RADEON_RESET_DMA;
4827 else
4828 mask = RADEON_RESET_DMA1;
4829
4830 if (!(reset_mask & mask)) {
4831 radeon_ring_lockup_update(ring);
4832 return false;
4833 }
4834 /* force ring activities */
4835 radeon_ring_force_activity(rdev, ring);
4836 return radeon_ring_test_lockup(rdev, ring);
4837}
4838
4839/* MC */
4840/**
4841 * cik_mc_program - program the GPU memory controller
4842 *
4843 * @rdev: radeon_device pointer
4844 *
4845 * Set the location of vram, gart, and AGP in the GPU's
4846 * physical address space (CIK).
4847 */
4848static void cik_mc_program(struct radeon_device *rdev)
4849{
4850 struct evergreen_mc_save save;
4851 u32 tmp;
4852 int i, j;
4853
4854 /* Initialize HDP */
4855 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4856 WREG32((0x2c14 + j), 0x00000000);
4857 WREG32((0x2c18 + j), 0x00000000);
4858 WREG32((0x2c1c + j), 0x00000000);
4859 WREG32((0x2c20 + j), 0x00000000);
4860 WREG32((0x2c24 + j), 0x00000000);
4861 }
4862 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4863
4864 evergreen_mc_stop(rdev, &save);
4865 if (radeon_mc_wait_for_idle(rdev)) {
4866 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4867 }
4868 /* Lockout access through VGA aperture*/
4869 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4870 /* Update configuration */
4871 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4872 rdev->mc.vram_start >> 12);
4873 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4874 rdev->mc.vram_end >> 12);
4875 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4876 rdev->vram_scratch.gpu_addr >> 12);
4877 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4878 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4879 WREG32(MC_VM_FB_LOCATION, tmp);
4880 /* XXX double check these! */
4881 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4882 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4883 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4884 WREG32(MC_VM_AGP_BASE, 0);
4885 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4886 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4887 if (radeon_mc_wait_for_idle(rdev)) {
4888 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4889 }
4890 evergreen_mc_resume(rdev, &save);
4891 /* we need to own VRAM, so turn off the VGA renderer here
4892 * to stop it overwriting our objects */
4893 rv515_vga_render_disable(rdev);
4894}
4895
4896/**
4897 * cik_mc_init - initialize the memory controller driver params
4898 *
4899 * @rdev: radeon_device pointer
4900 *
4901 * Look up the amount of vram, vram width, and decide how to place
4902 * vram and gart within the GPU's physical address space (CIK).
4903 * Returns 0 for success.
4904 */
4905static int cik_mc_init(struct radeon_device *rdev)
4906{
4907 u32 tmp;
4908 int chansize, numchan;
4909
4910 /* Get VRAM information */
4911 rdev->mc.vram_is_ddr = true;
4912 tmp = RREG32(MC_ARB_RAMCFG);
4913 if (tmp & CHANSIZE_MASK) {
4914 chansize = 64;
4915 } else {
4916 chansize = 32;
4917 }
4918 tmp = RREG32(MC_SHARED_CHMAP);
4919 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4920 case 0:
4921 default:
4922 numchan = 1;
4923 break;
4924 case 1:
4925 numchan = 2;
4926 break;
4927 case 2:
4928 numchan = 4;
4929 break;
4930 case 3:
4931 numchan = 8;
4932 break;
4933 case 4:
4934 numchan = 3;
4935 break;
4936 case 5:
4937 numchan = 6;
4938 break;
4939 case 6:
4940 numchan = 10;
4941 break;
4942 case 7:
4943 numchan = 12;
4944 break;
4945 case 8:
4946 numchan = 16;
4947 break;
4948 }
4949 rdev->mc.vram_width = numchan * chansize;
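/* e.g. 4 channels with a 64-bit chansize yield a 256-bit interface */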
4950 /* Could aper size report 0 ? */
4951 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4952 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4953 /* size in MB on CIK */
4954 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4955 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4956 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4957 si_vram_gtt_location(rdev, &rdev->mc);
4958 radeon_update_bandwidth_info(rdev);
4959
4960 return 0;
4961}
4962
4963/*
4964 * GART
4965 * VMID 0 is the physical GPU addresses as used by the kernel.
4966 * VMIDs 1-15 are used for userspace clients and are handled
4967 * by the radeon vm/hsa code.
4968 */
4969/**
4970 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4971 *
4972 * @rdev: radeon_device pointer
4973 *
4974 * Flush the TLB for the VMID 0 page table (CIK).
4975 */
4976void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4977{
4978 /* flush hdp cache */
4979 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4980
4981 /* bits 0-15 are the VM contexts0-15 */
4982 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4983}
4984
4985/**
4986 * cik_pcie_gart_enable - gart enable
4987 *
4988 * @rdev: radeon_device pointer
4989 *
4990 * This sets up the TLBs, programs the page tables for VMID0,
4991 * sets up the hw for VMIDs 1-15 which are allocated on
4992 * demand, and sets up the global locations for the LDS, GDS,
4993 * and GPUVM for FSA64 clients (CIK).
4994 * Returns 0 for success, errors for failure.
4995 */
4996static int cik_pcie_gart_enable(struct radeon_device *rdev)
4997{
4998 int r, i;
4999
5000 if (rdev->gart.robj == NULL) {
5001 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5002 return -EINVAL;
5003 }
5004 r = radeon_gart_table_vram_pin(rdev);
5005 if (r)
5006 return r;
5007 radeon_gart_restore(rdev);
5008 /* Setup TLB control */
5009 WREG32(MC_VM_MX_L1_TLB_CNTL,
5010 (0xA << 7) |
5011 ENABLE_L1_TLB |
5012 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5013 ENABLE_ADVANCED_DRIVER_MODEL |
5014 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5015 /* Setup L2 cache */
5016 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5017 ENABLE_L2_FRAGMENT_PROCESSING |
5018 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5019 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5020 EFFECTIVE_L2_QUEUE_SIZE(7) |
5021 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5022 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5023 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5024 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5025 /* setup context0 */
5026 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5027 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5028 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5029 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5030 (u32)(rdev->dummy_page.addr >> 12));
5031 WREG32(VM_CONTEXT0_CNTL2, 0);
5032 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5033 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5034
5035 WREG32(0x15D4, 0);
5036 WREG32(0x15D8, 0);
5037 WREG32(0x15DC, 0);
5038
5039 /* empty context1-15 */
5040 /* FIXME start with 4G, once using 2 level pt switch to full
5041 * vm size space
5042 */
5043 /* set vm size, must be a multiple of 4 */
5044 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5045 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5046 for (i = 1; i < 16; i++) {
5047 if (i < 8)
5048 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5049 rdev->gart.table_addr >> 12);
5050 else
5051 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5052 rdev->gart.table_addr >> 12);
5053 }
5054
5055 /* enable context1-15 */
5056 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5057 (u32)(rdev->dummy_page.addr >> 12));
5058 WREG32(VM_CONTEXT1_CNTL2, 4);
5059 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5060 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5061 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5062 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5063 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5064 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5065 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5066 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5067 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5068 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5069 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5070 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5071 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5072
5073 /* TC cache setup ??? */
5074 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5075 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5076 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5077
5078 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5079 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5080 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5081 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5082 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5083
5084 WREG32(TC_CFG_L1_VOLATILE, 0);
5085 WREG32(TC_CFG_L2_VOLATILE, 0);
5086
5087 if (rdev->family == CHIP_KAVERI) {
5088 u32 tmp = RREG32(CHUB_CONTROL);
5089 tmp &= ~BYPASS_VM;
5090 WREG32(CHUB_CONTROL, tmp);
5091 }
5092
5093 /* XXX SH_MEM regs */
5094 /* where to put LDS, scratch, GPUVM in FSA64 space */
5095 mutex_lock(&rdev->srbm_mutex);
5096 for (i = 0; i < 16; i++) {
5097 cik_srbm_select(rdev, 0, 0, 0, i);
5098 /* CP and shaders */
5099 WREG32(SH_MEM_CONFIG, 0);
5100 WREG32(SH_MEM_APE1_BASE, 1);
5101 WREG32(SH_MEM_APE1_LIMIT, 0);
5102 WREG32(SH_MEM_BASES, 0);
5103 /* SDMA GFX */
5104 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5105 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5106 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5107 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5108 /* XXX SDMA RLC - todo */
5109 }
5110 cik_srbm_select(rdev, 0, 0, 0, 0);
5111 mutex_unlock(&rdev->srbm_mutex);
5112
5113 cik_pcie_gart_tlb_flush(rdev);
5114 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5115 (unsigned)(rdev->mc.gtt_size >> 20),
5116 (unsigned long long)rdev->gart.table_addr);
5117 rdev->gart.ready = true;
5118 return 0;
5119}
5120
5121/**
5122 * cik_pcie_gart_disable - gart disable
5123 *
5124 * @rdev: radeon_device pointer
5125 *
5126 * This disables all VM page tables (CIK).
5127 */
5128static void cik_pcie_gart_disable(struct radeon_device *rdev)
5129{
5130 /* Disable all tables */
5131 WREG32(VM_CONTEXT0_CNTL, 0);
5132 WREG32(VM_CONTEXT1_CNTL, 0);
5133 /* Setup TLB control */
5134 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5135 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5136 /* Setup L2 cache */
5137 WREG32(VM_L2_CNTL,
5138 ENABLE_L2_FRAGMENT_PROCESSING |
5139 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5140 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5141 EFFECTIVE_L2_QUEUE_SIZE(7) |
5142 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5143 WREG32(VM_L2_CNTL2, 0);
5144 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5145 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5146 radeon_gart_table_vram_unpin(rdev);
5147}
5148
5149/**
5150 * cik_pcie_gart_fini - vm fini callback
5151 *
5152 * @rdev: radeon_device pointer
5153 *
5154 * Tears down the driver GART/VM setup (CIK).
5155 */
5156static void cik_pcie_gart_fini(struct radeon_device *rdev)
5157{
5158 cik_pcie_gart_disable(rdev);
5159 radeon_gart_table_vram_free(rdev);
5160 radeon_gart_fini(rdev);
5161}
5162
5163/* vm parser */
5164/**
5165 * cik_ib_parse - vm ib_parse callback
5166 *
5167 * @rdev: radeon_device pointer
5168 * @ib: indirect buffer pointer
5169 *
5170 * CIK uses hw IB checking so this is a nop (CIK).
5171 */
5172int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5173{
5174 return 0;
5175}
5176
5177/*
5178 * vm
5179 * VMID 0 is the physical GPU addresses as used by the kernel.
5180 * VMIDs 1-15 are used for userspace clients and are handled
5181 * by the radeon vm/hsa code.
5182 */
5183/**
5184 * cik_vm_init - cik vm init callback
5185 *
5186 * @rdev: radeon_device pointer
5187 *
5188 * Inits cik specific vm parameters (number of VMs, base of vram for
5189 * VMIDs 1-15) (CIK).
5190 * Returns 0 for success.
5191 */
5192int cik_vm_init(struct radeon_device *rdev)
5193{
5194 /* number of VMs */
5195 rdev->vm_manager.nvm = 16;
5196 /* base offset of vram pages */
5197 if (rdev->flags & RADEON_IS_IGP) {
5198 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5199 tmp <<= 22;
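/* MC_VM_FB_OFFSET is in units of 4 MiB, hence the << 22 */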
5200 rdev->vm_manager.vram_base_offset = tmp;
5201 } else
5202 rdev->vm_manager.vram_base_offset = 0;
5203
5204 return 0;
5205}
5206
5207/**
5208 * cik_vm_fini - cik vm fini callback
5209 *
5210 * @rdev: radeon_device pointer
5211 *
5212 * Tear down any asic specific VM setup (CIK).
5213 */
5214void cik_vm_fini(struct radeon_device *rdev)
5215{
5216}
5217
5218/**
5219 * cik_vm_decode_fault - print human readable fault info
5220 *
5221 * @rdev: radeon_device pointer
5222 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5223 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
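 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value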
5224 *
5225 * Print human readable fault information (CIK).
5226 */
5227static void cik_vm_decode_fault(struct radeon_device *rdev,
5228 u32 status, u32 addr, u32 mc_client)
5229{
5230 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5231 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5232 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5233 char *block = (char *)&mc_client;
5234
5235 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5236 protections, vmid, addr,
5237 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5238 block, mc_id);
5239}
5240
5241/**
5242 * cik_vm_flush - cik vm flush using the CP
5243 *
5244 * @rdev: radeon_device pointer
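 * @ridx: radeon ring index
 * @vm: radeon_vm pointer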
5245 *
5246 * Update the page table base and flush the VM TLB
5247 * using the CP (CIK).
5248 */
5249void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5250{
5251 struct radeon_ring *ring = &rdev->ring[ridx];
5252
5253 if (vm == NULL)
5254 return;
5255
5256 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5257 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5258 WRITE_DATA_DST_SEL(0)));
5259 if (vm->id < 8) {
5260 radeon_ring_write(ring,
5261 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5262 } else {
5263 radeon_ring_write(ring,
5264 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5265 }
5266 radeon_ring_write(ring, 0);
5267 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5268
5269 /* update SH_MEM_* regs */
5270 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5271 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5272 WRITE_DATA_DST_SEL(0)));
5273 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5274 radeon_ring_write(ring, 0);
5275 radeon_ring_write(ring, VMID(vm->id));
5276
5277 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5278 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5279 WRITE_DATA_DST_SEL(0)));
5280 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5281 radeon_ring_write(ring, 0);
5282
5283 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5284 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5285 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5286 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5287
5288 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5289 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5290 WRITE_DATA_DST_SEL(0)));
5291 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5292 radeon_ring_write(ring, 0);
5293 radeon_ring_write(ring, VMID(0));
5294
5295 /* HDP flush */
5296 /* We should be using the WAIT_REG_MEM packet here like in
5297 * cik_fence_ring_emit(), but it causes the CP to hang in this
5298 * context...
5299 */
5300 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5301 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5302 WRITE_DATA_DST_SEL(0)));
5303 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5304 radeon_ring_write(ring, 0);
5305 radeon_ring_write(ring, 0);
5306
5307 /* bits 0-15 are the VM contexts0-15 */
5308 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5309 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5310 WRITE_DATA_DST_SEL(0)));
5311 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5312 radeon_ring_write(ring, 0);
5313 radeon_ring_write(ring, 1 << vm->id);
5314
5315 /* compute doesn't have PFP */
5316 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5317 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5318 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5319 radeon_ring_write(ring, 0x0);
5320 }
5321}
5322
5323/**
5324 * cik_vm_set_page - update the page tables using CP or sDMA
5325 *
5326 * @rdev: radeon_device pointer
5327 * @ib: indirect buffer to fill with commands
5328 * @pe: addr of the page entry
5329 * @addr: dst addr to write into pe
5330 * @count: number of page entries to update
5331 * @incr: increase next addr by incr bytes
5332 * @flags: access flags
5333 *
5334 * Update the page tables using CP or sDMA (CIK).
5335 */
5336void cik_vm_set_page(struct radeon_device *rdev,
5337 struct radeon_ib *ib,
5338 uint64_t pe,
5339 uint64_t addr, unsigned count,
5340 uint32_t incr, uint32_t flags)
5341{
5342 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
5343 uint64_t value;
5344 unsigned ndw;
5345
5346 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
5347 /* CP */
5348 while (count) {
5349 ndw = 2 + count * 2;
5350 if (ndw > 0x3FFE)
5351 ndw = 0x3FFE;
5352
5353 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
5354 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
5355 WRITE_DATA_DST_SEL(1));
5356 ib->ptr[ib->length_dw++] = pe;
5357 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5358 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
5359 if (flags & RADEON_VM_PAGE_SYSTEM) {
5360 value = radeon_vm_map_gart(rdev, addr);
5361 value &= 0xFFFFFFFFFFFFF000ULL;
5362 } else if (flags & RADEON_VM_PAGE_VALID) {
5363 value = addr;
5364 } else {
5365 value = 0;
5366 }
5367 addr += incr;
5368 value |= r600_flags;
5369 ib->ptr[ib->length_dw++] = value;
5370 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5371 }
5372 }
5373 } else {
5374 /* DMA */
5375 if (flags & RADEON_VM_PAGE_SYSTEM) {
5376 while (count) {
5377 ndw = count * 2;
5378 if (ndw > 0xFFFFE)
5379 ndw = 0xFFFFE;
5380
5381 /* for non-physically contiguous pages (system) */
5382 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5383 ib->ptr[ib->length_dw++] = pe;
5384 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5385 ib->ptr[ib->length_dw++] = ndw;
5386 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5387 if (flags & RADEON_VM_PAGE_SYSTEM) {
5388 value = radeon_vm_map_gart(rdev, addr);
5389 value &= 0xFFFFFFFFFFFFF000ULL;
5390 } else if (flags & RADEON_VM_PAGE_VALID) {
5391 value = addr;
5392 } else {
5393 value = 0;
5394 }
5395 addr += incr;
5396 value |= r600_flags;
5397 ib->ptr[ib->length_dw++] = value;
5398 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5399 }
5400 }
5401 } else {
5402 while (count) {
5403 ndw = count;
5404 if (ndw > 0x7FFFF)
5405 ndw = 0x7FFFF;
5406
5407 if (flags & RADEON_VM_PAGE_VALID)
5408 value = addr;
5409 else
5410 value = 0;
5411 /* for physically contiguous pages (vram) */
5412 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5413 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5414 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5415 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5416 ib->ptr[ib->length_dw++] = 0;
5417 ib->ptr[ib->length_dw++] = value; /* value */
5418 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5419 ib->ptr[ib->length_dw++] = incr; /* increment size */
5420 ib->ptr[ib->length_dw++] = 0;
5421 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5422 pe += ndw * 8;
5423 addr += ndw * incr;
5424 count -= ndw;
5425 }
5426 }
5427 while (ib->length_dw & 0x7)
5428 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5429 }
5430}
5431
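/* Minimal sketch of how each page table entry value is composed in the
 * loops above (illustrative helper, not part of this driver): a 4 KiB
 * aligned address OR'd with the hw access bits produced by
 * cayman_vm_page_flags().
 */
static inline uint64_t cik_pte_value_example(uint64_t addr, uint32_t hw_flags)
{
	/* keep the page-aligned address bits; the flags occupy the low bits */
	return (addr & 0xFFFFFFFFFFFFF000ULL) | hw_flags;
}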
605de6b9
AD
5432/**
5433 * cik_dma_vm_flush - cik vm flush using sDMA
5434 *
5435 * @rdev: radeon_device pointer
5436 *
5437 * Update the page table base and flush the VM TLB
5438 * using sDMA (CIK).
5439 */
5440void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5441{
5442 struct radeon_ring *ring = &rdev->ring[ridx];
5443 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5444 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5445 u32 ref_and_mask;
5446
5447 if (vm == NULL)
5448 return;
5449
5450 if (ridx == R600_RING_TYPE_DMA_INDEX)
5451 ref_and_mask = SDMA0;
5452 else
5453 ref_and_mask = SDMA1;
5454
5455 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5456 if (vm->id < 8) {
5457 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5458 } else {
5459 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5460 }
5461 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5462
5463 /* update SH_MEM_* regs */
5464 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5465 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5466 radeon_ring_write(ring, VMID(vm->id));
5467
5468 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5469 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5470 radeon_ring_write(ring, 0);
5471
5472 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5473 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5474 radeon_ring_write(ring, 0);
5475
5476 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5477 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5478 radeon_ring_write(ring, 1);
5479
5480 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5481 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5482 radeon_ring_write(ring, 0);
5483
5484 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5485 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5486 radeon_ring_write(ring, VMID(0));
5487
5488 /* flush HDP */
5489 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5490 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5491 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5492 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5493 radeon_ring_write(ring, ref_and_mask); /* MASK */
5494 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5495
5496 /* flush TLB */
5497 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5498 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5499 radeon_ring_write(ring, 1 << vm->id);
5500}
5501
f6796cae
AD
5502/*
5503 * RLC
5504 * The RLC is a multi-purpose microengine that handles a
5505 * variety of functions, the most important of which is
5506 * the interrupt controller.
5507 */
866d83de
AD
5508static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5509 bool enable)
f6796cae 5510{
866d83de 5511 u32 tmp = RREG32(CP_INT_CNTL_RING0);
f6796cae 5512
866d83de
AD
5513 if (enable)
5514 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5515 else
5516 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
f6796cae 5517 WREG32(CP_INT_CNTL_RING0, tmp);
866d83de 5518}
f6796cae 5519
866d83de
AD
5520static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5521{
5522 u32 tmp;
f6796cae 5523
866d83de
AD
5524 tmp = RREG32(RLC_LB_CNTL);
5525 if (enable)
5526 tmp |= LOAD_BALANCE_ENABLE;
5527 else
5528 tmp &= ~LOAD_BALANCE_ENABLE;
5529 WREG32(RLC_LB_CNTL, tmp);
5530}
f6796cae 5531
866d83de
AD
5532static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5533{
5534 u32 i, j, k;
5535 u32 mask;
f6796cae
AD
5536
5537 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5538 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5539 cik_select_se_sh(rdev, i, j);
5540 for (k = 0; k < rdev->usec_timeout; k++) {
5541 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5542 break;
5543 udelay(1);
5544 }
5545 }
5546 }
5547 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5548
5549 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5550 for (k = 0; k < rdev->usec_timeout; k++) {
5551 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5552 break;
5553 udelay(1);
5554 }
5555}
5556
22c775ce
AD
5557static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5558{
5559 u32 tmp;
5560
5561 tmp = RREG32(RLC_CNTL);
5562 if (tmp != rlc)
5563 WREG32(RLC_CNTL, rlc);
5564}
5565
5566static u32 cik_halt_rlc(struct radeon_device *rdev)
5567{
5568 u32 data, orig;
5569
5570 orig = data = RREG32(RLC_CNTL);
5571
5572 if (data & RLC_ENABLE) {
5573 u32 i;
5574
5575 data &= ~RLC_ENABLE;
5576 WREG32(RLC_CNTL, data);
5577
5578 for (i = 0; i < rdev->usec_timeout; i++) {
5579 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5580 break;
5581 udelay(1);
5582 }
5583
5584 cik_wait_for_rlc_serdes(rdev);
5585 }
5586
5587 return orig;
5588}
5589
866d83de
AD
5590/**
5591 * cik_rlc_stop - stop the RLC ME
5592 *
5593 * @rdev: radeon_device pointer
5594 *
5595 * Halt the RLC ME (MicroEngine) (CIK).
5596 */
5597static void cik_rlc_stop(struct radeon_device *rdev)
5598{
22c775ce 5599 WREG32(RLC_CNTL, 0);
866d83de
AD
5600
5601 cik_enable_gui_idle_interrupt(rdev, false);
5602
866d83de
AD
5603 cik_wait_for_rlc_serdes(rdev);
5604}
5605
f6796cae
AD
5606/**
5607 * cik_rlc_start - start the RLC ME
5608 *
5609 * @rdev: radeon_device pointer
5610 *
5611 * Unhalt the RLC ME (MicroEngine) (CIK).
5612 */
5613static void cik_rlc_start(struct radeon_device *rdev)
5614{
f6796cae
AD
5615 WREG32(RLC_CNTL, RLC_ENABLE);
5616
866d83de 5617 cik_enable_gui_idle_interrupt(rdev, true);
f6796cae
AD
5618
5619 udelay(50);
5620}
5621
5622/**
5623 * cik_rlc_resume - setup the RLC hw
5624 *
5625 * @rdev: radeon_device pointer
5626 *
5627 * Initialize the RLC registers, load the ucode,
5628 * and start the RLC (CIK).
5629 * Returns 0 for success, -EINVAL if the ucode is not available.
5630 */
5631static int cik_rlc_resume(struct radeon_device *rdev)
5632{
22c775ce 5633 u32 i, size, tmp;
f6796cae
AD
5634 const __be32 *fw_data;
5635
5636 if (!rdev->rlc_fw)
5637 return -EINVAL;
5638
5639 switch (rdev->family) {
5640 case CHIP_BONAIRE:
5641 default:
5642 size = BONAIRE_RLC_UCODE_SIZE;
5643 break;
5644 case CHIP_KAVERI:
5645 size = KV_RLC_UCODE_SIZE;
5646 break;
5647 case CHIP_KABINI:
5648 size = KB_RLC_UCODE_SIZE;
5649 break;
5650 }
5651
5652 cik_rlc_stop(rdev);
5653
22c775ce
AD
5654 /* disable CG */
5655 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5656 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5657
866d83de 5658 si_rlc_reset(rdev);
f6796cae 5659
22c775ce
AD
5660 cik_init_pg(rdev);
5661
5662 cik_init_cg(rdev);
5663
f6796cae
AD
5664 WREG32(RLC_LB_CNTR_INIT, 0);
5665 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5666
5667 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5668 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5669 WREG32(RLC_LB_PARAMS, 0x00600408);
5670 WREG32(RLC_LB_CNTL, 0x80000004);
5671
5672 WREG32(RLC_MC_CNTL, 0);
5673 WREG32(RLC_UCODE_CNTL, 0);
5674
5675 fw_data = (const __be32 *)rdev->rlc_fw->data;
5676 WREG32(RLC_GPM_UCODE_ADDR, 0);
5677 for (i = 0; i < size; i++)
5678 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5679 WREG32(RLC_GPM_UCODE_ADDR, 0);
5680
866d83de
AD
5681 /* XXX - find out what chips support lbpw */
5682 cik_enable_lbpw(rdev, false);
5683
22c775ce
AD
5684 if (rdev->family == CHIP_BONAIRE)
5685 WREG32(RLC_DRIVER_DMA_STATUS, 0);
f6796cae
AD
5686
5687 cik_rlc_start(rdev);
5688
5689 return 0;
5690}
a59781bb 5691
22c775ce
AD
5692static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5693{
5694 u32 data, orig, tmp, tmp2;
5695
5696 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5697
5698 cik_enable_gui_idle_interrupt(rdev, enable);
5699
5700 if (enable) {
5701 tmp = cik_halt_rlc(rdev);
5702
5703 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5704 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5705 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5706 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5707 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5708
5709 cik_update_rlc(rdev, tmp);
5710
5711 data |= CGCG_EN | CGLS_EN;
5712 } else {
5713 RREG32(CB_CGTT_SCLK_CTRL);
5714 RREG32(CB_CGTT_SCLK_CTRL);
5715 RREG32(CB_CGTT_SCLK_CTRL);
5716 RREG32(CB_CGTT_SCLK_CTRL);
5717
5718 data &= ~(CGCG_EN | CGLS_EN);
5719 }
5720
5721 if (orig != data)
5722 WREG32(RLC_CGCG_CGLS_CTRL, data);
5723
5724}
5725
5726static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5727{
5728 u32 data, orig, tmp = 0;
5729
5730 if (enable) {
5731 orig = data = RREG32(CP_MEM_SLP_CNTL);
5732 data |= CP_MEM_LS_EN;
5733 if (orig != data)
5734 WREG32(CP_MEM_SLP_CNTL, data);
5735
5736 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5737 data &= 0xfffffffd;
5738 if (orig != data)
5739 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5740
5741 tmp = cik_halt_rlc(rdev);
5742
5743 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5744 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5745 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5746 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5747 WREG32(RLC_SERDES_WR_CTRL, data);
5748
5749 cik_update_rlc(rdev, tmp);
5750
5751 orig = data = RREG32(CGTS_SM_CTRL_REG);
5752 data &= ~SM_MODE_MASK;
5753 data |= SM_MODE(0x2);
5754 data |= SM_MODE_ENABLE;
5755 data &= ~CGTS_OVERRIDE;
5756 data &= ~CGTS_LS_OVERRIDE;
5757 data &= ~ON_MONITOR_ADD_MASK;
5758 data |= ON_MONITOR_ADD_EN;
5759 data |= ON_MONITOR_ADD(0x96);
5760 if (orig != data)
5761 WREG32(CGTS_SM_CTRL_REG, data);
5762 } else {
5763 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5764 data |= 0x00000002;
5765 if (orig != data)
5766 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5767
5768 data = RREG32(RLC_MEM_SLP_CNTL);
5769 if (data & RLC_MEM_LS_EN) {
5770 data &= ~RLC_MEM_LS_EN;
5771 WREG32(RLC_MEM_SLP_CNTL, data);
5772 }
5773
5774 data = RREG32(CP_MEM_SLP_CNTL);
5775 if (data & CP_MEM_LS_EN) {
5776 data &= ~CP_MEM_LS_EN;
5777 WREG32(CP_MEM_SLP_CNTL, data);
5778 }
5779
5780 orig = data = RREG32(CGTS_SM_CTRL_REG);
5781 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5782 if (orig != data)
5783 WREG32(CGTS_SM_CTRL_REG, data);
5784
5785 tmp = cik_halt_rlc(rdev);
5786
5787 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5788 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5789 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5790 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5791 WREG32(RLC_SERDES_WR_CTRL, data);
5792
5793 cik_update_rlc(rdev, tmp);
5794 }
5795}
5796
5797static const u32 mc_cg_registers[] =
5798{
5799 MC_HUB_MISC_HUB_CG,
5800 MC_HUB_MISC_SIP_CG,
5801 MC_HUB_MISC_VM_CG,
5802 MC_XPB_CLK_GAT,
5803 ATC_MISC_CG,
5804 MC_CITF_MISC_WR_CG,
5805 MC_CITF_MISC_RD_CG,
5806 MC_CITF_MISC_VM_CG,
5807 VM_L2_CG,
5808};
5809
5810static void cik_enable_mc_ls(struct radeon_device *rdev,
5811 bool enable)
5812{
5813 int i;
5814 u32 orig, data;
5815
5816 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5817 orig = data = RREG32(mc_cg_registers[i]);
5818 if (enable)
5819 data |= MC_LS_ENABLE;
5820 else
5821 data &= ~MC_LS_ENABLE;
5822 if (data != orig)
5823 WREG32(mc_cg_registers[i], data);
5824 }
5825}
5826
5827static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5828 bool enable)
5829{
5830 int i;
5831 u32 orig, data;
5832
5833 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5834 orig = data = RREG32(mc_cg_registers[i]);
5835 if (enable)
5836 data |= MC_CG_ENABLE;
5837 else
5838 data &= ~MC_CG_ENABLE;
5839 if (data != orig)
5840 WREG32(mc_cg_registers[i], data);
5841 }
5842}
5843
5844static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5845 bool enable)
5846{
5847 u32 orig, data;
5848
5849 if (enable) {
5850 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5851 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5852 } else {
5853 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5854 data |= 0xff000000;
5855 if (data != orig)
5856 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5857
5858 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5859 data |= 0xff000000;
5860 if (data != orig)
5861 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5862 }
5863}
5864
5865static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5866 bool enable)
5867{
5868 u32 orig, data;
5869
5870 if (enable) {
5871 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5872 data |= 0x100;
5873 if (orig != data)
5874 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5875
5876 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5877 data |= 0x100;
5878 if (orig != data)
5879 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5880 } else {
5881 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5882 data &= ~0x100;
5883 if (orig != data)
5884 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5885
5886 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5887 data &= ~0x100;
5888 if (orig != data)
5889 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5890 }
5891}
5892
5893static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5894 bool enable)
5895{
5896 u32 orig, data;
5897
5898 if (enable) {
5899 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5900 data |= 0xfff;
5901 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5902
5903 orig = data = RREG32(UVD_CGC_CTRL);
5904 data |= DCM;
5905 if (orig != data)
5906 WREG32(UVD_CGC_CTRL, data);
5907 } else {
5908 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5909 data &= ~0xfff;
5910 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5911
5912 orig = data = RREG32(UVD_CGC_CTRL);
5913 data &= ~DCM;
5914 if (orig != data)
5915 WREG32(UVD_CGC_CTRL, data);
5916 }
5917}
5918
5919static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5920 bool enable)
5921{
5922 u32 orig, data;
5923
5924 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5925
5926 if (enable)
5927 data &= ~CLOCK_GATING_DIS;
5928 else
5929 data |= CLOCK_GATING_DIS;
5930
5931 if (orig != data)
5932 WREG32(HDP_HOST_PATH_CNTL, data);
5933}
5934
5935static void cik_enable_hdp_ls(struct radeon_device *rdev,
5936 bool enable)
5937{
5938 u32 orig, data;
5939
5940 orig = data = RREG32(HDP_MEM_POWER_LS);
5941
5942 if (enable)
5943 data |= HDP_LS_ENABLE;
5944 else
5945 data &= ~HDP_LS_ENABLE;
5946
5947 if (orig != data)
5948 WREG32(HDP_MEM_POWER_LS, data);
5949}
5950
5951void cik_update_cg(struct radeon_device *rdev,
5952 u32 block, bool enable)
5953{
5954 if (block & RADEON_CG_BLOCK_GFX) {
5955 /* order matters! */
5956 if (enable) {
5957 cik_enable_mgcg(rdev, true);
5958 cik_enable_cgcg(rdev, true);
5959 } else {
5960 cik_enable_cgcg(rdev, false);
5961 cik_enable_mgcg(rdev, false);
5962 }
5963 }
5964
5965 if (block & RADEON_CG_BLOCK_MC) {
5966 if (!(rdev->flags & RADEON_IS_IGP)) {
5967 cik_enable_mc_mgcg(rdev, enable);
5968 cik_enable_mc_ls(rdev, enable);
5969 }
5970 }
5971
5972 if (block & RADEON_CG_BLOCK_SDMA) {
5973 cik_enable_sdma_mgcg(rdev, enable);
5974 cik_enable_sdma_mgls(rdev, enable);
5975 }
5976
5977 if (block & RADEON_CG_BLOCK_UVD) {
5978 if (rdev->has_uvd)
5979 cik_enable_uvd_mgcg(rdev, enable);
5980 }
5981
5982 if (block & RADEON_CG_BLOCK_HDP) {
5983 cik_enable_hdp_mgcg(rdev, enable);
5984 cik_enable_hdp_ls(rdev, enable);
5985 }
5986}
5987
5988static void cik_init_cg(struct radeon_device *rdev)
5989{
5990
5991 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
5992
5993 if (rdev->has_uvd)
5994 si_init_uvd_internal_cg(rdev);
5995
5996 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5997 RADEON_CG_BLOCK_SDMA |
5998 RADEON_CG_BLOCK_UVD |
5999 RADEON_CG_BLOCK_HDP), true);
6000}
6001
6002static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6003 bool enable)
6004{
6005 u32 data, orig;
6006
6007 orig = data = RREG32(RLC_PG_CNTL);
6008 if (enable)
6009 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6010 else
6011 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6012 if (orig != data)
6013 WREG32(RLC_PG_CNTL, data);
6014}
6015
6016static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6017 bool enable)
6018{
6019 u32 data, orig;
6020
6021 orig = data = RREG32(RLC_PG_CNTL);
6022 if (enable)
6023 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6024 else
6025 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6026 if (orig != data)
6027 WREG32(RLC_PG_CNTL, data);
6028}
6029
6030static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6031{
6032 u32 data, orig;
6033
6034 orig = data = RREG32(RLC_PG_CNTL);
6035 if (enable)
6036 data &= ~DISABLE_CP_PG;
6037 else
6038 data |= DISABLE_CP_PG;
6039 if (orig != data)
6040 WREG32(RLC_PG_CNTL, data);
6041}
6042
6043static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6044{
6045 u32 data, orig;
6046
6047 orig = data = RREG32(RLC_PG_CNTL);
6048 if (enable)
6049 data &= ~DISABLE_GDS_PG;
6050 else
6051 data |= DISABLE_GDS_PG;
6052 if (orig != data)
6053 WREG32(RLC_PG_CNTL, data);
6054}
6055
6056#define CP_ME_TABLE_SIZE 96
6057#define CP_ME_TABLE_OFFSET 2048
6058#define CP_MEC_TABLE_OFFSET 4096
6059
6060void cik_init_cp_pg_table(struct radeon_device *rdev)
6061{
6062 const __be32 *fw_data;
6063 volatile u32 *dst_ptr;
6064 int me, i, max_me = 4;
6065 u32 bo_offset = 0;
6066 u32 table_offset;
6067
6068 if (rdev->family == CHIP_KAVERI)
6069 max_me = 5;
6070
6071 if (rdev->rlc.cp_table_ptr == NULL)
6072 return;
6073
6074 /* write the cp table buffer */
6075 dst_ptr = rdev->rlc.cp_table_ptr;
6076 for (me = 0; me < max_me; me++) {
6077 if (me == 0) {
6078 fw_data = (const __be32 *)rdev->ce_fw->data;
6079 table_offset = CP_ME_TABLE_OFFSET;
6080 } else if (me == 1) {
6081 fw_data = (const __be32 *)rdev->pfp_fw->data;
6082 table_offset = CP_ME_TABLE_OFFSET;
6083 } else if (me == 2) {
6084 fw_data = (const __be32 *)rdev->me_fw->data;
6085 table_offset = CP_ME_TABLE_OFFSET;
6086 } else {
6087 fw_data = (const __be32 *)rdev->mec_fw->data;
6088 table_offset = CP_MEC_TABLE_OFFSET;
6089 }
6090
6091 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6092 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
6093 }
6094 bo_offset += CP_ME_TABLE_SIZE;
6095 }
6096}
6097
6098static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6099 bool enable)
6100{
6101 u32 data, orig;
6102
6103 if (enable) {
6104 orig = data = RREG32(RLC_PG_CNTL);
6105 data |= GFX_PG_ENABLE;
6106 if (orig != data)
6107 WREG32(RLC_PG_CNTL, data);
6108
6109 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6110 data |= AUTO_PG_EN;
6111 if (orig != data)
6112 WREG32(RLC_AUTO_PG_CTRL, data);
6113 } else {
6114 orig = data = RREG32(RLC_PG_CNTL);
6115 data &= ~GFX_PG_ENABLE;
6116 if (orig != data)
6117 WREG32(RLC_PG_CNTL, data);
6118
6119 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6120 data &= ~AUTO_PG_EN;
6121 if (orig != data)
6122 WREG32(RLC_AUTO_PG_CTRL, data);
6123
6124 data = RREG32(DB_RENDER_CONTROL);
6125 }
6126}
6127
6128static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6129{
6130 u32 mask = 0, tmp, tmp1;
6131 int i;
6132
6133 cik_select_se_sh(rdev, se, sh);
6134 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6135 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6136 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6137
6138 tmp &= 0xffff0000;
6139
6140 tmp |= tmp1;
6141 tmp >>= 16;
6142
6143 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6144 mask <<= 1;
6145 mask |= 1;
6146 }
6147
6148 return (~tmp) & mask;
6149}
6150
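/* The shift loop above builds a mask with one bit per possible CU; an
 * equivalent one-liner (illustrative sketch, assumes max_cu_per_sh < 32):
 */
static inline u32 cik_cu_mask_example(u32 max_cu_per_sh)
{
	return (1u << max_cu_per_sh) - 1;
}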
6151static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6152{
6153 u32 i, j, k, active_cu_number = 0;
6154 u32 mask, counter, cu_bitmap;
6155 u32 tmp = 0;
6156
6157 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6158 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6159 mask = 1;
6160 cu_bitmap = 0;
6161 counter = 0;
6162 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6163 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6164 if (counter < 2)
6165 cu_bitmap |= mask;
6166 counter++;
6167 }
6168 mask <<= 1;
6169 }
6170
6171 active_cu_number += counter;
6172 tmp |= (cu_bitmap << (i * 16 + j * 8));
6173 }
6174 }
6175
6176 WREG32(RLC_PG_AO_CU_MASK, tmp);
6177
6178 tmp = RREG32(RLC_MAX_PG_CU);
6179 tmp &= ~MAX_PU_CU_MASK;
6180 tmp |= MAX_PU_CU(active_cu_number);
6181 WREG32(RLC_MAX_PG_CU, tmp);
6182}
6183
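/* Illustrative helper (not in this driver) restating how the per-SE/SH
 * always-on CU bitmaps are packed into RLC_PG_AO_CU_MASK above: 16 bits
 * per shader engine, 8 bits per shader array within it.
 */
static inline u32 cik_ao_cu_mask_shift(u32 se, u32 sh)
{
	return se * 16 + sh * 8;
}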
6184static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6185 bool enable)
6186{
6187 u32 data, orig;
6188
6189 orig = data = RREG32(RLC_PG_CNTL);
6190 if (enable)
6191 data |= STATIC_PER_CU_PG_ENABLE;
6192 else
6193 data &= ~STATIC_PER_CU_PG_ENABLE;
6194 if (orig != data)
6195 WREG32(RLC_PG_CNTL, data);
6196}
6197
6198static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6199 bool enable)
6200{
6201 u32 data, orig;
6202
6203 orig = data = RREG32(RLC_PG_CNTL);
6204 if (enable)
6205 data |= DYN_PER_CU_PG_ENABLE;
6206 else
6207 data &= ~DYN_PER_CU_PG_ENABLE;
6208 if (orig != data)
6209 WREG32(RLC_PG_CNTL, data);
6210}
6211
6212#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6213#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6214
6215static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6216{
6217 u32 data, orig;
6218 u32 i;
6219
6220 if (rdev->rlc.cs_data) {
6221 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6222 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6223 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
6224 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6225 } else {
6226 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6227 for (i = 0; i < 3; i++)
6228 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6229 }
6230 if (rdev->rlc.reg_list) {
6231 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6232 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6233 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6234 }
6235
6236 orig = data = RREG32(RLC_PG_CNTL);
6237 data |= GFX_PG_SRC;
6238 if (orig != data)
6239 WREG32(RLC_PG_CNTL, data);
6240
6241 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6242 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6243
6244 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6245 data &= ~IDLE_POLL_COUNT_MASK;
6246 data |= IDLE_POLL_COUNT(0x60);
6247 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6248
6249 data = 0x10101010;
6250 WREG32(RLC_PG_DELAY, data);
6251
6252 data = RREG32(RLC_PG_DELAY_2);
6253 data &= ~0xff;
6254 data |= 0x3;
6255 WREG32(RLC_PG_DELAY_2, data);
6256
6257 data = RREG32(RLC_AUTO_PG_CTRL);
6258 data &= ~GRBM_REG_SGIT_MASK;
6259 data |= GRBM_REG_SGIT(0x700);
6260 WREG32(RLC_AUTO_PG_CTRL, data);
6261
6262}
6263
6264static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6265{
6266 bool has_pg = false;
6267 bool has_dyn_mgpg = false;
6268 bool has_static_mgpg = false;
6269
6270 /* only APUs have PG */
6271 if (rdev->flags & RADEON_IS_IGP) {
6272 has_pg = true;
6273 has_static_mgpg = true;
6274 if (rdev->family == CHIP_KAVERI)
6275 has_dyn_mgpg = true;
6276 }
6277
6278 if (has_pg) {
6279 cik_enable_gfx_cgpg(rdev, enable);
6280 if (enable) {
6281 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6282 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6283 } else {
6284 cik_enable_gfx_static_mgpg(rdev, false);
6285 cik_enable_gfx_dynamic_mgpg(rdev, false);
6286 }
6287 }
6288
6289}
6290
6291void cik_init_pg(struct radeon_device *rdev)
6292{
6293 bool has_pg = false;
6294
6295 /* only APUs have PG */
6296 if (rdev->flags & RADEON_IS_IGP) {
6297 /* XXX disable this for now */
6298 /* has_pg = true; */
6299 }
6300
6301 if (has_pg) {
6302 cik_enable_sck_slowdown_on_pu(rdev, true);
6303 cik_enable_sck_slowdown_on_pd(rdev, true);
6304 cik_init_gfx_cgpg(rdev);
6305 cik_enable_cp_pg(rdev, true);
6306 cik_enable_gds_pg(rdev, true);
6307 cik_init_ao_cu_mask(rdev);
6308 cik_update_gfx_pg(rdev, true);
6309 }
6310}
6311
a59781bb
AD
6312/*
6313 * Interrupts
6314 * Starting with r6xx, interrupts are handled via a ring buffer.
6315 * Ring buffers are areas of GPU accessible memory that the GPU
6316 * writes interrupt vectors into and the host reads vectors out of.
6317 * There is a rptr (read pointer) that determines where the
6318 * host is currently reading, and a wptr (write pointer)
6319 * which determines where the GPU has written. When the
6320 * pointers are equal, the ring is idle. When the GPU
6321 * writes vectors to the ring buffer, it increments the
6322 * wptr. When there is an interrupt, the host then starts
6323 * fetching vectors and processing them until the pointers are
6324 * equal again, at which point it updates the rptr.
6325 */
6326
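/* Sketch of the consumer side of the rptr/wptr protocol described above
 * (illustrative helper, not part of this driver): each IV entry is 16
 * bytes, and the pointers are byte offsets wrapped by the ring's pointer
 * mask, as in cik_get_ih_wptr() below.
 */
static inline u32 cik_ih_next_rptr_example(u32 rptr, u32 ptr_mask)
{
	/* consume one 16 byte vector and wrap at the end of the ring */
	return (rptr + 16) & ptr_mask;
}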
6327/**
6328 * cik_enable_interrupts - Enable the interrupt ring buffer
6329 *
6330 * @rdev: radeon_device pointer
6331 *
6332 * Enable the interrupt ring buffer (CIK).
6333 */
6334static void cik_enable_interrupts(struct radeon_device *rdev)
6335{
6336 u32 ih_cntl = RREG32(IH_CNTL);
6337 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6338
6339 ih_cntl |= ENABLE_INTR;
6340 ih_rb_cntl |= IH_RB_ENABLE;
6341 WREG32(IH_CNTL, ih_cntl);
6342 WREG32(IH_RB_CNTL, ih_rb_cntl);
6343 rdev->ih.enabled = true;
6344}
6345
6346/**
6347 * cik_disable_interrupts - Disable the interrupt ring buffer
6348 *
6349 * @rdev: radeon_device pointer
6350 *
6351 * Disable the interrupt ring buffer (CIK).
6352 */
6353static void cik_disable_interrupts(struct radeon_device *rdev)
6354{
6355 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6356 u32 ih_cntl = RREG32(IH_CNTL);
6357
6358 ih_rb_cntl &= ~IH_RB_ENABLE;
6359 ih_cntl &= ~ENABLE_INTR;
6360 WREG32(IH_RB_CNTL, ih_rb_cntl);
6361 WREG32(IH_CNTL, ih_cntl);
6362 /* set rptr, wptr to 0 */
6363 WREG32(IH_RB_RPTR, 0);
6364 WREG32(IH_RB_WPTR, 0);
6365 rdev->ih.enabled = false;
6366 rdev->ih.rptr = 0;
6367}
6368
6369/**
6370 * cik_disable_interrupt_state - Disable all interrupt sources
6371 *
6372 * @rdev: radeon_device pointer
6373 *
6374 * Clear all interrupt enable bits used by the driver (CIK).
6375 */
6376static void cik_disable_interrupt_state(struct radeon_device *rdev)
6377{
6378 u32 tmp;
6379
6380 /* gfx ring */
6381 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
21a93e13
AD
6382 /* sdma */
6383 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6384 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6385 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6386 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
a59781bb
AD
6387 /* compute queues */
6388 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6389 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6390 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6391 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6392 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6393 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6394 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6395 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6396 /* grbm */
6397 WREG32(GRBM_INT_CNTL, 0);
6398 /* vline/vblank, etc. */
6399 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6400 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6401 if (rdev->num_crtc >= 4) {
6402 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6403 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6404 }
6405 if (rdev->num_crtc >= 6) {
6406 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6407 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6408 }
6409
6410 /* dac hotplug */
6411 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6412
6413 /* digital hotplug */
6414 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6415 WREG32(DC_HPD1_INT_CONTROL, tmp);
6416 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6417 WREG32(DC_HPD2_INT_CONTROL, tmp);
6418 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6419 WREG32(DC_HPD3_INT_CONTROL, tmp);
6420 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6421 WREG32(DC_HPD4_INT_CONTROL, tmp);
6422 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6423 WREG32(DC_HPD5_INT_CONTROL, tmp);
6424 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6425 WREG32(DC_HPD6_INT_CONTROL, tmp);
6426
6427}
6428
6429/**
6430 * cik_irq_init - init and enable the interrupt ring
6431 *
6432 * @rdev: radeon_device pointer
6433 *
6434 * Allocate a ring buffer for the interrupt controller,
6435 * enable the RLC, disable interrupts, set up the IH
6436 * ring buffer, and enable it (CIK).
6437 * Called at device load and resume.
6438 * Returns 0 for success, errors for failure.
6439 */
6440static int cik_irq_init(struct radeon_device *rdev)
6441{
6442 int ret = 0;
6443 int rb_bufsz;
6444 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6445
6446 /* allocate ring */
6447 ret = r600_ih_ring_alloc(rdev);
6448 if (ret)
6449 return ret;
6450
6451 /* disable irqs */
6452 cik_disable_interrupts(rdev);
6453
6454 /* init rlc */
6455 ret = cik_rlc_resume(rdev);
6456 if (ret) {
6457 r600_ih_ring_fini(rdev);
6458 return ret;
6459 }
6460
6461 /* setup interrupt control */
6462 /* XXX this should actually be a bus address, not an MC address. Same on older asics */
6463 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6464 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6465 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6466 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6467 */
6468 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6469 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6470 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6471 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6472
6473 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6474 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6475
6476 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6477 IH_WPTR_OVERFLOW_CLEAR |
6478 (rb_bufsz << 1));
6479
6480 if (rdev->wb.enabled)
6481 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6482
6483 /* set the writeback address whether it's enabled or not */
6484 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6485 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6486
6487 WREG32(IH_RB_CNTL, ih_rb_cntl);
6488
6489 /* set rptr, wptr to 0 */
6490 WREG32(IH_RB_RPTR, 0);
6491 WREG32(IH_RB_WPTR, 0);
6492
6493 /* Default settings for IH_CNTL (disabled at first) */
6494 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6495 /* RPTR_REARM only works if msi's are enabled */
6496 if (rdev->msi_enabled)
6497 ih_cntl |= RPTR_REARM;
6498 WREG32(IH_CNTL, ih_cntl);
6499
6500 /* force the active interrupt state to all disabled */
6501 cik_disable_interrupt_state(rdev);
6502
6503 pci_set_master(rdev->pdev);
6504
6505 /* enable irqs */
6506 cik_enable_interrupts(rdev);
6507
6508 return ret;
6509}
6510
6511/**
6512 * cik_irq_set - enable/disable interrupt sources
6513 *
6514 * @rdev: radeon_device pointer
6515 *
6516 * Enable interrupt sources on the GPU (vblanks, hpd,
6517 * etc.) (CIK).
6518 * Returns 0 for success, errors for failure.
6519 */
6520int cik_irq_set(struct radeon_device *rdev)
6521{
6522 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6523 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
2b0781a6
AD
6524 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6525 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
a59781bb
AD
6526 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6527 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6528 u32 grbm_int_cntl = 0;
21a93e13 6529 u32 dma_cntl, dma_cntl1;
a59781bb
AD
6530
6531 if (!rdev->irq.installed) {
6532 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6533 return -EINVAL;
6534 }
6535 /* don't enable anything if the ih is disabled */
6536 if (!rdev->ih.enabled) {
6537 cik_disable_interrupts(rdev);
6538 /* force the active interrupt state to all disabled */
6539 cik_disable_interrupt_state(rdev);
6540 return 0;
6541 }
6542
6543 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6544 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6545 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6546 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6547 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6548 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6549
21a93e13
AD
6550 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6551 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6552
2b0781a6
AD
6553 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6554 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6555 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6556 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6557 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6558 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6559 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6560 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6561
a59781bb
AD
6562 /* enable CP interrupts on all rings */
6563 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6564 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6565 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6566 }
2b0781a6
AD
6567 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6568 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6569 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6570 if (ring->me == 1) {
6571 switch (ring->pipe) {
6572 case 0:
6573 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6574 break;
6575 case 1:
6576 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6577 break;
6578 case 2:
6579 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6580 break;
6581 case 3:
6582 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6583 break;
6584 default:
6585 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6586 break;
6587 }
6588 } else if (ring->me == 2) {
6589 switch (ring->pipe) {
6590 case 0:
6591 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6592 break;
6593 case 1:
6594 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6595 break;
6596 case 2:
6597 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6598 break;
6599 case 3:
6600 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6601 break;
6602 default:
6603 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6604 break;
6605 }
6606 } else {
6607 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6608 }
6609 }
6610 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6611 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6612 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6613 if (ring->me == 1) {
6614 switch (ring->pipe) {
6615 case 0:
6616 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6617 break;
6618 case 1:
6619 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6620 break;
6621 case 2:
6622 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6623 break;
6624 case 3:
6625 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6626 break;
6627 default:
6628 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6629 break;
6630 }
6631 } else if (ring->me == 2) {
6632 switch (ring->pipe) {
6633 case 0:
6634 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6635 break;
6636 case 1:
6637 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6638 break;
6639 case 2:
6640 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6641 break;
6642 case 3:
6643 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6644 break;
6645 default:
6646 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6647 break;
6648 }
6649 } else {
6650 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6651 }
6652 }
a59781bb 6653
21a93e13
AD
6654 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6655 DRM_DEBUG("cik_irq_set: sw int dma\n");
6656 dma_cntl |= TRAP_ENABLE;
6657 }
6658
6659 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6660 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6661 dma_cntl1 |= TRAP_ENABLE;
6662 }
6663
a59781bb
AD
6664 if (rdev->irq.crtc_vblank_int[0] ||
6665 atomic_read(&rdev->irq.pflip[0])) {
6666 DRM_DEBUG("cik_irq_set: vblank 0\n");
6667 crtc1 |= VBLANK_INTERRUPT_MASK;
6668 }
6669 if (rdev->irq.crtc_vblank_int[1] ||
6670 atomic_read(&rdev->irq.pflip[1])) {
6671 DRM_DEBUG("cik_irq_set: vblank 1\n");
6672 crtc2 |= VBLANK_INTERRUPT_MASK;
6673 }
6674 if (rdev->irq.crtc_vblank_int[2] ||
6675 atomic_read(&rdev->irq.pflip[2])) {
6676 DRM_DEBUG("cik_irq_set: vblank 2\n");
6677 crtc3 |= VBLANK_INTERRUPT_MASK;
6678 }
6679 if (rdev->irq.crtc_vblank_int[3] ||
6680 atomic_read(&rdev->irq.pflip[3])) {
6681 DRM_DEBUG("cik_irq_set: vblank 3\n");
6682 crtc4 |= VBLANK_INTERRUPT_MASK;
6683 }
6684 if (rdev->irq.crtc_vblank_int[4] ||
6685 atomic_read(&rdev->irq.pflip[4])) {
6686 DRM_DEBUG("cik_irq_set: vblank 4\n");
6687 crtc5 |= VBLANK_INTERRUPT_MASK;
6688 }
6689 if (rdev->irq.crtc_vblank_int[5] ||
6690 atomic_read(&rdev->irq.pflip[5])) {
6691 DRM_DEBUG("cik_irq_set: vblank 5\n");
6692 crtc6 |= VBLANK_INTERRUPT_MASK;
6693 }
6694 if (rdev->irq.hpd[0]) {
6695 DRM_DEBUG("cik_irq_set: hpd 1\n");
6696 hpd1 |= DC_HPDx_INT_EN;
6697 }
6698 if (rdev->irq.hpd[1]) {
6699 DRM_DEBUG("cik_irq_set: hpd 2\n");
6700 hpd2 |= DC_HPDx_INT_EN;
6701 }
6702 if (rdev->irq.hpd[2]) {
6703 DRM_DEBUG("cik_irq_set: hpd 3\n");
6704 hpd3 |= DC_HPDx_INT_EN;
6705 }
6706 if (rdev->irq.hpd[3]) {
6707 DRM_DEBUG("cik_irq_set: hpd 4\n");
6708 hpd4 |= DC_HPDx_INT_EN;
6709 }
6710 if (rdev->irq.hpd[4]) {
6711 DRM_DEBUG("cik_irq_set: hpd 5\n");
6712 hpd5 |= DC_HPDx_INT_EN;
6713 }
6714 if (rdev->irq.hpd[5]) {
6715 DRM_DEBUG("cik_irq_set: hpd 6\n");
6716 hpd6 |= DC_HPDx_INT_EN;
6717 }
6718
6719 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6720
21a93e13
AD
6721 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6722 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6723
2b0781a6
AD
6724 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6725 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6726 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6727 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6728 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6729 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6730 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6731 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6732
a59781bb
AD
6733 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6734
6735 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6736 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6737 if (rdev->num_crtc >= 4) {
6738 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6739 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6740 }
6741 if (rdev->num_crtc >= 6) {
6742 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6743 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6744 }
6745
6746 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6747 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6748 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6749 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6750 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6751 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6752
6753 return 0;
6754}
6755
6756/**
6757 * cik_irq_ack - ack interrupt sources
6758 *
6759 * @rdev: radeon_device pointer
6760 *
6761 * Ack interrupt sources on the GPU (vblanks, hpd,
6762 * etc.) (CIK). Certain interrupt sources are sw
6763 * generated and do not require an explicit ack.
6764 */
6765static inline void cik_irq_ack(struct radeon_device *rdev)
6766{
6767 u32 tmp;
6768
6769 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6770 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6771 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6772 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6773 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6774 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6775 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6776
6777 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6778 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6779 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6780 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6781 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6782 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6783 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6784 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6785
6786 if (rdev->num_crtc >= 4) {
6787 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6788 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6789 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6790 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6791 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6792 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6793 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6794 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6795 }
6796
6797 if (rdev->num_crtc >= 6) {
6798 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6799 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6800 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6801 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6802 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6803 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6804 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6805 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6806 }
6807
6808 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6809 tmp = RREG32(DC_HPD1_INT_CONTROL);
6810 tmp |= DC_HPDx_INT_ACK;
6811 WREG32(DC_HPD1_INT_CONTROL, tmp);
6812 }
6813 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6814 tmp = RREG32(DC_HPD2_INT_CONTROL);
6815 tmp |= DC_HPDx_INT_ACK;
6816 WREG32(DC_HPD2_INT_CONTROL, tmp);
6817 }
6818 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6819 tmp = RREG32(DC_HPD3_INT_CONTROL);
6820 tmp |= DC_HPDx_INT_ACK;
6821 WREG32(DC_HPD3_INT_CONTROL, tmp);
6822 }
6823 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6824 tmp = RREG32(DC_HPD4_INT_CONTROL);
6825 tmp |= DC_HPDx_INT_ACK;
6826 WREG32(DC_HPD4_INT_CONTROL, tmp);
6827 }
6828 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6829 tmp = RREG32(DC_HPD5_INT_CONTROL);
6830 tmp |= DC_HPDx_INT_ACK;
6831 WREG32(DC_HPD5_INT_CONTROL, tmp);
6832 }
6833 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6834 tmp = RREG32(DC_HPD6_INT_CONTROL);
6835 tmp |= DC_HPDx_INT_ACK;
6836 WREG32(DC_HPD6_INT_CONTROL, tmp);
6837 }
6838}
6839
6840/**
6841 * cik_irq_disable - disable interrupts
6842 *
6843 * @rdev: radeon_device pointer
6844 *
6845 * Disable interrupts on the hw (CIK).
6846 */
6847static void cik_irq_disable(struct radeon_device *rdev)
6848{
6849 cik_disable_interrupts(rdev);
6850 /* Wait and acknowledge irq */
6851 mdelay(1);
6852 cik_irq_ack(rdev);
6853 cik_disable_interrupt_state(rdev);
6854}
6855
6856/**
6857 * cik_irq_suspend - disable interrupts for suspend
6858 *
6859 * @rdev: radeon_device pointer
6860 *
6861 * Disable interrupts and stop the RLC (CIK).
6862 * Used for suspend.
6863 */
6864static void cik_irq_suspend(struct radeon_device *rdev)
6865{
6866 cik_irq_disable(rdev);
6867 cik_rlc_stop(rdev);
6868}
6869
6870/**
6871 * cik_irq_fini - tear down interrupt support
6872 *
6873 * @rdev: radeon_device pointer
6874 *
6875 * Disable interrupts on the hw and free the IH ring
6876 * buffer (CIK).
6877 * Used for driver unload.
6878 */
6879static void cik_irq_fini(struct radeon_device *rdev)
6880{
6881 cik_irq_suspend(rdev);
6882 r600_ih_ring_fini(rdev);
6883}
6884
6885/**
6886 * cik_get_ih_wptr - get the IH ring buffer wptr
6887 *
6888 * @rdev: radeon_device pointer
6889 *
6890 * Get the IH ring buffer wptr from either the register
6891 * or the writeback memory buffer (CIK). Also check for
6892 * ring buffer overflow and deal with it.
6893 * Used by cik_irq_process().
6894 * Returns the value of the wptr.
6895 */
6896static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6897{
6898 u32 wptr, tmp;
6899
6900 if (rdev->wb.enabled)
6901 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6902 else
6903 wptr = RREG32(IH_RB_WPTR);
6904
6905 if (wptr & RB_OVERFLOW) {
6906 /* When a ring buffer overflow happens, start parsing interrupts
6907 * from the last vector that was not overwritten (wptr + 16).
6908 * Hopefully this allows us to catch up.
6909 */
6910 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6911 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6912 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6913 tmp = RREG32(IH_RB_CNTL);
6914 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6915 WREG32(IH_RB_CNTL, tmp);
6916 }
6917 return (wptr & rdev->ih.ptr_mask);
6918}
6919
6920/* CIK IV Ring
6921 * Each IV ring entry is 128 bits:
6922 * [7:0] - interrupt source id
6923 * [31:8] - reserved
6924 * [59:32] - interrupt source data
6925 * [63:60] - reserved
21a93e13
AD
6926 * [71:64] - RINGID
6927 * CP:
6928 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
a59781bb
AD
6929 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6930 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6931 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6932 * PIPE_ID - ME0 0=3D
6933 * - ME1&2 compute dispatcher (4 pipes each)
21a93e13
AD
6934 * SDMA:
6935 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6936 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6937 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
a59781bb
AD
6938 * [79:72] - VMID
6939 * [95:80] - PASID
6940 * [127:96] - reserved
6941 */
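/* Illustrative decode of one IV ring entry per the layout above (a sketch
 * for exposition; cik_irq_process() below open-codes the same masks). The
 * entry is read as four little-endian dwords.
 */
struct cik_iv_entry_example {
	u32 src_id;   /* [7:0] */
	u32 src_data; /* [59:32] */
	u32 ring_id;  /* [71:64] */
	u32 vmid;     /* [79:72] */
	u32 pasid;    /* [95:80] */
};

static inline void cik_decode_iv_example(const __le32 *dw,
					 struct cik_iv_entry_example *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = le32_to_cpu(dw[2]) >> 16;
}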
6942/**
6943 * cik_irq_process - interrupt handler
6944 *
6945 * @rdev: radeon_device pointer
6946 *
6947 * Interrupt handler (CIK). Walk the IH ring,
6948 * ack interrupts and schedule work to handle
6949 * interrupt events.
6950 * Returns irq process return code.
6951 */
6952int cik_irq_process(struct radeon_device *rdev)
6953{
2b0781a6
AD
6954 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6955 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
a59781bb
AD
6956 u32 wptr;
6957 u32 rptr;
6958 u32 src_id, src_data, ring_id;
6959 u8 me_id, pipe_id, queue_id;
6960 u32 ring_index;
6961 bool queue_hotplug = false;
6962 bool queue_reset = false;
3ec7d11b 6963 u32 addr, status, mc_client;
a59781bb
AD
6964
6965 if (!rdev->ih.enabled || rdev->shutdown)
6966 return IRQ_NONE;
6967
6968 wptr = cik_get_ih_wptr(rdev);
6969
6970restart_ih:
6971 /* is somebody else already processing irqs? */
6972 if (atomic_xchg(&rdev->ih.lock, 1))
6973 return IRQ_NONE;
6974
6975 rptr = rdev->ih.rptr;
6976 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6977
6978 /* Order reading of wptr vs. reading of IH ring data */
6979 rmb();
6980
6981 /* display interrupts */
6982 cik_irq_ack(rdev);
6983
6984 while (rptr != wptr) {
6985 /* wptr/rptr are in bytes! */
6986 ring_index = rptr / 4;
6987 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6988 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6989 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
a59781bb
AD
6990
6991 switch (src_id) {
6992 case 1: /* D1 vblank/vline */
6993 switch (src_data) {
6994 case 0: /* D1 vblank */
6995 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6996 if (rdev->irq.crtc_vblank_int[0]) {
6997 drm_handle_vblank(rdev->ddev, 0);
6998 rdev->pm.vblank_sync = true;
6999 wake_up(&rdev->irq.vblank_queue);
7000 }
7001 if (atomic_read(&rdev->irq.pflip[0]))
7002 radeon_crtc_handle_flip(rdev, 0);
7003 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7004 DRM_DEBUG("IH: D1 vblank\n");
7005 }
7006 break;
7007 case 1: /* D1 vline */
7008 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7009 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7010 DRM_DEBUG("IH: D1 vline\n");
7011 }
7012 break;
7013 default:
7014 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7015 break;
7016 }
7017 break;
7018 case 2: /* D2 vblank/vline */
7019 switch (src_data) {
7020 case 0: /* D2 vblank */
7021 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7022 if (rdev->irq.crtc_vblank_int[1]) {
7023 drm_handle_vblank(rdev->ddev, 1);
7024 rdev->pm.vblank_sync = true;
7025 wake_up(&rdev->irq.vblank_queue);
7026 }
7027 if (atomic_read(&rdev->irq.pflip[1]))
7028 radeon_crtc_handle_flip(rdev, 1);
7029 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7030 DRM_DEBUG("IH: D2 vblank\n");
7031 }
7032 break;
7033 case 1: /* D2 vline */
7034 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7035 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7036 DRM_DEBUG("IH: D2 vline\n");
7037 }
7038 break;
7039 default:
7040 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7041 break;
7042 }
7043 break;
7044 case 3: /* D3 vblank/vline */
7045 switch (src_data) {
7046 case 0: /* D3 vblank */
7047 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7048 if (rdev->irq.crtc_vblank_int[2]) {
7049 drm_handle_vblank(rdev->ddev, 2);
7050 rdev->pm.vblank_sync = true;
7051 wake_up(&rdev->irq.vblank_queue);
7052 }
7053 if (atomic_read(&rdev->irq.pflip[2]))
7054 radeon_crtc_handle_flip(rdev, 2);
7055 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7056 DRM_DEBUG("IH: D3 vblank\n");
7057 }
7058 break;
7059 case 1: /* D3 vline */
7060 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7061 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7062 DRM_DEBUG("IH: D3 vline\n");
7063 }
7064 break;
7065 default:
7066 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7067 break;
7068 }
7069 break;
7070 case 4: /* D4 vblank/vline */
7071 switch (src_data) {
7072 case 0: /* D4 vblank */
7073 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7074 if (rdev->irq.crtc_vblank_int[3]) {
7075 drm_handle_vblank(rdev->ddev, 3);
7076 rdev->pm.vblank_sync = true;
7077 wake_up(&rdev->irq.vblank_queue);
7078 }
7079 if (atomic_read(&rdev->irq.pflip[3]))
7080 radeon_crtc_handle_flip(rdev, 3);
7081 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7082 DRM_DEBUG("IH: D4 vblank\n");
7083 }
7084 break;
7085 case 1: /* D4 vline */
7086 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7087 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7088 DRM_DEBUG("IH: D4 vline\n");
7089 }
7090 break;
7091 default:
7092 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7093 break;
7094 }
7095 break;
7096 case 5: /* D5 vblank/vline */
7097 switch (src_data) {
7098 case 0: /* D5 vblank */
7099 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7100 if (rdev->irq.crtc_vblank_int[4]) {
7101 drm_handle_vblank(rdev->ddev, 4);
7102 rdev->pm.vblank_sync = true;
7103 wake_up(&rdev->irq.vblank_queue);
7104 }
7105 if (atomic_read(&rdev->irq.pflip[4]))
7106 radeon_crtc_handle_flip(rdev, 4);
7107 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7108 DRM_DEBUG("IH: D5 vblank\n");
7109 }
7110 break;
7111 case 1: /* D5 vline */
7112 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7113 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7114 DRM_DEBUG("IH: D5 vline\n");
7115 }
7116 break;
7117 default:
7118 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7119 break;
7120 }
7121 break;
7122 case 6: /* D6 vblank/vline */
7123 switch (src_data) {
7124 case 0: /* D6 vblank */
7125 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7126 if (rdev->irq.crtc_vblank_int[5]) {
7127 drm_handle_vblank(rdev->ddev, 5);
7128 rdev->pm.vblank_sync = true;
7129 wake_up(&rdev->irq.vblank_queue);
7130 }
7131 if (atomic_read(&rdev->irq.pflip[5]))
7132 radeon_crtc_handle_flip(rdev, 5);
7133 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7134 DRM_DEBUG("IH: D6 vblank\n");
7135 }
7136 break;
7137 case 1: /* D6 vline */
7138 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7139 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7140 DRM_DEBUG("IH: D6 vline\n");
7141 }
7142 break;
7143 default:
7144 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7145 break;
7146 }
7147 break;
7148 case 42: /* HPD hotplug */
7149 switch (src_data) {
7150 case 0:
7151 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7152 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7153 queue_hotplug = true;
7154 DRM_DEBUG("IH: HPD1\n");
7155 }
7156 break;
7157 case 1:
7158 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7159 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7160 queue_hotplug = true;
7161 DRM_DEBUG("IH: HPD2\n");
7162 }
7163 break;
7164 case 2:
7165 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7166 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7167 queue_hotplug = true;
7168 DRM_DEBUG("IH: HPD3\n");
7169 }
7170 break;
7171 case 3:
7172 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7173 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7174 queue_hotplug = true;
7175 DRM_DEBUG("IH: HPD4\n");
7176 }
7177 break;
7178 case 4:
7179 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7180 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7181 queue_hotplug = true;
7182 DRM_DEBUG("IH: HPD5\n");
7183 }
7184 break;
7185 case 5:
7186 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7187 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7188 queue_hotplug = true;
7189 DRM_DEBUG("IH: HPD6\n");
7190 }
7191 break;
7192 default:
7193 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7194 break;
7195 }
7196 break;
7197 case 146:
7198 case 147:
7199 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7200 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7201 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7202 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7203 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7204 addr);
7205 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7206 status);
7207 cik_vm_decode_fault(rdev, status, addr, mc_client);
7208 /* reset addr and status */
7209 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7210 break;
7211 case 176: /* GFX RB CP_INT */
7212 case 177: /* GFX IB CP_INT */
7213 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7214 break;
7215 case 181: /* CP EOP event */
7216 DRM_DEBUG("IH: CP EOP\n");
7217 /* XXX check the bitfield order! */
7218 me_id = (ring_id & 0x60) >> 5;
7219 pipe_id = (ring_id & 0x18) >> 3;
7220 queue_id = (ring_id & 0x7) >> 0;
7221 switch (me_id) {
7222 case 0:
7223 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7224 break;
7225 case 1:
7226 case 2:
7227 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7228 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7229 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7230 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7231 break;
7232 }
7233 break;
7234 case 184: /* CP Privileged reg access */
7235 DRM_ERROR("Illegal register access in command stream\n");
7236 /* XXX check the bitfield order! */
7237 me_id = (ring_id & 0x60) >> 5;
7238 pipe_id = (ring_id & 0x18) >> 3;
7239 queue_id = (ring_id & 0x7) >> 0;
7240 switch (me_id) {
7241 case 0:
7242 /* This results in a full GPU reset, but all we need to do is soft
7243 * reset the CP for gfx
7244 */
7245 queue_reset = true;
7246 break;
7247 case 1:
7248 /* XXX compute */
7249 queue_reset = true;
7250 break;
7251 case 2:
7252 /* XXX compute */
7253 queue_reset = true;
7254 break;
7255 }
7256 break;
7257 case 185: /* CP Privileged inst */
7258 DRM_ERROR("Illegal instruction in command stream\n");
7259 /* XXX check the bitfield order! */
7260 me_id = (ring_id & 0x60) >> 5;
7261 pipe_id = (ring_id & 0x18) >> 3;
7262 queue_id = (ring_id & 0x7) >> 0;
7263 switch (me_id) {
7264 case 0:
7265 /* This results in a full GPU reset, but all we need to do is soft
7266 * reset the CP for gfx
7267 */
7268 queue_reset = true;
7269 break;
7270 case 1:
7271 /* XXX compute */
7272 queue_reset = true;
7273 break;
7274 case 2:
7275 /* XXX compute */
7276 queue_reset = true;
7277 break;
7278 }
7279 break;
7280 case 224: /* SDMA trap event */
7281 /* XXX check the bitfield order! */
7282 me_id = (ring_id & 0x3) >> 0;
7283 queue_id = (ring_id & 0xc) >> 2;
7284 DRM_DEBUG("IH: SDMA trap\n");
7285 switch (me_id) {
7286 case 0:
7287 switch (queue_id) {
7288 case 0:
7289 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7290 break;
7291 case 1:
7292 /* XXX compute */
7293 break;
7294 case 2:
7295 /* XXX compute */
7296 break;
7297 }
7298 break;
7299 case 1:
7300 switch (queue_id) {
7301 case 0:
7302 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7303 break;
7304 case 1:
7305 /* XXX compute */
7306 break;
7307 case 2:
7308 /* XXX compute */
7309 break;
7310 }
7311 break;
7312 }
7313 break;
7314 case 241: /* SDMA Privileged inst */
7315 case 247: /* SDMA Privileged inst */
7316 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7317 /* XXX check the bitfield order! */
7318 me_id = (ring_id & 0x3) >> 0;
7319 queue_id = (ring_id & 0xc) >> 2;
7320 switch (me_id) {
7321 case 0:
7322 switch (queue_id) {
7323 case 0:
7324 queue_reset = true;
7325 break;
7326 case 1:
7327 /* XXX compute */
7328 queue_reset = true;
7329 break;
7330 case 2:
7331 /* XXX compute */
7332 queue_reset = true;
7333 break;
7334 }
7335 break;
7336 case 1:
7337 switch (queue_id) {
7338 case 0:
7339 queue_reset = true;
7340 break;
7341 case 1:
7342 /* XXX compute */
7343 queue_reset = true;
7344 break;
7345 case 2:
7346 /* XXX compute */
7347 queue_reset = true;
7348 break;
7349 }
7350 break;
7351 }
7352 break;
7353 case 233: /* GUI IDLE */
7354 DRM_DEBUG("IH: GUI idle\n");
7355 break;
7356 default:
7357 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7358 break;
7359 }
7360
7361 /* wptr/rptr are in bytes! */
7362 rptr += 16;
7363 rptr &= rdev->ih.ptr_mask;
7364 }
7365 if (queue_hotplug)
7366 schedule_work(&rdev->hotplug_work);
7367 if (queue_reset)
7368 schedule_work(&rdev->reset_work);
7369 rdev->ih.rptr = rptr;
7370 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7371 atomic_set(&rdev->ih.lock, 0);
7372
7373 /* make sure wptr hasn't changed while processing */
7374 wptr = cik_get_ih_wptr(rdev);
7375 if (wptr != rptr)
7376 goto restart_ih;
7377
7378 return IRQ_HANDLED;
7379}
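
/*
 * Illustrative sketch (not part of the driver): how the ring_id fields
 * above decode for the CP cases (181/184/185). The SDMA cases (224/241/
 * 247) use a different layout: me_id in bits [1:0], queue_id in [3:2].
 * The "XXX check the bitfield order!" notes above mean this layout is
 * still unverified against the hardware docs.
 */
static inline void cik_example_decode_cp_ring_id(u32 ring_id,
                                                 u32 *me, u32 *pipe, u32 *queue)
{
        *me = (ring_id & 0x60) >> 5;    /* bits [6:5]: micro engine */
        *pipe = (ring_id & 0x18) >> 3;  /* bits [4:3]: pipe within the ME */
        *queue = (ring_id & 0x7) >> 0;  /* bits [2:0]: queue within the pipe */
        /* e.g. ring_id = 0x2b -> me 1, pipe 1, queue 3 */
}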
7380
7381/*
7382 * startup/shutdown callbacks
7383 */
7384/**
7385 * cik_startup - program the asic to a functional state
7386 *
7387 * @rdev: radeon_device pointer
7388 *
7389 * Programs the asic to a functional state (CIK).
7390 * Called by cik_init() and cik_resume().
7391 * Returns 0 for success, error for failure.
7392 */
7393static int cik_startup(struct radeon_device *rdev)
7394{
7395 struct radeon_ring *ring;
7396 int r;
7397
7398 /* enable pcie gen2/3 link */
7399 cik_pcie_gen3_enable(rdev);
7400 /* enable aspm */
7401 cik_program_aspm(rdev);
7402
7403 cik_mc_program(rdev);
7404
7405 if (rdev->flags & RADEON_IS_IGP) {
7406 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7407 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7408 r = cik_init_microcode(rdev);
7409 if (r) {
7410 DRM_ERROR("Failed to load firmware!\n");
7411 return r;
7412 }
7413 }
7414 } else {
7415 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7416 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7417 !rdev->mc_fw) {
7418 r = cik_init_microcode(rdev);
7419 if (r) {
7420 DRM_ERROR("Failed to load firmware!\n");
7421 return r;
7422 }
7423 }
7424
7425 r = ci_mc_load_microcode(rdev);
7426 if (r) {
7427 DRM_ERROR("Failed to load MC firmware!\n");
7428 return r;
7429 }
7430 }
7431
7432 r = r600_vram_scratch_init(rdev);
7433 if (r)
7434 return r;
7435
7436 r = cik_pcie_gart_enable(rdev);
7437 if (r)
7438 return r;
7439 cik_gpu_init(rdev);
7440
7441 /* allocate rlc buffers */
7442 if (rdev->flags & RADEON_IS_IGP) {
7443 if (rdev->family == CHIP_KAVERI) {
7444 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7445 rdev->rlc.reg_list_size =
7446 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7447 } else {
7448 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7449 rdev->rlc.reg_list_size =
7450 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7451 }
7452 }
7453 rdev->rlc.cs_data = ci_cs_data;
7454 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7455 r = sumo_rlc_init(rdev);
7456 if (r) {
7457 DRM_ERROR("Failed to init rlc BOs!\n");
7458 return r;
7459 }
7460
7461 /* allocate wb buffer */
7462 r = radeon_wb_init(rdev);
7463 if (r)
7464 return r;
7465
7466 /* allocate mec buffers */
7467 r = cik_mec_init(rdev);
7468 if (r) {
7469 DRM_ERROR("Failed to init MEC BOs!\n");
7470 return r;
7471 }
7472
7473 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7474 if (r) {
7475 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7476 return r;
7477 }
7478
7479 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7480 if (r) {
7481 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7482 return r;
7483 }
7484
7485 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7486 if (r) {
7487 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7488 return r;
7489 }
7490
7491 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7492 if (r) {
7493 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7494 return r;
7495 }
7496
7497 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7498 if (r) {
7499 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7500 return r;
7501 }
7502
7503 r = cik_uvd_resume(rdev);
7504 if (!r) {
7505 r = radeon_fence_driver_start_ring(rdev,
7506 R600_RING_TYPE_UVD_INDEX);
7507 if (r)
7508 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7509 }
7510 if (r)
7511 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7512
7513 /* Enable IRQ */
7514 if (!rdev->irq.installed) {
7515 r = radeon_irq_kms_init(rdev);
7516 if (r)
7517 return r;
7518 }
7519
7520 r = cik_irq_init(rdev);
7521 if (r) {
7522 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7523 radeon_irq_kms_fini(rdev);
7524 return r;
7525 }
7526 cik_irq_set(rdev);
7527
7528 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7529 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7530 CP_RB0_RPTR, CP_RB0_WPTR,
7531 0, 0xfffff, RADEON_CP_PACKET2);
7532 if (r)
7533 return r;
7534
7535 /* set up the compute queues */
7536 /* type-2 packets are deprecated on MEC, use type-3 instead */
7537 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7538 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7539 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7540 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
7541 if (r)
7542 return r;
7543 ring->me = 1; /* first MEC */
7544 ring->pipe = 0; /* first pipe */
7545 ring->queue = 0; /* first queue */
7546 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7547
7548 /* type-2 packets are deprecated on MEC, use type-3 instead */
7549 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7550 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7551 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7552 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
7553 if (r)
7554 return r;
7555 /* dGPU only have 1 MEC */
7556 ring->me = 1; /* first MEC */
7557 ring->pipe = 0; /* first pipe */
7558 ring->queue = 1; /* second queue */
7559 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7560
7561 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7562 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7563 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7564 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7565 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7566 if (r)
7567 return r;
7568
7569 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7570 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7571 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7572 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7573 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7574 if (r)
7575 return r;
7576
7577 r = cik_cp_resume(rdev);
7578 if (r)
7579 return r;
7580
7581 r = cik_sdma_resume(rdev);
7582 if (r)
7583 return r;
7584
7585 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7586 if (ring->ring_size) {
7587 r = radeon_ring_init(rdev, ring, ring->ring_size,
7588 R600_WB_UVD_RPTR_OFFSET,
7589 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7590 0, 0xfffff, RADEON_CP_PACKET2);
7591 if (!r)
7592 r = r600_uvd_init(rdev);
7593 if (r)
7594 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7595 }
7596
7597 r = radeon_ib_pool_init(rdev);
7598 if (r) {
7599 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7600 return r;
7601 }
7602
7603 r = radeon_vm_manager_init(rdev);
7604 if (r) {
7605 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7606 return r;
7607 }
7608
7609 return 0;
7610}
7611
7612/**
7613 * cik_resume - resume the asic to a functional state
7614 *
7615 * @rdev: radeon_device pointer
7616 *
7617 * Programs the asic to a functional state (CIK).
7618 * Called at resume.
7619 * Returns 0 for success, error for failure.
7620 */
7621int cik_resume(struct radeon_device *rdev)
7622{
7623 int r;
7624
7625 /* post card */
7626 atom_asic_init(rdev->mode_info.atom_context);
7627
7628 /* init golden registers */
7629 cik_init_golden_registers(rdev);
7630
7631 rdev->accel_working = true;
7632 r = cik_startup(rdev);
7633 if (r) {
7634 DRM_ERROR("cik startup failed on resume\n");
7635 rdev->accel_working = false;
7636 return r;
7637 }
7638
7639 return r;
7640
7641}
7642
7643/**
7644 * cik_suspend - suspend the asic
7645 *
7646 * @rdev: radeon_device pointer
7647 *
7648 * Bring the chip into a state suitable for suspend (CIK).
7649 * Called at suspend.
7650 * Returns 0 for success.
7651 */
7652int cik_suspend(struct radeon_device *rdev)
7653{
7654 radeon_vm_manager_fini(rdev);
7655 cik_cp_enable(rdev, false);
7656 cik_sdma_enable(rdev, false);
7657 r600_uvd_stop(rdev);
7658 radeon_uvd_suspend(rdev);
7659 cik_irq_suspend(rdev);
7660 radeon_wb_disable(rdev);
7661 cik_pcie_gart_disable(rdev);
7662 return 0;
7663}
7664
7665/* Plan is to move initialization in that function and use
7666 * helper function so that radeon_device_init pretty much
7667 * do nothing more than calling asic specific function. This
7668 * should also allow to remove a bunch of callback function
7669 * like vram_info.
7670 */
7671/**
7672 * cik_init - asic specific driver and hw init
7673 *
7674 * @rdev: radeon_device pointer
7675 *
7676 * Setup asic specific driver variables and program the hw
7677 * to a functional state (CIK).
7678 * Called at driver startup.
7679 * Returns 0 for success, errors for failure.
7680 */
7681int cik_init(struct radeon_device *rdev)
7682{
7683 struct radeon_ring *ring;
7684 int r;
7685
7686 /* Read BIOS */
7687 if (!radeon_get_bios(rdev)) {
7688 if (ASIC_IS_AVIVO(rdev))
7689 return -EINVAL;
7690 }
7691 /* Must be an ATOMBIOS */
7692 if (!rdev->is_atom_bios) {
7693 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7694 return -EINVAL;
7695 }
7696 r = radeon_atombios_init(rdev);
7697 if (r)
7698 return r;
7699
7700 /* Post card if necessary */
7701 if (!radeon_card_posted(rdev)) {
7702 if (!rdev->bios) {
7703 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7704 return -EINVAL;
7705 }
7706 DRM_INFO("GPU not posted. posting now...\n");
7707 atom_asic_init(rdev->mode_info.atom_context);
7708 }
7709 /* init golden registers */
7710 cik_init_golden_registers(rdev);
7711 /* Initialize scratch registers */
7712 cik_scratch_init(rdev);
7713 /* Initialize surface registers */
7714 radeon_surface_init(rdev);
7715 /* Initialize clocks */
7716 radeon_get_clock_info(rdev->ddev);
7717
7718 /* Fence driver */
7719 r = radeon_fence_driver_init(rdev);
7720 if (r)
7721 return r;
7722
7723 /* initialize memory controller */
7724 r = cik_mc_init(rdev);
7725 if (r)
7726 return r;
7727 /* Memory manager */
7728 r = radeon_bo_init(rdev);
7729 if (r)
7730 return r;
7731
7732 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7733 ring->ring_obj = NULL;
7734 r600_ring_init(rdev, ring, 1024 * 1024);
7735
7736 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7737 ring->ring_obj = NULL;
7738 r600_ring_init(rdev, ring, 1024 * 1024);
7739 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7740 if (r)
7741 return r;
7742
7743 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7744 ring->ring_obj = NULL;
7745 r600_ring_init(rdev, ring, 1024 * 1024);
7746 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7747 if (r)
7748 return r;
7749
7750 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7751 ring->ring_obj = NULL;
7752 r600_ring_init(rdev, ring, 256 * 1024);
7753
7754 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7755 ring->ring_obj = NULL;
7756 r600_ring_init(rdev, ring, 256 * 1024);
7757
7758 r = radeon_uvd_init(rdev);
7759 if (!r) {
7760 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7761 ring->ring_obj = NULL;
7762 r600_ring_init(rdev, ring, 4096);
7763 }
7764
7765 rdev->ih.ring_obj = NULL;
7766 r600_ih_ring_init(rdev, 64 * 1024);
7767
7768 r = r600_pcie_gart_init(rdev);
7769 if (r)
7770 return r;
7771
7772 rdev->accel_working = true;
7773 r = cik_startup(rdev);
7774 if (r) {
7775 dev_err(rdev->dev, "disabling GPU acceleration\n");
7776 cik_cp_fini(rdev);
7777 cik_sdma_fini(rdev);
7778 cik_irq_fini(rdev);
7779 sumo_rlc_fini(rdev);
7780 cik_mec_fini(rdev);
7781 radeon_wb_fini(rdev);
7782 radeon_ib_pool_fini(rdev);
7783 radeon_vm_manager_fini(rdev);
7784 radeon_irq_kms_fini(rdev);
7785 cik_pcie_gart_fini(rdev);
7786 rdev->accel_working = false;
7787 }
7788
7789 /* Don't start up if the MC ucode is missing.
7790 * The default clocks and voltages before the MC ucode
7791 * is loaded are not sufficient for advanced operations.
7792 */
7793 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7794 DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
7795 return -EINVAL;
7796 }
7797
7798 return 0;
7799}
7800
7801/**
7802 * cik_fini - asic specific driver and hw fini
7803 *
7804 * @rdev: radeon_device pointer
7805 *
7806 * Tear down the asic specific driver variables and program the hw
7807 * to an idle state (CIK).
7808 * Called at driver unload.
7809 */
7810void cik_fini(struct radeon_device *rdev)
7811{
7812 cik_cp_fini(rdev);
7813 cik_sdma_fini(rdev);
7814 cik_irq_fini(rdev);
7815 sumo_rlc_fini(rdev);
7816 cik_mec_fini(rdev);
7817 radeon_wb_fini(rdev);
7818 radeon_vm_manager_fini(rdev);
7819 radeon_ib_pool_fini(rdev);
7820 radeon_irq_kms_fini(rdev);
7821 r600_uvd_stop(rdev);
7822 radeon_uvd_fini(rdev);
7823 cik_pcie_gart_fini(rdev);
7824 r600_vram_scratch_fini(rdev);
7825 radeon_gem_fini(rdev);
7826 radeon_fence_driver_fini(rdev);
7827 radeon_bo_fini(rdev);
7828 radeon_atombios_fini(rdev);
7829 kfree(rdev->bios);
7830 rdev->bios = NULL;
7831}
7832
7833/* display watermark setup */
7834/**
7835 * dce8_line_buffer_adjust - Set up the line buffer
7836 *
7837 * @rdev: radeon_device pointer
7838 * @radeon_crtc: the selected display controller
7839 * @mode: the current display mode on the selected display
7840 * controller
7841 *
7842 * Set up the line buffer allocation for
7843 * the selected display controller (CIK).
7844 * Returns the line buffer size in pixels.
7845 */
7846static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7847 struct radeon_crtc *radeon_crtc,
7848 struct drm_display_mode *mode)
7849{
7850 u32 tmp;
7851
7852 /*
7853 * Line Buffer Setup
7854 * There are 6 line buffers, one for each display controller.
7855 * There are 3 partitions per LB. Select the number of partitions
7856 * to enable based on the display width. For display widths larger
7857 * than 4096, you need to use 2 display controllers and combine
7858 * them using the stereo blender.
7859 */
7860 if (radeon_crtc->base.enabled && mode) {
7861 if (mode->crtc_hdisplay < 1920)
7862 tmp = 1;
7863 else if (mode->crtc_hdisplay < 2560)
7864 tmp = 2;
7865 else if (mode->crtc_hdisplay < 4096)
7866 tmp = 0;
7867 else {
7868 DRM_DEBUG_KMS("Mode too big for LB!\n");
7869 tmp = 0;
7870 }
7871 } else
7872 tmp = 1;
7873
7874 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7875 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7876
7877 if (radeon_crtc->base.enabled && mode) {
7878 switch (tmp) {
7879 case 0:
7880 default:
7881 return 4096 * 2;
7882 case 1:
7883 return 1920 * 2;
7884 case 2:
7885 return 2560 * 2;
7886 }
7887 }
7888
7889 /* controller not enabled, so no lb used */
7890 return 0;
7891}
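
/*
 * Worked examples for the partition selection above (assumed typical
 * modes, purely illustrative):
 *   crtc_hdisplay 1366 -> tmp = 1 -> lb_size = 1920 * 2 = 3840 pixels
 *   crtc_hdisplay 1920 -> tmp = 2 -> lb_size = 2560 * 2 = 5120 pixels
 *   crtc_hdisplay 3840 -> tmp = 0 -> lb_size = 4096 * 2 = 8192 pixels
 * i.e. the buffer allocated always covers at least two lines of the
 * selected mode, which the watermark code below relies on.
 */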
7892
7893/**
7894 * cik_get_number_of_dram_channels - get the number of dram channels
7895 *
7896 * @rdev: radeon_device pointer
7897 *
7898 * Look up the number of video ram channels (CIK).
7899 * Used for display watermark bandwidth calculations
7900 * Returns the number of dram channels
7901 */
7902static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7903{
7904 u32 tmp = RREG32(MC_SHARED_CHMAP);
7905
7906 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7907 case 0:
7908 default:
7909 return 1;
7910 case 1:
7911 return 2;
7912 case 2:
7913 return 4;
7914 case 3:
7915 return 8;
7916 case 4:
7917 return 3;
7918 case 5:
7919 return 6;
7920 case 6:
7921 return 10;
7922 case 7:
7923 return 12;
7924 case 8:
7925 return 16;
7926 }
7927}
7928
7929struct dce8_wm_params {
7930 u32 dram_channels; /* number of dram channels */
7931 u32 yclk; /* bandwidth per dram data pin in kHz */
7932 u32 sclk; /* engine clock in kHz */
7933 u32 disp_clk; /* display clock in kHz */
7934 u32 src_width; /* viewport width */
7935 u32 active_time; /* active display time in ns */
7936 u32 blank_time; /* blank time in ns */
7937 bool interlaced; /* mode is interlaced */
7938 fixed20_12 vsc; /* vertical scale ratio */
7939 u32 num_heads; /* number of active crtcs */
7940 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7941 u32 lb_size; /* line buffer allocated to pipe */
7942 u32 vtaps; /* vertical scaler taps */
7943};
7944
7945/**
7946 * dce8_dram_bandwidth - get the dram bandwidth
7947 *
7948 * @wm: watermark calculation data
7949 *
7950 * Calculate the raw dram bandwidth (CIK).
7951 * Used for display watermark bandwidth calculations
7952 * Returns the dram bandwidth in MBytes/s
7953 */
7954static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7955{
7956 /* Calculate raw DRAM Bandwidth */
7957 fixed20_12 dram_efficiency; /* 0.7 */
7958 fixed20_12 yclk, dram_channels, bandwidth;
7959 fixed20_12 a;
7960
7961 a.full = dfixed_const(1000);
7962 yclk.full = dfixed_const(wm->yclk);
7963 yclk.full = dfixed_div(yclk, a);
7964 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7965 a.full = dfixed_const(10);
7966 dram_efficiency.full = dfixed_const(7);
7967 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7968 bandwidth.full = dfixed_mul(dram_channels, yclk);
7969 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7970
7971 return dfixed_trunc(bandwidth);
7972}
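
/*
 * Worked example (assumed numbers): with wm->yclk = 1000000 (a 1 GHz
 * effective memory clock, in kHz) and wm->dram_channels = 4, each
 * channel being 4 bytes wide:
 *   bandwidth = (1000000 / 1000) * (4 * 4) * 0.7 = 11200 MBytes/s
 * so the 0.7 dram_efficiency factor derates the raw 16000 MB/s bus.
 */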
7973
7974/**
7975 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7976 *
7977 * @wm: watermark calculation data
7978 *
7979 * Calculate the dram bandwidth used for display (CIK).
7980 * Used for display watermark bandwidth calculations
7981 * Returns the dram bandwidth for display in MBytes/s
7982 */
7983static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7984{
7985 /* Calculate DRAM Bandwidth and the part allocated to display. */
7986 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7987 fixed20_12 yclk, dram_channels, bandwidth;
7988 fixed20_12 a;
7989
7990 a.full = dfixed_const(1000);
7991 yclk.full = dfixed_const(wm->yclk);
7992 yclk.full = dfixed_div(yclk, a);
7993 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7994 a.full = dfixed_const(10);
7995 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7996 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7997 bandwidth.full = dfixed_mul(dram_channels, yclk);
7998 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7999
8000 return dfixed_trunc(bandwidth);
8001}
8002
8003/**
8004 * dce8_data_return_bandwidth - get the data return bandwidth
8005 *
8006 * @wm: watermark calculation data
8007 *
8008 * Calculate the data return bandwidth used for display (CIK).
8009 * Used for display watermark bandwidth calculations
8010 * Returns the data return bandwidth in MBytes/s
8011 */
8012static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8013{
8014 /* Calculate the display Data return Bandwidth */
8015 fixed20_12 return_efficiency; /* 0.8 */
8016 fixed20_12 sclk, bandwidth;
8017 fixed20_12 a;
8018
8019 a.full = dfixed_const(1000);
8020 sclk.full = dfixed_const(wm->sclk);
8021 sclk.full = dfixed_div(sclk, a);
8022 a.full = dfixed_const(10);
8023 return_efficiency.full = dfixed_const(8);
8024 return_efficiency.full = dfixed_div(return_efficiency, a);
8025 a.full = dfixed_const(32);
8026 bandwidth.full = dfixed_mul(a, sclk);
8027 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8028
8029 return dfixed_trunc(bandwidth);
8030}
8031
8032/**
8033 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8034 *
8035 * @wm: watermark calculation data
8036 *
8037 * Calculate the dmif bandwidth used for display (CIK).
8038 * Used for display watermark bandwidth calculations
8039 * Returns the dmif bandwidth in MBytes/s
8040 */
8041static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8042{
8043 /* Calculate the DMIF Request Bandwidth */
8044 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8045 fixed20_12 disp_clk, bandwidth;
8046 fixed20_12 a, b;
8047
8048 a.full = dfixed_const(1000);
8049 disp_clk.full = dfixed_const(wm->disp_clk);
8050 disp_clk.full = dfixed_div(disp_clk, a);
8051 a.full = dfixed_const(32);
8052 b.full = dfixed_mul(a, disp_clk);
8053
8054 a.full = dfixed_const(10);
8055 disp_clk_request_efficiency.full = dfixed_const(8);
8056 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8057
8058 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8059
8060 return dfixed_trunc(bandwidth);
8061}
8062
8063/**
8064 * dce8_available_bandwidth - get the min available bandwidth
8065 *
8066 * @wm: watermark calculation data
8067 *
8068 * Calculate the min available bandwidth used for display (CIK).
8069 * Used for display watermark bandwidth calculations
8070 * Returns the min available bandwidth in MBytes/s
8071 */
8072static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8073{
8074 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8075 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8076 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8077 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8078
8079 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8080}
8081
8082/**
8083 * dce8_average_bandwidth - get the average available bandwidth
8084 *
8085 * @wm: watermark calculation data
8086 *
8087 * Calculate the average available bandwidth used for display (CIK).
8088 * Used for display watermark bandwidth calculations
8089 * Returns the average available bandwidth in MBytes/s
8090 */
8091static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8092{
8093 /* Calculate the display mode Average Bandwidth
8094 * DisplayMode should contain the source and destination dimensions,
8095 * timing, etc.
8096 */
8097 fixed20_12 bpp;
8098 fixed20_12 line_time;
8099 fixed20_12 src_width;
8100 fixed20_12 bandwidth;
8101 fixed20_12 a;
8102
8103 a.full = dfixed_const(1000);
8104 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8105 line_time.full = dfixed_div(line_time, a);
8106 bpp.full = dfixed_const(wm->bytes_per_pixel);
8107 src_width.full = dfixed_const(wm->src_width);
8108 bandwidth.full = dfixed_mul(src_width, bpp);
8109 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8110 bandwidth.full = dfixed_div(bandwidth, line_time);
8111
8112 return dfixed_trunc(bandwidth);
8113}
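
/*
 * Worked example (assumed 1080p-like timing): src_width = 1920,
 * bytes_per_pixel = 4, vsc = 1, active_time + blank_time = 12500 ns:
 *   line_time = 12500 / 1000 = 12.5 us
 *   bandwidth = 1920 * 4 * 1 / 12.5 ~= 614 MBytes/s
 * i.e. the average rate needed to keep refilling one line buffer.
 */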
8114
8115/**
8116 * dce8_latency_watermark - get the latency watermark
8117 *
8118 * @wm: watermark calculation data
8119 *
8120 * Calculate the latency watermark (CIK).
8121 * Used for display watermark bandwidth calculations
8122 * Returns the latency watermark in ns
8123 */
8124static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8125{
8126 /* First calculate the latency in ns */
8127 u32 mc_latency = 2000; /* 2000 ns. */
8128 u32 available_bandwidth = dce8_available_bandwidth(wm);
8129 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8130 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8131 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8132 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8133 (wm->num_heads * cursor_line_pair_return_time);
8134 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8135 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8136 u32 tmp, dmif_size = 12288;
8137 fixed20_12 a, b, c;
8138
8139 if (wm->num_heads == 0)
8140 return 0;
8141
8142 a.full = dfixed_const(2);
8143 b.full = dfixed_const(1);
8144 if ((wm->vsc.full > a.full) ||
8145 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8146 (wm->vtaps >= 5) ||
8147 ((wm->vsc.full >= a.full) && wm->interlaced))
8148 max_src_lines_per_dst_line = 4;
8149 else
8150 max_src_lines_per_dst_line = 2;
8151
8152 a.full = dfixed_const(available_bandwidth);
8153 b.full = dfixed_const(wm->num_heads);
8154 a.full = dfixed_div(a, b);
8155
8156 b.full = dfixed_const(mc_latency + 512);
8157 c.full = dfixed_const(wm->disp_clk);
8158 b.full = dfixed_div(b, c);
8159
8160 c.full = dfixed_const(dmif_size);
8161 b.full = dfixed_div(c, b);
8162
8163 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8164
8165 b.full = dfixed_const(1000);
8166 c.full = dfixed_const(wm->disp_clk);
8167 b.full = dfixed_div(c, b);
8168 c.full = dfixed_const(wm->bytes_per_pixel);
8169 b.full = dfixed_mul(b, c);
8170
8171 lb_fill_bw = min(tmp, dfixed_trunc(b));
8172
8173 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8174 b.full = dfixed_const(1000);
8175 c.full = dfixed_const(lb_fill_bw);
8176 b.full = dfixed_div(c, b);
8177 a.full = dfixed_div(a, b);
8178 line_fill_time = dfixed_trunc(a);
8179
8180 if (line_fill_time < wm->active_time)
8181 return latency;
8182 else
8183 return latency + (line_fill_time - wm->active_time);
8184
8185}
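
/*
 * Worked example for the fixed latency terms above (assumed numbers):
 * with available_bandwidth = 11200 MB/s and a 150 MHz display clock
 * (wm->disp_clk = 150000, in kHz):
 *   worst_chunk_return_time      = (512 * 8 * 1000) / 11200 ~= 365 ns
 *   cursor_line_pair_return_time = (128 * 4 * 1000) / 11200 ~= 45 ns
 *   dc_latency                   = 40000000 / 150000        ~= 266 ns
 * which combine with the 2000 ns mc_latency into the base latency
 * before the line-fill-time correction is applied.
 */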
8186
8187/**
8188 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8189 * average and available dram bandwidth
8190 *
8191 * @wm: watermark calculation data
8192 *
8193 * Check if the display average bandwidth fits in the display
8194 * dram bandwidth (CIK).
8195 * Used for display watermark bandwidth calculations
8196 * Returns true if the display fits, false if not.
8197 */
8198static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8199{
8200 if (dce8_average_bandwidth(wm) <=
8201 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8202 return true;
8203 else
8204 return false;
8205}
8206
8207/**
8208 * dce8_average_bandwidth_vs_available_bandwidth - check
8209 * average and available bandwidth
8210 *
8211 * @wm: watermark calculation data
8212 *
8213 * Check if the display average bandwidth fits in the display
8214 * available bandwidth (CIK).
8215 * Used for display watermark bandwidth calculations
8216 * Returns true if the display fits, false if not.
8217 */
8218static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8219{
8220 if (dce8_average_bandwidth(wm) <=
8221 (dce8_available_bandwidth(wm) / wm->num_heads))
8222 return true;
8223 else
8224 return false;
8225}
8226
8227/**
8228 * dce8_check_latency_hiding - check latency hiding
8229 *
8230 * @wm: watermark calculation data
8231 *
8232 * Check latency hiding (CIK).
8233 * Used for display watermark bandwidth calculations
8234 * Returns true if the display fits, false if not.
8235 */
8236static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8237{
8238 u32 lb_partitions = wm->lb_size / wm->src_width;
8239 u32 line_time = wm->active_time + wm->blank_time;
8240 u32 latency_tolerant_lines;
8241 u32 latency_hiding;
8242 fixed20_12 a;
8243
8244 a.full = dfixed_const(1);
8245 if (wm->vsc.full > a.full)
8246 latency_tolerant_lines = 1;
8247 else {
8248 if (lb_partitions <= (wm->vtaps + 1))
8249 latency_tolerant_lines = 1;
8250 else
8251 latency_tolerant_lines = 2;
8252 }
8253
8254 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8255
8256 if (dce8_latency_watermark(wm) <= latency_hiding)
8257 return true;
8258 else
8259 return false;
8260}
8261
8262/**
8263 * dce8_program_watermarks - program display watermarks
8264 *
8265 * @rdev: radeon_device pointer
8266 * @radeon_crtc: the selected display controller
8267 * @lb_size: line buffer size
8268 * @num_heads: number of display controllers in use
8269 *
8270 * Calculate and program the display watermarks for the
8271 * selected display controller (CIK).
8272 */
8273static void dce8_program_watermarks(struct radeon_device *rdev,
8274 struct radeon_crtc *radeon_crtc,
8275 u32 lb_size, u32 num_heads)
8276{
8277 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8278 struct dce8_wm_params wm_low, wm_high;
8279 u32 pixel_period;
8280 u32 line_time = 0;
8281 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8282 u32 tmp, wm_mask;
8283
8284 if (radeon_crtc->base.enabled && num_heads && mode) {
8285 pixel_period = 1000000 / (u32)mode->clock;
8286 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8287
8288 /* watermark for high clocks */
8289 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8290 rdev->pm.dpm_enabled) {
8291 wm_high.yclk =
8292 radeon_dpm_get_mclk(rdev, false) * 10;
8293 wm_high.sclk =
8294 radeon_dpm_get_sclk(rdev, false) * 10;
8295 } else {
8296 wm_high.yclk = rdev->pm.current_mclk * 10;
8297 wm_high.sclk = rdev->pm.current_sclk * 10;
8298 }
8299
8300 wm_high.disp_clk = mode->clock;
8301 wm_high.src_width = mode->crtc_hdisplay;
8302 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8303 wm_high.blank_time = line_time - wm_high.active_time;
8304 wm_high.interlaced = false;
8305 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8306 wm_high.interlaced = true;
8307 wm_high.vsc = radeon_crtc->vsc;
8308 wm_high.vtaps = 1;
8309 if (radeon_crtc->rmx_type != RMX_OFF)
8310 wm_high.vtaps = 2;
8311 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8312 wm_high.lb_size = lb_size;
8313 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8314 wm_high.num_heads = num_heads;
8315
8316 /* set for high clocks */
8317 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8318
8319 /* possibly force display priority to high */
8320 /* should really do this at mode validation time... */
8321 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8322 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8323 !dce8_check_latency_hiding(&wm_high) ||
8324 (rdev->disp_priority == 2)) {
8325 DRM_DEBUG_KMS("force priority to high\n");
8326 }
8327
8328 /* watermark for low clocks */
8329 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8330 rdev->pm.dpm_enabled) {
8331 wm_low.yclk =
8332 radeon_dpm_get_mclk(rdev, true) * 10;
8333 wm_low.sclk =
8334 radeon_dpm_get_sclk(rdev, true) * 10;
8335 } else {
8336 wm_low.yclk = rdev->pm.current_mclk * 10;
8337 wm_low.sclk = rdev->pm.current_sclk * 10;
8338 }
8339
8340 wm_low.disp_clk = mode->clock;
8341 wm_low.src_width = mode->crtc_hdisplay;
8342 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8343 wm_low.blank_time = line_time - wm_low.active_time;
8344 wm_low.interlaced = false;
8345 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8346 wm_low.interlaced = true;
8347 wm_low.vsc = radeon_crtc->vsc;
8348 wm_low.vtaps = 1;
8349 if (radeon_crtc->rmx_type != RMX_OFF)
8350 wm_low.vtaps = 2;
8351 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8352 wm_low.lb_size = lb_size;
8353 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8354 wm_low.num_heads = num_heads;
8355
8356 /* set for low clocks */
8357 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8358
8359 /* possibly force display priority to high */
8360 /* should really do this at mode validation time... */
8361 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8362 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8363 !dce8_check_latency_hiding(&wm_low) ||
8364 (rdev->disp_priority == 2)) {
8365 DRM_DEBUG_KMS("force priority to high\n");
8366 }
8367 }
8368
8369 /* select wm A */
8370 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8371 tmp = wm_mask;
8372 tmp &= ~LATENCY_WATERMARK_MASK(3);
8373 tmp |= LATENCY_WATERMARK_MASK(1);
8374 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8375 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8376 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8377 LATENCY_HIGH_WATERMARK(line_time)));
8378 /* select wm B */
8379 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8380 tmp &= ~LATENCY_WATERMARK_MASK(3);
8381 tmp |= LATENCY_WATERMARK_MASK(2);
8382 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8383 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8384 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8385 LATENCY_HIGH_WATERMARK(line_time)));
8386 /* restore original selection */
8387 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8388
8389 /* save values for DPM */
8390 radeon_crtc->line_time = line_time;
8391 radeon_crtc->wm_high = latency_watermark_a;
8392 radeon_crtc->wm_low = latency_watermark_b;
8393}
8394
8395/**
8396 * dce8_bandwidth_update - program display watermarks
8397 *
8398 * @rdev: radeon_device pointer
8399 *
8400 * Calculate and program the display watermarks and line
8401 * buffer allocation (CIK).
8402 */
8403void dce8_bandwidth_update(struct radeon_device *rdev)
8404{
8405 struct drm_display_mode *mode = NULL;
8406 u32 num_heads = 0, lb_size;
8407 int i;
8408
8409 radeon_update_display_priority(rdev);
8410
8411 for (i = 0; i < rdev->num_crtc; i++) {
8412 if (rdev->mode_info.crtcs[i]->base.enabled)
8413 num_heads++;
8414 }
8415 for (i = 0; i < rdev->num_crtc; i++) {
8416 mode = &rdev->mode_info.crtcs[i]->base.mode;
8417 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8418 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8419 }
8420}
8421
8422/**
8423 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8424 *
8425 * @rdev: radeon_device pointer
8426 *
8427 * Fetches a GPU clock counter snapshot (CIK).
8428 * Returns the 64 bit clock counter snapshot.
8429 */
8430uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8431{
8432 uint64_t clock;
8433
8434 mutex_lock(&rdev->gpu_clock_mutex);
8435 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8436 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8437 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8438 mutex_unlock(&rdev->gpu_clock_mutex);
8439 return clock;
8440}
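
/*
 * Minimal usage sketch (hypothetical caller, not in this file): the
 * WREG32 above latches both 32-bit halves, so two snapshots can be
 * subtracted to measure an interval in GPU clock ticks.
 */
static uint64_t cik_example_measure_gpu_ticks(struct radeon_device *rdev)
{
        uint64_t start = cik_get_gpu_clock_counter(rdev);

        mdelay(10);     /* stand-in for the work being timed */
        return cik_get_gpu_clock_counter(rdev) - start;
}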
8441
8442static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8443 u32 cntl_reg, u32 status_reg)
8444{
8445 int r, i;
8446 struct atom_clock_dividers dividers;
8447 uint32_t tmp;
8448
8449 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8450 clock, false, &dividers);
8451 if (r)
8452 return r;
8453
8454 tmp = RREG32_SMC(cntl_reg);
8455 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8456 tmp |= dividers.post_divider;
8457 WREG32_SMC(cntl_reg, tmp);
8458
8459 for (i = 0; i < 100; i++) {
8460 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8461 break;
8462 mdelay(10);
8463 }
8464 if (i == 100)
8465 return -ETIMEDOUT;
8466
8467 return 0;
8468}
8469
8470int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8471{
8472 int r = 0;
8473
8474 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8475 if (r)
8476 return r;
8477
8478 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8479 return r;
8480}
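
/*
 * Minimal usage sketch (hypothetical, clock values assumed): raise the
 * UVD clocks before decode work and drop them again when idle. The
 * 53300/40000 figures assume the 10 kHz convention used elsewhere in
 * radeon, i.e. roughly 533/400 MHz.
 */
static int cik_example_uvd_clock_cycle(struct radeon_device *rdev)
{
        int r;

        r = cik_set_uvd_clocks(rdev, 53300, 40000);     /* busy clocks */
        if (r)
                return r;
        /* ... submit decode work ... */
        return cik_set_uvd_clocks(rdev, 0, 0);          /* back to lowest state */
}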
8481
8482int cik_uvd_resume(struct radeon_device *rdev)
8483{
8484 uint64_t addr;
8485 uint32_t size;
8486 int r;
8487
8488 r = radeon_uvd_resume(rdev);
8489 if (r)
8490 return r;
8491
8492 /* program the VCPU memory controller bits 0-27 */
8493 addr = rdev->uvd.gpu_addr >> 3;
8494 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
8495 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8496 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8497
8498 addr += size;
8499 size = RADEON_UVD_STACK_SIZE >> 3;
8500 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8501 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8502
8503 addr += size;
8504 size = RADEON_UVD_HEAP_SIZE >> 3;
8505 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8506 WREG32(UVD_VCPU_CACHE_SIZE2, size);
8507
8508 /* bits 28-31 */
8509 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8510 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8511
8512 /* bits 32-39 */
8513 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8514 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8515
8516 return 0;
8517}
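
/*
 * Layout sketch for the VCPU cache programming above (region sizes
 * assumed from the RADEON_UVD_* defaults): all offsets and sizes are
 * written in 8-byte units (hence the >> 3), and the three regions are
 * packed back to back in the UVD BO:
 *   OFFSET0: firmware image, page aligned (+4 bytes)
 *   OFFSET1: stack, RADEON_UVD_STACK_SIZE bytes
 *   OFFSET2: heap,  RADEON_UVD_HEAP_SIZE bytes
 * Bits 28-39 of the base address go in the two extension registers.
 */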
8518
8519static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8520{
8521 struct pci_dev *root = rdev->pdev->bus->self;
8522 int bridge_pos, gpu_pos;
8523 u32 speed_cntl, mask, current_data_rate;
8524 int ret, i;
8525 u16 tmp16;
8526
8527 if (radeon_pcie_gen2 == 0)
8528 return;
8529
8530 if (rdev->flags & RADEON_IS_IGP)
8531 return;
8532
8533 if (!(rdev->flags & RADEON_IS_PCIE))
8534 return;
8535
8536 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8537 if (ret != 0)
8538 return;
8539
8540 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8541 return;
8542
8543 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8544 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8545 LC_CURRENT_DATA_RATE_SHIFT;
8546 if (mask & DRM_PCIE_SPEED_80) {
8547 if (current_data_rate == 2) {
8548 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8549 return;
8550 }
8551 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8552 } else if (mask & DRM_PCIE_SPEED_50) {
8553 if (current_data_rate == 1) {
8554 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8555 return;
8556 }
8557 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8558 }
8559
8560 bridge_pos = pci_pcie_cap(root);
8561 if (!bridge_pos)
8562 return;
8563
8564 gpu_pos = pci_pcie_cap(rdev->pdev);
8565 if (!gpu_pos)
8566 return;
8567
8568 if (mask & DRM_PCIE_SPEED_80) {
8569 /* re-try equalization if gen3 is not already enabled */
8570 if (current_data_rate != 2) {
8571 u16 bridge_cfg, gpu_cfg;
8572 u16 bridge_cfg2, gpu_cfg2;
8573 u32 max_lw, current_lw, tmp;
8574
8575 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8576 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8577
8578 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8579 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8580
8581 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8582 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8583
8584 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8585 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8586 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8587
8588 if (current_lw < max_lw) {
8589 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8590 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8591 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8592 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8593 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8594 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8595 }
8596 }
8597
8598 for (i = 0; i < 10; i++) {
8599 /* check status */
8600 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8601 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8602 break;
8603
8604 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8605 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8606
8607 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8608 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8609
8610 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8611 tmp |= LC_SET_QUIESCE;
8612 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8613
8614 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8615 tmp |= LC_REDO_EQ;
8616 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8617
8618 mdelay(100);
8619
8620 /* linkctl */
8621 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8622 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8623 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8624 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8625
8626 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8627 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8628 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8629 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8630
8631 /* linkctl2 */
8632 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8633 tmp16 &= ~((1 << 4) | (7 << 9));
8634 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8635 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8636
8637 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8638 tmp16 &= ~((1 << 4) | (7 << 9));
8639 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8640 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8641
8642 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8643 tmp &= ~LC_SET_QUIESCE;
8644 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8645 }
8646 }
8647 }
8648
8649 /* set the link speed */
8650 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8651 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8652 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8653
8654 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8655 tmp16 &= ~0xf;
8656 if (mask & DRM_PCIE_SPEED_80)
8657 tmp16 |= 3; /* gen3 */
8658 else if (mask & DRM_PCIE_SPEED_50)
8659 tmp16 |= 2; /* gen2 */
8660 else
8661 tmp16 |= 1; /* gen1 */
8662 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8663
8664 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8665 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8666 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8667
8668 for (i = 0; i < rdev->usec_timeout; i++) {
8669 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8670 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8671 break;
8672 udelay(1);
8673 }
8674}
8675
8676static void cik_program_aspm(struct radeon_device *rdev)
8677{
8678 u32 data, orig;
8679 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8680 bool disable_clkreq = false;
8681
8682 if (radeon_aspm == 0)
8683 return;
8684
8685 /* XXX double check IGPs */
8686 if (rdev->flags & RADEON_IS_IGP)
8687 return;
8688
8689 if (!(rdev->flags & RADEON_IS_PCIE))
8690 return;
8691
8692 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8693 data &= ~LC_XMIT_N_FTS_MASK;
8694 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8695 if (orig != data)
8696 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8697
8698 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8699 data |= LC_GO_TO_RECOVERY;
8700 if (orig != data)
8701 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8702
8703 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8704 data |= P_IGNORE_EDB_ERR;
8705 if (orig != data)
8706 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8707
8708 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8709 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8710 data |= LC_PMI_TO_L1_DIS;
8711 if (!disable_l0s)
8712 data |= LC_L0S_INACTIVITY(7);
8713
8714 if (!disable_l1) {
8715 data |= LC_L1_INACTIVITY(7);
8716 data &= ~LC_PMI_TO_L1_DIS;
8717 if (orig != data)
8718 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8719
8720 if (!disable_plloff_in_l1) {
8721 bool clk_req_support;
8722
8723 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8724 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8725 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8726 if (orig != data)
8727 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8728
8729 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8730 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8731 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8732 if (orig != data)
8733 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8734
8735 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8736 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8737 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8738 if (orig != data)
8739 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8740
8741 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8742 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8743 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8744 if (orig != data)
8745 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8746
8747 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8748 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8749 data |= LC_DYN_LANES_PWR_STATE(3);
8750 if (orig != data)
8751 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8752
8753 if (!disable_clkreq) {
8754 struct pci_dev *root = rdev->pdev->bus->self;
8755 u32 lnkcap;
8756
8757 clk_req_support = false;
8758 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8759 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8760 clk_req_support = true;
8761 } else {
8762 clk_req_support = false;
8763 }
8764
8765 if (clk_req_support) {
8766 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8767 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8768 if (orig != data)
8769 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8770
8771 orig = data = RREG32_SMC(THM_CLK_CNTL);
8772 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8773 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8774 if (orig != data)
8775 WREG32_SMC(THM_CLK_CNTL, data);
8776
8777 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8778 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8779 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8780 if (orig != data)
8781 WREG32_SMC(MISC_CLK_CTRL, data);
8782
8783 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8784 data &= ~BCLK_AS_XCLK;
8785 if (orig != data)
8786 WREG32_SMC(CG_CLKPIN_CNTL, data);
8787
8788 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8789 data &= ~FORCE_BIF_REFCLK_EN;
8790 if (orig != data)
8791 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8792
8793 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8794 data &= ~MPLL_CLKOUT_SEL_MASK;
8795 data |= MPLL_CLKOUT_SEL(4);
8796 if (orig != data)
8797 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8798 }
8799 }
8800 } else {
8801 if (orig != data)
8802 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8803 }
8804
8805 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8806 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8807 if (orig != data)
8808 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8809
8810 if (!disable_l0s) {
8811 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8812 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8813 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8814 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8815 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8816 data &= ~LC_L0S_INACTIVITY_MASK;
8817 if (orig != data)
8818 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8819 }
8820 }
8821 }
8822}