/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

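/* Illustrative sketch (not part of the upstream driver): a read-modify-write
 * helper built on the indirect PCIE accessors above.  The helper name is made
 * up for this example; real callers normally go through the register access
 * macros defined in radeon.h rather than calling these functions directly.
 */
static inline void cik_pciep_rmw_example(struct radeon_device *rdev,
					 u32 reg, u32 clear, u32 set)
{
	u32 tmp = cik_pciep_rreg(rdev, reg);

	tmp &= ~clear;
	tmp |= set;
	cik_pciep_wreg(rdev, reg, tmp);
}
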
/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

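/* Illustrative sketch (not part of the upstream driver): committing a ring's
 * write pointer through the doorbell aperture.  The helper and its parameters
 * are made up for this example; the real compute ring code keeps the doorbell
 * byte offset in its ring bookkeeping when the MEC queues are set up.
 */
static inline void cik_ring_commit_wptr_example(struct radeon_device *rdev,
						u32 doorbell_offset, u32 wptr)
{
	/* each doorbell slot is one dword; the offset is in bytes */
	cik_mm_wdoorbell(rdev, doorbell_offset, wptr);
}
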
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

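/* Illustrative sketch (not part of the upstream driver): the usual pattern
 * around cik_srbm_select().  Select the me/pipe/queue instance, program the
 * instanced (e.g. CP_HQD_*-style) registers, then switch back to the default
 * instance so subsequent register writes are not misdirected.
 */
static inline void cik_program_queue_example(struct radeon_device *rdev,
					     u32 me, u32 pipe, u32 queue)
{
	cik_srbm_select(rdev, me, pipe, queue, 0);
	/* ... program the per-queue registers for this me/pipe/queue ... */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
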
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

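/* Illustrative sketch (not part of the upstream driver): how a startup path
 * typically consumes cik_init_microcode().  The checks here are simplified;
 * the real code requests only the images a given ASIC actually needs
 * (e.g. no MC ucode on APUs).
 */
static int cik_request_ucode_example(struct radeon_device *rdev)
{
	int r;

	if (!rdev->pfp_fw || !rdev->me_fw || !rdev->ce_fw ||
	    !rdev->mec_fw || !rdev->rlc_fw || !rdev->sdma_fw) {
		r = cik_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}
	return 0;
}
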
466/*
467 * Core functions
468 */
469/**
470 * cik_tiling_mode_table_init - init the hw tiling table
471 *
472 * @rdev: radeon_device pointer
473 *
474 * Starting with SI, the tiling setup is done globally in a
475 * set of 32 tiling modes. Rather than selecting each set of
476 * parameters per surface as on older asics, we just select
477 * which index in the tiling table we want to use, and the
478 * surface uses those parameters (CIK).
479 */
480static void cik_tiling_mode_table_init(struct radeon_device *rdev)
481{
482 const u32 num_tile_mode_states = 32;
483 const u32 num_secondary_tile_mode_states = 16;
484 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
485 u32 num_pipe_configs;
486 u32 num_rbs = rdev->config.cik.max_backends_per_se *
487 rdev->config.cik.max_shader_engines;
488
489 switch (rdev->config.cik.mem_row_size_in_kb) {
490 case 1:
491 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
492 break;
493 case 2:
494 default:
495 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
496 break;
497 case 4:
498 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
499 break;
500 }
501
502 num_pipe_configs = rdev->config.cik.max_tile_pipes;
503 if (num_pipe_configs > 8)
504 num_pipe_configs = 8; /* ??? */
505
506 if (num_pipe_configs == 8) {
507 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
508 switch (reg_offset) {
509 case 0:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
514 break;
515 case 1:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
520 break;
521 case 2:
522 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
526 break;
527 case 3:
528 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
532 break;
533 case 4:
534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
537 TILE_SPLIT(split_equal_to_row_size));
538 break;
539 case 5:
540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
542 break;
543 case 6:
544 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
548 break;
549 case 7:
550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
551 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
553 TILE_SPLIT(split_equal_to_row_size));
554 break;
555 case 8:
556 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
558 break;
559 case 9:
560 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
562 break;
563 case 10:
564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
565 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
568 break;
569 case 11:
570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
572 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
574 break;
575 case 12:
576 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
580 break;
581 case 13:
582 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
584 break;
585 case 14:
586 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
590 break;
591 case 16:
592 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
594 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
596 break;
597 case 17:
598 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
599 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
602 break;
603 case 27:
604 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
606 break;
607 case 28:
608 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
609 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
612 break;
613 case 29:
614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
616 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
618 break;
619 case 30:
620 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
624 break;
625 default:
626 gb_tile_moden = 0;
627 break;
628 }
629 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
630 }
631 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
632 switch (reg_offset) {
633 case 0:
634 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
637 NUM_BANKS(ADDR_SURF_16_BANK));
638 break;
639 case 1:
640 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
643 NUM_BANKS(ADDR_SURF_16_BANK));
644 break;
645 case 2:
646 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
649 NUM_BANKS(ADDR_SURF_16_BANK));
650 break;
651 case 3:
652 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
655 NUM_BANKS(ADDR_SURF_16_BANK));
656 break;
657 case 4:
658 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
661 NUM_BANKS(ADDR_SURF_8_BANK));
662 break;
663 case 5:
664 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
667 NUM_BANKS(ADDR_SURF_4_BANK));
668 break;
669 case 6:
670 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
673 NUM_BANKS(ADDR_SURF_2_BANK));
674 break;
675 case 8:
676 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
679 NUM_BANKS(ADDR_SURF_16_BANK));
680 break;
681 case 9:
682 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
685 NUM_BANKS(ADDR_SURF_16_BANK));
686 break;
687 case 10:
688 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
691 NUM_BANKS(ADDR_SURF_16_BANK));
692 break;
693 case 11:
694 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
697 NUM_BANKS(ADDR_SURF_16_BANK));
698 break;
699 case 12:
700 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
703 NUM_BANKS(ADDR_SURF_8_BANK));
704 break;
705 case 13:
706 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
709 NUM_BANKS(ADDR_SURF_4_BANK));
710 break;
711 case 14:
712 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
715 NUM_BANKS(ADDR_SURF_2_BANK));
716 break;
717 default:
718 gb_tile_moden = 0;
719 break;
720 }
721 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
722 }
723 } else if (num_pipe_configs == 4) {
724 if (num_rbs == 4) {
725 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
726 switch (reg_offset) {
727 case 0:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
732 break;
733 case 1:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
738 break;
739 case 2:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
743 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
744 break;
745 case 3:
746 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
747 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
750 break;
751 case 4:
752 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
755 TILE_SPLIT(split_equal_to_row_size));
756 break;
757 case 5:
758 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
760 break;
761 case 6:
762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
765 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
766 break;
767 case 7:
768 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
769 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
771 TILE_SPLIT(split_equal_to_row_size));
772 break;
773 case 8:
774 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
775 PIPE_CONFIG(ADDR_SURF_P4_16x16));
776 break;
777 case 9:
778 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
779 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
780 break;
781 case 10:
782 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
783 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
784 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
786 break;
787 case 11:
788 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
789 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
790 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
792 break;
793 case 12:
794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
798 break;
799 case 13:
800 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
801 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
802 break;
803 case 14:
804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
805 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
808 break;
809 case 16:
810 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
811 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
814 break;
815 case 17:
816 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
820 break;
821 case 27:
822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
824 break;
825 case 28:
826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
827 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
830 break;
831 case 29:
832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
833 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
834 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
836 break;
837 case 30:
838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
842 break;
843 default:
844 gb_tile_moden = 0;
845 break;
846 }
847 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
848 }
849 } else if (num_rbs < 4) {
850 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
851 switch (reg_offset) {
852 case 0:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
857 break;
858 case 1:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
863 break;
864 case 2:
865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
867 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
869 break;
870 case 3:
871 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
872 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
873 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
875 break;
876 case 4:
877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
879 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
880 TILE_SPLIT(split_equal_to_row_size));
881 break;
882 case 5:
883 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
885 break;
886 case 6:
887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
888 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
889 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
890 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
891 break;
892 case 7:
893 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
895 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
896 TILE_SPLIT(split_equal_to_row_size));
897 break;
898 case 8:
899 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
900 PIPE_CONFIG(ADDR_SURF_P4_8x16));
901 break;
902 case 9:
903 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
904 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
905 break;
906 case 10:
907 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
908 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
909 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
911 break;
912 case 11:
913 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
914 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
915 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
917 break;
918 case 12:
919 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
920 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
921 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
923 break;
924 case 13:
925 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
927 break;
928 case 14:
929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
931 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
933 break;
934 case 16:
935 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
936 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
937 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
939 break;
940 case 17:
941 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
942 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
943 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
945 break;
946 case 27:
947 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
948 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
949 break;
950 case 28:
951 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
955 break;
956 case 29:
957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
959 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
961 break;
962 case 30:
963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
965 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
967 break;
968 default:
969 gb_tile_moden = 0;
970 break;
971 }
972 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
973 }
974 }
975 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
976 switch (reg_offset) {
977 case 0:
978 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
981 NUM_BANKS(ADDR_SURF_16_BANK));
982 break;
983 case 1:
984 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
987 NUM_BANKS(ADDR_SURF_16_BANK));
988 break;
989 case 2:
990 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
993 NUM_BANKS(ADDR_SURF_16_BANK));
994 break;
995 case 3:
996 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
999 NUM_BANKS(ADDR_SURF_16_BANK));
1000 break;
1001 case 4:
1002 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1005 NUM_BANKS(ADDR_SURF_16_BANK));
1006 break;
1007 case 5:
1008 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1011 NUM_BANKS(ADDR_SURF_8_BANK));
1012 break;
1013 case 6:
1014 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1017 NUM_BANKS(ADDR_SURF_4_BANK));
1018 break;
1019 case 8:
1020 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1023 NUM_BANKS(ADDR_SURF_16_BANK));
1024 break;
1025 case 9:
1026 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1029 NUM_BANKS(ADDR_SURF_16_BANK));
1030 break;
1031 case 10:
1032 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1035 NUM_BANKS(ADDR_SURF_16_BANK));
1036 break;
1037 case 11:
1038 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1041 NUM_BANKS(ADDR_SURF_16_BANK));
1042 break;
1043 case 12:
1044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1047 NUM_BANKS(ADDR_SURF_16_BANK));
1048 break;
1049 case 13:
1050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1053 NUM_BANKS(ADDR_SURF_8_BANK));
1054 break;
1055 case 14:
1056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1059 NUM_BANKS(ADDR_SURF_4_BANK));
1060 break;
1061 default:
1062 gb_tile_moden = 0;
1063 break;
1064 }
1065 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1066 }
1067 } else if (num_pipe_configs == 2) {
1068 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1069 switch (reg_offset) {
1070 case 0:
1071 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1073 PIPE_CONFIG(ADDR_SURF_P2) |
1074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1075 break;
1076 case 1:
1077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1079 PIPE_CONFIG(ADDR_SURF_P2) |
1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1081 break;
1082 case 2:
1083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1085 PIPE_CONFIG(ADDR_SURF_P2) |
1086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1087 break;
1088 case 3:
1089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1091 PIPE_CONFIG(ADDR_SURF_P2) |
1092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1093 break;
1094 case 4:
1095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1097 PIPE_CONFIG(ADDR_SURF_P2) |
1098 TILE_SPLIT(split_equal_to_row_size));
1099 break;
1100 case 5:
1101 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1103 break;
1104 case 6:
1105 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1107 PIPE_CONFIG(ADDR_SURF_P2) |
1108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1109 break;
1110 case 7:
1111 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1113 PIPE_CONFIG(ADDR_SURF_P2) |
1114 TILE_SPLIT(split_equal_to_row_size));
1115 break;
1116 case 8:
1117 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1118 break;
1119 case 9:
1120 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1122 break;
1123 case 10:
1124 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1126 PIPE_CONFIG(ADDR_SURF_P2) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1128 break;
1129 case 11:
1130 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1131 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1132 PIPE_CONFIG(ADDR_SURF_P2) |
1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1134 break;
1135 case 12:
1136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1138 PIPE_CONFIG(ADDR_SURF_P2) |
1139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1140 break;
1141 case 13:
1142 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1144 break;
1145 case 14:
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P2) |
1149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1150 break;
1151 case 16:
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P2) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1156 break;
1157 case 17:
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P2) |
1161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1162 break;
1163 case 27:
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1166 break;
1167 case 28:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P2) |
1171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1172 break;
1173 case 29:
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P2) |
1177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1178 break;
1179 case 30:
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P2) |
1183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1184 break;
1185 default:
1186 gb_tile_moden = 0;
1187 break;
1188 }
1189 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1190 }
1191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1192 switch (reg_offset) {
1193 case 0:
1194 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 break;
1199 case 1:
1200 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1203 NUM_BANKS(ADDR_SURF_16_BANK));
1204 break;
1205 case 2:
1206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1209 NUM_BANKS(ADDR_SURF_16_BANK));
1210 break;
1211 case 3:
1212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1215 NUM_BANKS(ADDR_SURF_16_BANK));
1216 break;
1217 case 4:
1218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1221 NUM_BANKS(ADDR_SURF_16_BANK));
1222 break;
1223 case 5:
1224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1227 NUM_BANKS(ADDR_SURF_16_BANK));
1228 break;
1229 case 6:
1230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1233 NUM_BANKS(ADDR_SURF_8_BANK));
1234 break;
1235 case 8:
1236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1239 NUM_BANKS(ADDR_SURF_16_BANK));
1240 break;
1241 case 9:
1242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1245 NUM_BANKS(ADDR_SURF_16_BANK));
1246 break;
1247 case 10:
1248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1251 NUM_BANKS(ADDR_SURF_16_BANK));
1252 break;
1253 case 11:
1254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1257 NUM_BANKS(ADDR_SURF_16_BANK));
1258 break;
1259 case 12:
1260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1263 NUM_BANKS(ADDR_SURF_16_BANK));
1264 break;
1265 case 13:
1266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1269 NUM_BANKS(ADDR_SURF_16_BANK));
1270 break;
1271 case 14:
1272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1275 NUM_BANKS(ADDR_SURF_8_BANK));
1276 break;
1277 default:
1278 gb_tile_moden = 0;
1279 break;
1280 }
1281 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1282 }
1283 } else
1284 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1285}
1286
/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH. 0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

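/* Illustrative sketch (not part of the upstream driver): iterating over all
 * SE/SH instances and then restoring broadcast mode, mirroring the pattern
 * cik_setup_rb() below uses when it collects per-SH state.
 */
static inline void cik_for_each_sh_example(struct radeon_device *rdev,
					   u32 se_num, u32 sh_per_se)
{
	u32 i, j;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			/* ... read or program the instanced registers here ... */
		}
	}
	/* restore broadcast so later writes hit every instance */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
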
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

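/* Note: for bit_width < 32 the loop above is equivalent to the closed form
 * (1u << bit_width) - 1, i.e. a mask of bit_width consecutive low-order
 * 1 bits; the loop form simply avoids the undefined 32-bit shift.
 */
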
/**
 * cik_get_rb_disabled - get the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0 num_pipes
	 * bits 7:4 num_banks
	 * bits 11:8 group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

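/* Illustrative sketch (not part of the upstream driver): unpacking the custom
 * tile_config dword built above (bits 3:0 num_pipes, 7:4 num_banks,
 * 11:8 group_size, 15:12 row_size).  The struct and helper are made up for
 * this example; userspace normally receives the raw dword through the
 * RADEON_INFO ioctl and decodes it itself.
 */
struct cik_tile_config_example {
	u32 num_pipes_field;
	u32 num_banks_field;
	u32 group_size_field;
	u32 row_size_field;
};

static inline void cik_decode_tile_config_example(u32 tile_config,
						  struct cik_tile_config_example *tc)
{
	tc->num_pipes_field = (tile_config >> 0) & 0xf;
	tc->num_banks_field = (tile_config >> 4) & 0xf;
	tc->group_size_field = (tile_config >> 8) & 0xf;
	tc->row_size_field = (tile_config >> 12) & 0xf;
}
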
/*
 * GPU scratch registers helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+). On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

2cae3bc3
AD
1708/**
1709 * cik_fence_ring_emit - emit a fence on the gfx ring
1710 *
1711 * @rdev: radeon_device pointer
1712 * @fence: radeon fence object
1713 *
1714 * Emits a fence sequence number on the gfx ring and flushes
1715 * GPU caches.
1716 */
1717void cik_fence_ring_emit(struct radeon_device *rdev,
1718 struct radeon_fence *fence)
1719{
1720 struct radeon_ring *ring = &rdev->ring[fence->ring];
1721 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1722
1723 /* EVENT_WRITE_EOP - flush caches, send int */
1724 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1725 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1726 EOP_TC_ACTION_EN |
1727 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1728 EVENT_INDEX(5)));
1729 radeon_ring_write(ring, addr & 0xfffffffc);
1730 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1731 radeon_ring_write(ring, fence->seq);
1732 radeon_ring_write(ring, 0);
1733 /* HDP flush */
1734 /* We should be using the new WAIT_REG_MEM special op packet here
1735 * but it causes the CP to hang
1736 */
1737 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1738 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1739 WRITE_DATA_DST_SEL(0)));
1740 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1741 radeon_ring_write(ring, 0);
1742 radeon_ring_write(ring, 0);
1743}
1744
1745void cik_semaphore_ring_emit(struct radeon_device *rdev,
1746 struct radeon_ring *ring,
1747 struct radeon_semaphore *semaphore,
1748 bool emit_wait)
1749{
1750 uint64_t addr = semaphore->gpu_addr;
1751 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1752
1753 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1754 radeon_ring_write(ring, addr & 0xffffffff);
1755 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1756}
1757
1758/*
1759 * IB stuff
1760 */
1761/**
1762 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1763 *
1764 * @rdev: radeon_device pointer
1765 * @ib: radeon indirect buffer object
1766 *
1767 * Emits a DE (drawing engine) or CE (constant engine) IB
1768 * on the gfx ring. IBs are usually generated by userspace
1769 * acceleration drivers and submitted to the kernel for
1770 * scheduling on the ring. This function schedules the IB
1771 * on the gfx ring for execution by the GPU.
1772 */
1773void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1774{
1775 struct radeon_ring *ring = &rdev->ring[ib->ring];
1776 u32 header, control = INDIRECT_BUFFER_VALID;
1777
1778 if (ib->is_const_ib) {
1779 /* set switch buffer packet before const IB */
1780 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1781 radeon_ring_write(ring, 0);
1782
1783 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1784 } else {
1785 u32 next_rptr;
1786 if (ring->rptr_save_reg) {
1787 next_rptr = ring->wptr + 3 + 4;
1788 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1789 radeon_ring_write(ring, ((ring->rptr_save_reg -
1790 PACKET3_SET_UCONFIG_REG_START) >> 2));
1791 radeon_ring_write(ring, next_rptr);
1792 } else if (rdev->wb.enabled) {
1793 next_rptr = ring->wptr + 5 + 4;
1794 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1795 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1796 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1797 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1798 radeon_ring_write(ring, next_rptr);
1799 }
1800
1801 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1802 }
1803
1804 control |= ib->length_dw |
1805 (ib->vm ? (ib->vm->id << 24) : 0);
1806
1807 radeon_ring_write(ring, header);
1808 radeon_ring_write(ring,
1809#ifdef __BIG_ENDIAN
1810 (2 << 0) |
1811#endif
1812 (ib->gpu_addr & 0xFFFFFFFC));
1813 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1814 radeon_ring_write(ring, control);
1815}
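
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * how the next_rptr values above are derived. The saved read pointer must
 * point just past this submission, i.e. the current write pointer plus the
 * dwords emitted here: a 3-dword SET_UCONFIG_REG write (or a 5-dword
 * WRITE_DATA packet when the writeback page is used), followed in both
 * cases by the 4-dword INDIRECT_BUFFER packet (header, addr lo, addr hi,
 * control).
 */
static inline u32 cik_example_next_rptr(u32 wptr, bool use_rptr_save_reg)
{
	const u32 ib_packet_dw = 4;	/* INDIRECT_BUFFER packet */
	const u32 rptr_write_dw = use_rptr_save_reg ? 3 : 5;

	return wptr + rptr_write_dw + ib_packet_dw;
}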
1816
fbc832c7
AD
1817/**
1818 * cik_ib_test - basic gfx ring IB test
1819 *
1820 * @rdev: radeon_device pointer
1821 * @ring: radeon_ring structure holding ring information
1822 *
1823 * Allocate an IB and execute it on the gfx ring (CIK).
1824 * Provides a basic gfx ring test to verify that IBs are working.
1825 * Returns 0 on success, error on failure.
1826 */
1827int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1828{
1829 struct radeon_ib ib;
1830 uint32_t scratch;
1831 uint32_t tmp = 0;
1832 unsigned i;
1833 int r;
1834
1835 r = radeon_scratch_get(rdev, &scratch);
1836 if (r) {
1837 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1838 return r;
1839 }
1840 WREG32(scratch, 0xCAFEDEAD);
1841 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1842 if (r) {
1843 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1844 return r;
1845 }
1846 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1847 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1848 ib.ptr[2] = 0xDEADBEEF;
1849 ib.length_dw = 3;
1850 r = radeon_ib_schedule(rdev, &ib, NULL);
1851 if (r) {
1852 radeon_scratch_free(rdev, scratch);
1853 radeon_ib_free(rdev, &ib);
1854 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1855 return r;
1856 }
1857 r = radeon_fence_wait(ib.fence, false);
1858 if (r) {
1859 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 radeon_scratch_free(rdev, scratch);
 radeon_ib_free(rdev, &ib);
1860 return r;
1861 }
1862 for (i = 0; i < rdev->usec_timeout; i++) {
1863 tmp = RREG32(scratch);
1864 if (tmp == 0xDEADBEEF)
1865 break;
1866 DRM_UDELAY(1);
1867 }
1868 if (i < rdev->usec_timeout) {
1869 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1870 } else {
1871 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1872 scratch, tmp);
1873 r = -EINVAL;
1874 }
1875 radeon_scratch_free(rdev, scratch);
1876 radeon_ib_free(rdev, &ib);
1877 return r;
1878}
1879
841cf442
AD
1880/*
1881 * CP.
1882 * On CIK, gfx and compute now have independent command processors.
1883 *
1884 * GFX
1885 * Gfx consists of a single ring and can process both gfx jobs and
1886 * compute jobs. The gfx CP consists of three microengines (ME):
1887 * PFP - Pre-Fetch Parser
1888 * ME - Micro Engine
1889 * CE - Constant Engine
1890 * The PFP and ME make up what is considered the Drawing Engine (DE).
1891 * The CE is an asynchronous engine used for updating buffer descriptors
1892 * used by the DE so that they can be loaded into cache in parallel
1893 * while the DE is processing state update packets.
1894 *
1895 * Compute
1896 * The compute CP consists of two microengines (ME):
1897 * MEC1 - Compute MicroEngine 1
1898 * MEC2 - Compute MicroEngine 2
1899 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1900 * The queues are exposed to userspace and are programmed directly
1901 * by the compute runtime.
1902 */
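
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * the queue counts described above. Each MEC has 4 pipes and each pipe has
 * 8 queues, so a 2-MEC part (Kaveri) exposes 64 compute queues while a
 * 1-MEC part (Bonaire/Kabini) exposes 32.
 */
static inline u32 cik_example_num_compute_queues(u32 num_mec)
{
	const u32 pipes_per_mec = 4;
	const u32 queues_per_pipe = 8;

	return num_mec * pipes_per_mec * queues_per_pipe;
}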
1903/**
1904 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1905 *
1906 * @rdev: radeon_device pointer
1907 * @enable: enable or disable the MEs
1908 *
1909 * Halts or unhalts the gfx MEs.
1910 */
1911static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1912{
1913 if (enable)
1914 WREG32(CP_ME_CNTL, 0);
1915 else {
1916 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1917 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1918 }
1919 udelay(50);
1920}
1921
1922/**
1923 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1924 *
1925 * @rdev: radeon_device pointer
1926 *
1927 * Loads the gfx PFP, ME, and CE ucode.
1928 * Returns 0 for success, -EINVAL if the ucode is not available.
1929 */
1930static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1931{
1932 const __be32 *fw_data;
1933 int i;
1934
1935 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1936 return -EINVAL;
1937
1938 cik_cp_gfx_enable(rdev, false);
1939
1940 /* PFP */
1941 fw_data = (const __be32 *)rdev->pfp_fw->data;
1942 WREG32(CP_PFP_UCODE_ADDR, 0);
1943 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1944 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1945 WREG32(CP_PFP_UCODE_ADDR, 0);
1946
1947 /* CE */
1948 fw_data = (const __be32 *)rdev->ce_fw->data;
1949 WREG32(CP_CE_UCODE_ADDR, 0);
1950 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1951 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1952 WREG32(CP_CE_UCODE_ADDR, 0);
1953
1954 /* ME */
1955 fw_data = (const __be32 *)rdev->me_fw->data;
1956 WREG32(CP_ME_RAM_WADDR, 0);
1957 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1958 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1959 WREG32(CP_ME_RAM_WADDR, 0);
1960
1961 WREG32(CP_PFP_UCODE_ADDR, 0);
1962 WREG32(CP_CE_UCODE_ADDR, 0);
1963 WREG32(CP_ME_RAM_WADDR, 0);
1964 WREG32(CP_ME_RAM_RADDR, 0);
1965 return 0;
1966}
1967
1968/**
1969 * cik_cp_gfx_start - start the gfx ring
1970 *
1971 * @rdev: radeon_device pointer
1972 *
1973 * Enables the ring and loads the clear state context and other
1974 * packets required to init the ring.
1975 * Returns 0 for success, error for failure.
1976 */
1977static int cik_cp_gfx_start(struct radeon_device *rdev)
1978{
1979 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1980 int r, i;
1981
1982 /* init the CP */
1983 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1984 WREG32(CP_ENDIAN_SWAP, 0);
1985 WREG32(CP_DEVICE_ID, 1);
1986
1987 cik_cp_gfx_enable(rdev, true);
1988
1989 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1990 if (r) {
1991 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1992 return r;
1993 }
1994
1995 /* init the CE partitions. CE only used for gfx on CIK */
1996 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1997 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1998 radeon_ring_write(ring, 0xc000);
1999 radeon_ring_write(ring, 0xc000);
2000
2001 /* setup clear context state */
2002 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2003 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2004
2005 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2006 radeon_ring_write(ring, 0x80000000);
2007 radeon_ring_write(ring, 0x80000000);
2008
2009 for (i = 0; i < cik_default_size; i++)
2010 radeon_ring_write(ring, cik_default_state[i]);
2011
2012 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2013 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2014
2015 /* set clear context state */
2016 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2017 radeon_ring_write(ring, 0);
2018
2019 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2020 radeon_ring_write(ring, 0x00000316);
2021 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2022 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2023
2024 radeon_ring_unlock_commit(rdev, ring);
2025
2026 return 0;
2027}
2028
2029/**
2030 * cik_cp_gfx_fini - stop the gfx ring
2031 *
2032 * @rdev: radeon_device pointer
2033 *
2034 * Stop the gfx ring and tear down the driver ring
2035 * info.
2036 */
2037static void cik_cp_gfx_fini(struct radeon_device *rdev)
2038{
2039 cik_cp_gfx_enable(rdev, false);
2040 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2041}
2042
2043/**
2044 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2045 *
2046 * @rdev: radeon_device pointer
2047 *
2048 * Program the location and size of the gfx ring buffer
2049 * and test it to make sure it's working.
2050 * Returns 0 for success, error for failure.
2051 */
2052static int cik_cp_gfx_resume(struct radeon_device *rdev)
2053{
2054 struct radeon_ring *ring;
2055 u32 tmp;
2056 u32 rb_bufsz;
2057 u64 rb_addr;
2058 int r;
2059
2060 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2061 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2062
2063 /* Set the write pointer delay */
2064 WREG32(CP_RB_WPTR_DELAY, 0);
2065
2066 /* set the RB to use vmid 0 */
2067 WREG32(CP_RB_VMID, 0);
2068
2069 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2070
2071 /* ring 0 - compute and gfx */
2072 /* Set ring buffer size */
2073 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2074 rb_bufsz = drm_order(ring->ring_size / 8);
2075 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2076#ifdef __BIG_ENDIAN
2077 tmp |= BUF_SWAP_32BIT;
2078#endif
2079 WREG32(CP_RB0_CNTL, tmp);
2080
2081 /* Initialize the ring buffer's read and write pointers */
2082 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2083 ring->wptr = 0;
2084 WREG32(CP_RB0_WPTR, ring->wptr);
2085
2086 /* set the wb address whether it's enabled or not */
2087 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2088 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2089
2090 /* scratch register shadowing is no longer supported */
2091 WREG32(SCRATCH_UMSK, 0);
2092
2093 if (!rdev->wb.enabled)
2094 tmp |= RB_NO_UPDATE;
2095
2096 mdelay(1);
2097 WREG32(CP_RB0_CNTL, tmp);
2098
2099 rb_addr = ring->gpu_addr >> 8;
2100 WREG32(CP_RB0_BASE, rb_addr);
2101 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2102
2103 ring->rptr = RREG32(CP_RB0_RPTR);
2104
2105 /* start the ring */
2106 cik_cp_gfx_start(rdev);
2107 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2108 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2109 if (r) {
2110 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2111 return r;
2112 }
2113 return 0;
2114}
2115
963e81f9
AD
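/**
 * cik_compute_ring_get_rptr - get the read pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Read the queue's read pointer from the writeback page if it is
 * enabled, otherwise directly from CP_HQD_PQ_RPTR via an SRBM select
 * of the ring's ME/pipe/queue (CIK).
 * Returns the masked and shifted read pointer.
 */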
2116u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2117 struct radeon_ring *ring)
2118{
2119 u32 rptr;
2120
2121
2122
2123 if (rdev->wb.enabled) {
2124 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2125 } else {
2126 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2127 rptr = RREG32(CP_HQD_PQ_RPTR);
2128 cik_srbm_select(rdev, 0, 0, 0, 0);
2129 }
2130 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2131
2132 return rptr;
2133}
2134
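/**
 * cik_compute_ring_get_wptr - get the write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Read the queue's write pointer from the writeback page if it is
 * enabled, otherwise directly from CP_HQD_PQ_WPTR via an SRBM select
 * of the ring's ME/pipe/queue (CIK).
 * Returns the masked and shifted write pointer.
 */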
2135u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2136 struct radeon_ring *ring)
2137{
2138 u32 wptr;
2139
2140 if (rdev->wb.enabled) {
2141 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2142 } else {
2143 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2144 wptr = RREG32(CP_HQD_PQ_WPTR);
2145 cik_srbm_select(rdev, 0, 0, 0, 0);
2146 }
2147 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2148
2149 return wptr;
2150}
2151
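/**
 * cik_compute_ring_set_wptr - commit the write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Update the write pointer copy in the writeback page and ring the
 * queue's doorbell to notify the CP of new work (CIK).
 */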
2152void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2153 struct radeon_ring *ring)
2154{
2155 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2156
2157 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2158 WDOORBELL32(ring->doorbell_offset, wptr);
2159}
2160
841cf442
AD
2161/**
2162 * cik_cp_compute_enable - enable/disable the compute CP MEs
2163 *
2164 * @rdev: radeon_device pointer
2165 * @enable: enable or disable the MEs
2166 *
2167 * Halts or unhalts the compute MEs.
2168 */
2169static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2170{
2171 if (enable)
2172 WREG32(CP_MEC_CNTL, 0);
2173 else
2174 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2175 udelay(50);
2176}
2177
2178/**
2179 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2180 *
2181 * @rdev: radeon_device pointer
2182 *
2183 * Loads the compute MEC1&2 ucode.
2184 * Returns 0 for success, -EINVAL if the ucode is not available.
2185 */
2186static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2187{
2188 const __be32 *fw_data;
2189 int i;
2190
2191 if (!rdev->mec_fw)
2192 return -EINVAL;
2193
2194 cik_cp_compute_enable(rdev, false);
2195
2196 /* MEC1 */
2197 fw_data = (const __be32 *)rdev->mec_fw->data;
2198 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2199 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2200 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2201 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2202
2203 if (rdev->family == CHIP_KAVERI) {
2204 /* MEC2 */
2205 fw_data = (const __be32 *)rdev->mec_fw->data;
2206 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2207 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2208 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2209 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2210 }
2211
2212 return 0;
2213}
2214
2215/**
2216 * cik_cp_compute_start - start the compute queues
2217 *
2218 * @rdev: radeon_device pointer
2219 *
2220 * Enable the compute queues.
2221 * Returns 0 for success, error for failure.
2222 */
2223static int cik_cp_compute_start(struct radeon_device *rdev)
2224{
963e81f9
AD
2225 cik_cp_compute_enable(rdev, true);
2226
841cf442
AD
2227 return 0;
2228}
2229
2230/**
2231 * cik_cp_compute_fini - stop the compute queues
2232 *
2233 * @rdev: radeon_device pointer
2234 *
2235 * Stop the compute queues and tear down the driver queue
2236 * info.
2237 */
2238static void cik_cp_compute_fini(struct radeon_device *rdev)
2239{
963e81f9
AD
2240 int i, idx, r;
2241
841cf442 2242 cik_cp_compute_enable(rdev, false);
963e81f9
AD
2243
2244 for (i = 0; i < 2; i++) {
2245 if (i == 0)
2246 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2247 else
2248 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2249
2250 if (rdev->ring[idx].mqd_obj) {
2251 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2252 if (unlikely(r != 0))
2253 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2254
2255 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2256 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2257
2258 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2259 rdev->ring[idx].mqd_obj = NULL;
2260 }
2261 }
841cf442
AD
2262}
2263
963e81f9
AD
2264static void cik_mec_fini(struct radeon_device *rdev)
2265{
2266 int r;
2267
2268 if (rdev->mec.hpd_eop_obj) {
2269 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2270 if (unlikely(r != 0))
2271 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2272 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2273 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2274
2275 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2276 rdev->mec.hpd_eop_obj = NULL;
2277 }
2278}
2279
2280#define MEC_HPD_SIZE 2048
2281
2282static int cik_mec_init(struct radeon_device *rdev)
2283{
2284 int r;
2285 u32 *hpd;
2286
2287 /*
2288 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2289 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2290 */
2291 if (rdev->family == CHIP_KAVERI)
2292 rdev->mec.num_mec = 2;
2293 else
2294 rdev->mec.num_mec = 1;
2295 rdev->mec.num_pipe = 4;
2296 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2297
2298 if (rdev->mec.hpd_eop_obj == NULL) {
2299 r = radeon_bo_create(rdev,
2300 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2301 PAGE_SIZE, true,
2302 RADEON_GEM_DOMAIN_GTT, NULL,
2303 &rdev->mec.hpd_eop_obj);
2304 if (r) {
2305 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2306 return r;
2307 }
2308 }
2309
2310 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2311 if (unlikely(r != 0)) {
2312 cik_mec_fini(rdev);
2313 return r;
2314 }
2315 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2316 &rdev->mec.hpd_eop_gpu_addr);
2317 if (r) {
2318 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2319 cik_mec_fini(rdev);
2320 return r;
2321 }
2322 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2323 if (r) {
2324 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2325 cik_mec_fini(rdev);
2326 return r;
2327 }
2328
2329 /* clear memory. Not sure if this is required or not */
2330 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2331
2332 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2333 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2334
2335 return 0;
2336}
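
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * the size of the HPD EOP buffer allocated above. Each of the
 * num_mec * num_pipe hardware pipes gets 2 * MEC_HPD_SIZE bytes, which
 * works out to 32 KiB on Kaveri (2 MECs) and 16 KiB on Bonaire/Kabini.
 */
static inline u32 cik_example_hpd_eop_size(u32 num_mec, u32 num_pipe)
{
	return num_mec * num_pipe * MEC_HPD_SIZE * 2;
}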
2337
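/*
 * The two structures below mirror the CP compute queue state in memory:
 * hqd_registers shadows the per-queue CP_HQD and CP_MQD registers, and
 * bonaire_mqd is the Memory Queue Descriptor (MQD) that holds a queue's
 * saved state. cik_cp_compute_resume() below allocates one MQD per
 * compute ring and copies its queue_state fields into the hardware
 * registers.
 */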
2338struct hqd_registers
2339{
2340 u32 cp_mqd_base_addr;
2341 u32 cp_mqd_base_addr_hi;
2342 u32 cp_hqd_active;
2343 u32 cp_hqd_vmid;
2344 u32 cp_hqd_persistent_state;
2345 u32 cp_hqd_pipe_priority;
2346 u32 cp_hqd_queue_priority;
2347 u32 cp_hqd_quantum;
2348 u32 cp_hqd_pq_base;
2349 u32 cp_hqd_pq_base_hi;
2350 u32 cp_hqd_pq_rptr;
2351 u32 cp_hqd_pq_rptr_report_addr;
2352 u32 cp_hqd_pq_rptr_report_addr_hi;
2353 u32 cp_hqd_pq_wptr_poll_addr;
2354 u32 cp_hqd_pq_wptr_poll_addr_hi;
2355 u32 cp_hqd_pq_doorbell_control;
2356 u32 cp_hqd_pq_wptr;
2357 u32 cp_hqd_pq_control;
2358 u32 cp_hqd_ib_base_addr;
2359 u32 cp_hqd_ib_base_addr_hi;
2360 u32 cp_hqd_ib_rptr;
2361 u32 cp_hqd_ib_control;
2362 u32 cp_hqd_iq_timer;
2363 u32 cp_hqd_iq_rptr;
2364 u32 cp_hqd_dequeue_request;
2365 u32 cp_hqd_dma_offload;
2366 u32 cp_hqd_sema_cmd;
2367 u32 cp_hqd_msg_type;
2368 u32 cp_hqd_atomic0_preop_lo;
2369 u32 cp_hqd_atomic0_preop_hi;
2370 u32 cp_hqd_atomic1_preop_lo;
2371 u32 cp_hqd_atomic1_preop_hi;
2372 u32 cp_hqd_hq_scheduler0;
2373 u32 cp_hqd_hq_scheduler1;
2374 u32 cp_mqd_control;
2375};
2376
2377struct bonaire_mqd
2378{
2379 u32 header;
2380 u32 dispatch_initiator;
2381 u32 dimensions[3];
2382 u32 start_idx[3];
2383 u32 num_threads[3];
2384 u32 pipeline_stat_enable;
2385 u32 perf_counter_enable;
2386 u32 pgm[2];
2387 u32 tba[2];
2388 u32 tma[2];
2389 u32 pgm_rsrc[2];
2390 u32 vmid;
2391 u32 resource_limits;
2392 u32 static_thread_mgmt01[2];
2393 u32 tmp_ring_size;
2394 u32 static_thread_mgmt23[2];
2395 u32 restart[3];
2396 u32 thread_trace_enable;
2397 u32 reserved1;
2398 u32 user_data[16];
2399 u32 vgtcs_invoke_count[2];
2400 struct hqd_registers queue_state;
2401 u32 dequeue_cntr;
2402 u32 interrupt_queue[64];
2403};
2404
841cf442
AD
2405/**
2406 * cik_cp_compute_resume - setup the compute queue registers
2407 *
2408 * @rdev: radeon_device pointer
2409 *
2410 * Program the compute queues and test them to make sure they
2411 * are working.
2412 * Returns 0 for success, error for failure.
2413 */
2414static int cik_cp_compute_resume(struct radeon_device *rdev)
2415{
963e81f9
AD
2416 int r, i, idx;
2417 u32 tmp;
2418 bool use_doorbell = true;
2419 u64 hqd_gpu_addr;
2420 u64 mqd_gpu_addr;
2421 u64 eop_gpu_addr;
2422 u64 wb_gpu_addr;
2423 u32 *buf;
2424 struct bonaire_mqd *mqd;
841cf442 2425
841cf442
AD
2426 r = cik_cp_compute_start(rdev);
2427 if (r)
2428 return r;
963e81f9
AD
2429
2430 /* fix up chicken bits */
2431 tmp = RREG32(CP_CPF_DEBUG);
2432 tmp |= (1 << 23);
2433 WREG32(CP_CPF_DEBUG, tmp);
2434
2435 /* init the pipes */
2436 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2437 int me = (i < 4) ? 1 : 2;
2438 int pipe = (i < 4) ? i : (i - 4);
2439
2440 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2441
2442 cik_srbm_select(rdev, me, pipe, 0, 0);
2443
2444 /* write the EOP addr */
2445 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2446 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2447
2448 /* set the VMID assigned */
2449 WREG32(CP_HPD_EOP_VMID, 0);
2450
2451 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2452 tmp = RREG32(CP_HPD_EOP_CONTROL);
2453 tmp &= ~EOP_SIZE_MASK;
2454 tmp |= drm_order(MEC_HPD_SIZE / 8);
2455 WREG32(CP_HPD_EOP_CONTROL, tmp);
2456 }
2457 cik_srbm_select(rdev, 0, 0, 0, 0);
2458
2459 /* init the queues. Just two for now. */
2460 for (i = 0; i < 2; i++) {
2461 if (i == 0)
2462 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2463 else
2464 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2465
2466 if (rdev->ring[idx].mqd_obj == NULL) {
2467 r = radeon_bo_create(rdev,
2468 sizeof(struct bonaire_mqd),
2469 PAGE_SIZE, true,
2470 RADEON_GEM_DOMAIN_GTT, NULL,
2471 &rdev->ring[idx].mqd_obj);
2472 if (r) {
2473 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2474 return r;
2475 }
2476 }
2477
2478 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2479 if (unlikely(r != 0)) {
2480 cik_cp_compute_fini(rdev);
2481 return r;
2482 }
2483 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2484 &mqd_gpu_addr);
2485 if (r) {
2486 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2487 cik_cp_compute_fini(rdev);
2488 return r;
2489 }
2490 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2491 if (r) {
2492 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2493 cik_cp_compute_fini(rdev);
2494 return r;
2495 }
2496
2497 /* doorbell offset */
2498 rdev->ring[idx].doorbell_offset =
2499 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2500
2501 /* init the mqd struct */
2502 memset(buf, 0, sizeof(struct bonaire_mqd));
2503
2504 mqd = (struct bonaire_mqd *)buf;
2505 mqd->header = 0xC0310800;
2506 mqd->static_thread_mgmt01[0] = 0xffffffff;
2507 mqd->static_thread_mgmt01[1] = 0xffffffff;
2508 mqd->static_thread_mgmt23[0] = 0xffffffff;
2509 mqd->static_thread_mgmt23[1] = 0xffffffff;
2510
2511 cik_srbm_select(rdev, rdev->ring[idx].me,
2512 rdev->ring[idx].pipe,
2513 rdev->ring[idx].queue, 0);
2514
2515 /* disable wptr polling */
2516 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2517 tmp &= ~WPTR_POLL_EN;
2518 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2519
2520 /* enable doorbell? */
2521 mqd->queue_state.cp_hqd_pq_doorbell_control =
2522 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2523 if (use_doorbell)
2524 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2525 else
2526 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2527 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2528 mqd->queue_state.cp_hqd_pq_doorbell_control);
2529
2530 /* disable the queue if it's active */
2531 mqd->queue_state.cp_hqd_dequeue_request = 0;
2532 mqd->queue_state.cp_hqd_pq_rptr = 0;
2533 mqd->queue_state.cp_hqd_pq_wptr = 0;
2534 if (RREG32(CP_HQD_ACTIVE) & 1) {
2535 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2536 for (i = 0; i < rdev->usec_timeout; i++) {
2537 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2538 break;
2539 udelay(1);
2540 }
2541 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
2542 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
2543 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2544 }
2545
2546 /* set the pointer to the MQD */
2547 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
2548 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2549 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
2550 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
2551 /* set MQD vmid to 0 */
2552 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
2553 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
2554 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
2555
2556 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2557 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
2558 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
2559 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2560 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
2561 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
2562
2563 /* set up the HQD, this is similar to CP_RB0_CNTL */
2564 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
2565 mqd->queue_state.cp_hqd_pq_control &=
2566 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
2567
2568 mqd->queue_state.cp_hqd_pq_control |=
2569 drm_order(rdev->ring[idx].ring_size / 8);
2570 mqd->queue_state.cp_hqd_pq_control |=
2571 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
2572#ifdef __BIG_ENDIAN
2573 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
2574#endif
2575 mqd->queue_state.cp_hqd_pq_control &=
2576 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
2577 mqd->queue_state.cp_hqd_pq_control |=
2578 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
2579 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
2580
2581 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
2582 if (i == 0)
2583 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
2584 else
2585 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
2586 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
2587 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2588 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
2589 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
2590 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
2591
2592 /* set the wb address whether it's enabled or not */
2593 if (i == 0)
2594 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
2595 else
2596 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
2597 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
2598 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
2599 upper_32_bits(wb_gpu_addr) & 0xffff;
2600 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
2601 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
2602 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2603 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
2604
2605 /* enable the doorbell if requested */
2606 if (use_doorbell) {
2607 mqd->queue_state.cp_hqd_pq_doorbell_control =
2608 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2609 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
2610 mqd->queue_state.cp_hqd_pq_doorbell_control |=
2611 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
2612 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2613 mqd->queue_state.cp_hqd_pq_doorbell_control &=
2614 ~(DOORBELL_SOURCE | DOORBELL_HIT);
2615
2616 } else {
2617 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
2618 }
2619 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2620 mqd->queue_state.cp_hqd_pq_doorbell_control);
2621
2622 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2623 rdev->ring[idx].wptr = 0;
2624 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
2625 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2626 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
2627 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
2628
2629 /* set the vmid for the queue */
2630 mqd->queue_state.cp_hqd_vmid = 0;
2631 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
2632
2633 /* activate the queue */
2634 mqd->queue_state.cp_hqd_active = 1;
2635 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
2636
2637 cik_srbm_select(rdev, 0, 0, 0, 0);
2638
2639 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
2640 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2641
2642 rdev->ring[idx].ready = true;
2643 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
2644 if (r)
2645 rdev->ring[idx].ready = false;
2646 }
2647
841cf442
AD
2648 return 0;
2649}
2650
841cf442
AD
2651static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2652{
2653 cik_cp_gfx_enable(rdev, enable);
2654 cik_cp_compute_enable(rdev, enable);
2655}
2656
841cf442
AD
2657static int cik_cp_load_microcode(struct radeon_device *rdev)
2658{
2659 int r;
2660
2661 r = cik_cp_gfx_load_microcode(rdev);
2662 if (r)
2663 return r;
2664 r = cik_cp_compute_load_microcode(rdev);
2665 if (r)
2666 return r;
2667
2668 return 0;
2669}
2670
841cf442
AD
2671static void cik_cp_fini(struct radeon_device *rdev)
2672{
2673 cik_cp_gfx_fini(rdev);
2674 cik_cp_compute_fini(rdev);
2675}
2676
841cf442
AD
2677static int cik_cp_resume(struct radeon_device *rdev)
2678{
2679 int r;
2680
2681 /* Reset all cp blocks */
2682 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2683 RREG32(GRBM_SOFT_RESET);
2684 mdelay(15);
2685 WREG32(GRBM_SOFT_RESET, 0);
2686 RREG32(GRBM_SOFT_RESET);
2687
2688 r = cik_cp_load_microcode(rdev);
2689 if (r)
2690 return r;
2691
2692 r = cik_cp_gfx_resume(rdev);
2693 if (r)
2694 return r;
2695 r = cik_cp_compute_resume(rdev);
2696 if (r)
2697 return r;
2698
2699 return 0;
2700}
2701
21a93e13
AD
2702/*
2703 * sDMA - System DMA
2704 * Starting with CIK, the GPU has new asynchronous
2705 * DMA engines. These engines are used for compute
2706 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2707 * and each one supports 1 ring buffer used for gfx
2708 * and 2 queues used for compute.
2709 *
2710 * The programming model is very similar to the CP
2711 * (ring buffer, IBs, etc.), but sDMA has its own
2712 * packet format that is different from the PM4 format
2713 * used by the CP. sDMA supports copying data, writing
2714 * embedded data, solid fills, and a number of other
2715 * things. It also has support for tiling/detiling of
2716 * buffers.
2717 */
2718/**
2719 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2720 *
2721 * @rdev: radeon_device pointer
2722 * @ib: IB object to schedule
2723 *
2724 * Schedule an IB in the DMA ring (CIK).
2725 */
2726void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2727 struct radeon_ib *ib)
2728{
2729 struct radeon_ring *ring = &rdev->ring[ib->ring];
2730 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2731
2732 if (rdev->wb.enabled) {
2733 u32 next_rptr = ring->wptr + 5;
2734 while ((next_rptr & 7) != 4)
2735 next_rptr++;
2736 next_rptr += 4;
2737 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2738 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2739 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2740 radeon_ring_write(ring, 1); /* number of DWs to follow */
2741 radeon_ring_write(ring, next_rptr);
2742 }
2743
2744 /* IB packet must end on an 8 DW boundary */
2745 while ((ring->wptr & 7) != 4)
2746 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2747 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2748 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2749 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2750 radeon_ring_write(ring, ib->length_dw);
2751
2752}
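
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * the NOP padding emitted above. The 4-dword INDIRECT_BUFFER packet has to
 * end on an 8-dword boundary, so it must start where (wptr & 7) == 4; this
 * computes how many NOPs the padding loop writes for a given write pointer.
 */
static inline u32 cik_example_sdma_ib_pad_count(u32 wptr)
{
	return (4 - (wptr & 7)) & 7;
}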
2753
2754/**
2755 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2756 *
2757 * @rdev: radeon_device pointer
2758 * @fence: radeon fence object
2759 *
2760 * Add a DMA fence packet to the ring to write
2761 * the fence seq number and a DMA trap packet to generate
2762 * an interrupt if needed (CIK).
2763 */
2764void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2765 struct radeon_fence *fence)
2766{
2767 struct radeon_ring *ring = &rdev->ring[fence->ring];
2768 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2769 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2770 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2771 u32 ref_and_mask;
2772
2773 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2774 ref_and_mask = SDMA0;
2775 else
2776 ref_and_mask = SDMA1;
2777
2778 /* write the fence */
2779 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2780 radeon_ring_write(ring, addr & 0xffffffff);
2781 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2782 radeon_ring_write(ring, fence->seq);
2783 /* generate an interrupt */
2784 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2785 /* flush HDP */
2786 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2787 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2788 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2789 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2790 radeon_ring_write(ring, ref_and_mask); /* MASK */
2791 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2792}
2793
2794/**
2795 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2796 *
2797 * @rdev: radeon_device pointer
2798 * @ring: radeon_ring structure holding ring information
2799 * @semaphore: radeon semaphore object
2800 * @emit_wait: wait or signal semaphore
2801 *
2802 * Add a DMA semaphore packet to the ring to wait on or signal
2803 * other rings (CIK).
2804 */
2805void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2806 struct radeon_ring *ring,
2807 struct radeon_semaphore *semaphore,
2808 bool emit_wait)
2809{
2810 u64 addr = semaphore->gpu_addr;
2811 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2812
2813 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2814 radeon_ring_write(ring, addr & 0xfffffff8);
2815 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2816}
2817
2818/**
2819 * cik_sdma_gfx_stop - stop the gfx async dma engines
2820 *
2821 * @rdev: radeon_device pointer
2822 *
2823 * Stop the gfx async dma ring buffers (CIK).
2824 */
2825static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2826{
2827 u32 rb_cntl, reg_offset;
2828 int i;
2829
2830 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2831
2832 for (i = 0; i < 2; i++) {
2833 if (i == 0)
2834 reg_offset = SDMA0_REGISTER_OFFSET;
2835 else
2836 reg_offset = SDMA1_REGISTER_OFFSET;
2837 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2838 rb_cntl &= ~SDMA_RB_ENABLE;
2839 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2840 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2841 }
2842}
2843
2844/**
2845 * cik_sdma_rlc_stop - stop the compute async dma engines
2846 *
2847 * @rdev: radeon_device pointer
2848 *
2849 * Stop the compute async dma queues (CIK).
2850 */
2851static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2852{
2853 /* XXX todo */
2854}
2855
2856/**
2857 * cik_sdma_enable - enable/disable the async dma engines
2858 *
2859 * @rdev: radeon_device pointer
2860 * @enable: enable/disable the DMA MEs.
2861 *
2862 * Halt or unhalt the async dma engines (CIK).
2863 */
2864static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2865{
2866 u32 me_cntl, reg_offset;
2867 int i;
2868
2869 for (i = 0; i < 2; i++) {
2870 if (i == 0)
2871 reg_offset = SDMA0_REGISTER_OFFSET;
2872 else
2873 reg_offset = SDMA1_REGISTER_OFFSET;
2874 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2875 if (enable)
2876 me_cntl &= ~SDMA_HALT;
2877 else
2878 me_cntl |= SDMA_HALT;
2879 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2880 }
2881}
2882
2883/**
2884 * cik_sdma_gfx_resume - setup and start the async dma engines
2885 *
2886 * @rdev: radeon_device pointer
2887 *
2888 * Set up the gfx DMA ring buffers and enable them (CIK).
2889 * Returns 0 for success, error for failure.
2890 */
2891static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2892{
2893 struct radeon_ring *ring;
2894 u32 rb_cntl, ib_cntl;
2895 u32 rb_bufsz;
2896 u32 reg_offset, wb_offset;
2897 int i, r;
2898
2899 for (i = 0; i < 2; i++) {
2900 if (i == 0) {
2901 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2902 reg_offset = SDMA0_REGISTER_OFFSET;
2903 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2904 } else {
2905 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2906 reg_offset = SDMA1_REGISTER_OFFSET;
2907 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2908 }
2909
2910 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2911 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2912
2913 /* Set ring buffer size in dwords */
2914 rb_bufsz = drm_order(ring->ring_size / 4);
2915 rb_cntl = rb_bufsz << 1;
2916#ifdef __BIG_ENDIAN
2917 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2918#endif
2919 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2920
2921 /* Initialize the ring buffer's read and write pointers */
2922 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2923 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2924
2925 /* set the wb address whether it's enabled or not */
2926 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2927 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2928 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2929 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2930
2931 if (rdev->wb.enabled)
2932 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2933
2934 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2935 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2936
2937 ring->wptr = 0;
2938 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2939
2940 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2941
2942 /* enable DMA RB */
2943 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2944
2945 ib_cntl = SDMA_IB_ENABLE;
2946#ifdef __BIG_ENDIAN
2947 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2948#endif
2949 /* enable DMA IBs */
2950 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2951
2952 ring->ready = true;
2953
2954 r = radeon_ring_test(rdev, ring->idx, ring);
2955 if (r) {
2956 ring->ready = false;
2957 return r;
2958 }
2959 }
2960
2961 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2962
2963 return 0;
2964}
2965
2966/**
2967 * cik_sdma_rlc_resume - setup and start the async dma engines
2968 *
2969 * @rdev: radeon_device pointer
2970 *
2971 * Set up the compute DMA queues and enable them (CIK).
2972 * Returns 0 for success, error for failure.
2973 */
2974static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2975{
2976 /* XXX todo */
2977 return 0;
2978}
2979
2980/**
2981 * cik_sdma_load_microcode - load the sDMA ME ucode
2982 *
2983 * @rdev: radeon_device pointer
2984 *
2985 * Loads the sDMA0/1 ucode.
2986 * Returns 0 for success, -EINVAL if the ucode is not available.
2987 */
2988static int cik_sdma_load_microcode(struct radeon_device *rdev)
2989{
2990 const __be32 *fw_data;
2991 int i;
2992
2993 if (!rdev->sdma_fw)
2994 return -EINVAL;
2995
2996 /* stop the gfx rings and rlc compute queues */
2997 cik_sdma_gfx_stop(rdev);
2998 cik_sdma_rlc_stop(rdev);
2999
3000 /* halt the MEs */
3001 cik_sdma_enable(rdev, false);
3002
3003 /* sdma0 */
3004 fw_data = (const __be32 *)rdev->sdma_fw->data;
3005 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3006 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3007 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3008 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3009
3010 /* sdma1 */
3011 fw_data = (const __be32 *)rdev->sdma_fw->data;
3012 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3013 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3014 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3015 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3016
3017 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3018 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3019 return 0;
3020}
3021
3022/**
3023 * cik_sdma_resume - setup and start the async dma engines
3024 *
3025 * @rdev: radeon_device pointer
3026 *
3027 * Set up the DMA engines and enable them (CIK).
3028 * Returns 0 for success, error for failure.
3029 */
3030static int cik_sdma_resume(struct radeon_device *rdev)
3031{
3032 int r;
3033
3034 /* Reset dma */
3035 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3036 RREG32(SRBM_SOFT_RESET);
3037 udelay(50);
3038 WREG32(SRBM_SOFT_RESET, 0);
3039 RREG32(SRBM_SOFT_RESET);
3040
3041 r = cik_sdma_load_microcode(rdev);
3042 if (r)
3043 return r;
3044
3045 /* unhalt the MEs */
3046 cik_sdma_enable(rdev, true);
3047
3048 /* start the gfx rings and rlc compute queues */
3049 r = cik_sdma_gfx_resume(rdev);
3050 if (r)
3051 return r;
3052 r = cik_sdma_rlc_resume(rdev);
3053 if (r)
3054 return r;
3055
3056 return 0;
3057}
3058
3059/**
3060 * cik_sdma_fini - tear down the async dma engines
3061 *
3062 * @rdev: radeon_device pointer
3063 *
3064 * Stop the async dma engines and free the rings (CIK).
3065 */
3066static void cik_sdma_fini(struct radeon_device *rdev)
3067{
3068 /* stop the gfx rings and rlc compute queues */
3069 cik_sdma_gfx_stop(rdev);
3070 cik_sdma_rlc_stop(rdev);
3071 /* halt the MEs */
3072 cik_sdma_enable(rdev, false);
3073 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3074 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3075 /* XXX - compute dma queue tear down */
3076}
3077
3078/**
3079 * cik_copy_dma - copy pages using the DMA engine
3080 *
3081 * @rdev: radeon_device pointer
3082 * @src_offset: src GPU address
3083 * @dst_offset: dst GPU address
3084 * @num_gpu_pages: number of GPU pages to xfer
3085 * @fence: radeon fence object
3086 *
3087 * Copy GPU pages using the DMA engine (CIK).
3088 * Used by the radeon ttm implementation to move pages if
3089 * registered as the asic copy callback.
3090 */
3091int cik_copy_dma(struct radeon_device *rdev,
3092 uint64_t src_offset, uint64_t dst_offset,
3093 unsigned num_gpu_pages,
3094 struct radeon_fence **fence)
3095{
3096 struct radeon_semaphore *sem = NULL;
3097 int ring_index = rdev->asic->copy.dma_ring_index;
3098 struct radeon_ring *ring = &rdev->ring[ring_index];
3099 u32 size_in_bytes, cur_size_in_bytes;
3100 int i, num_loops;
3101 int r = 0;
3102
3103 r = radeon_semaphore_create(rdev, &sem);
3104 if (r) {
3105 DRM_ERROR("radeon: moving bo (%d).\n", r);
3106 return r;
3107 }
3108
3109 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3110 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3111 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3112 if (r) {
3113 DRM_ERROR("radeon: moving bo (%d).\n", r);
3114 radeon_semaphore_free(rdev, &sem, NULL);
3115 return r;
3116 }
3117
3118 if (radeon_fence_need_sync(*fence, ring->idx)) {
3119 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3120 ring->idx);
3121 radeon_fence_note_sync(*fence, ring->idx);
3122 } else {
3123 radeon_semaphore_free(rdev, &sem, NULL);
3124 }
3125
3126 for (i = 0; i < num_loops; i++) {
3127 cur_size_in_bytes = size_in_bytes;
3128 if (cur_size_in_bytes > 0x1fffff)
3129 cur_size_in_bytes = 0x1fffff;
3130 size_in_bytes -= cur_size_in_bytes;
3131 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3132 radeon_ring_write(ring, cur_size_in_bytes);
3133 radeon_ring_write(ring, 0); /* src/dst endian swap */
3134 radeon_ring_write(ring, src_offset & 0xffffffff);
3135 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3136 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3137 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3138 src_offset += cur_size_in_bytes;
3139 dst_offset += cur_size_in_bytes;
3140 }
3141
3142 r = radeon_fence_emit(rdev, fence, ring->idx);
3143 if (r) {
3144 radeon_ring_unlock_undo(rdev, ring);
3145 return r;
3146 }
3147
3148 radeon_ring_unlock_commit(rdev, ring);
3149 radeon_semaphore_free(rdev, &sem, *fence);
3150
3151 return r;
3152}
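
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * the ring space requested above. Each linear copy packet moves at most
 * 0x1fffff bytes and takes 7 dwords (header, size, endian swap, src lo/hi,
 * dst lo/hi); the extra 14 dwords of headroom are assumed to cover the
 * optional semaphore sync and the fence packet emitted at the end.
 */
static inline unsigned cik_example_copy_dma_ring_dw(unsigned num_gpu_pages)
{
	unsigned size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
	unsigned num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);

	return num_loops * 7 + 14;
}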
3153
3154/**
3155 * cik_sdma_ring_test - simple async dma engine test
3156 *
3157 * @rdev: radeon_device pointer
3158 * @ring: radeon_ring structure holding ring information
3159 *
3160 * Test the DMA engine by using it to write a value
3161 * to memory (CIK).
3162 * Returns 0 for success, error for failure.
3163 */
3164int cik_sdma_ring_test(struct radeon_device *rdev,
3165 struct radeon_ring *ring)
3166{
3167 unsigned i;
3168 int r;
3169 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3170 u32 tmp;
3171
3172 if (!ptr) {
3173 DRM_ERROR("invalid vram scratch pointer\n");
3174 return -EINVAL;
3175 }
3176
3177 tmp = 0xCAFEDEAD;
3178 writel(tmp, ptr);
3179
3180 r = radeon_ring_lock(rdev, ring, 4);
3181 if (r) {
3182 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3183 return r;
3184 }
3185 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3186 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3187 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3188 radeon_ring_write(ring, 1); /* number of DWs to follow */
3189 radeon_ring_write(ring, 0xDEADBEEF);
3190 radeon_ring_unlock_commit(rdev, ring);
3191
3192 for (i = 0; i < rdev->usec_timeout; i++) {
3193 tmp = readl(ptr);
3194 if (tmp == 0xDEADBEEF)
3195 break;
3196 DRM_UDELAY(1);
3197 }
3198
3199 if (i < rdev->usec_timeout) {
3200 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3201 } else {
3202 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3203 ring->idx, tmp);
3204 r = -EINVAL;
3205 }
3206 return r;
3207}
3208
3209/**
3210 * cik_sdma_ib_test - test an IB on the DMA engine
3211 *
3212 * @rdev: radeon_device pointer
3213 * @ring: radeon_ring structure holding ring information
3214 *
3215 * Test a simple IB in the DMA ring (CIK).
3216 * Returns 0 on success, error on failure.
3217 */
3218int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3219{
3220 struct radeon_ib ib;
3221 unsigned i;
3222 int r;
3223 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3224 u32 tmp = 0;
3225
3226 if (!ptr) {
3227 DRM_ERROR("invalid vram scratch pointer\n");
3228 return -EINVAL;
3229 }
3230
3231 tmp = 0xCAFEDEAD;
3232 writel(tmp, ptr);
3233
3234 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3235 if (r) {
3236 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3237 return r;
3238 }
3239
3240 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3241 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3242 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3243 ib.ptr[3] = 1;
3244 ib.ptr[4] = 0xDEADBEEF;
3245 ib.length_dw = 5;
3246
3247 r = radeon_ib_schedule(rdev, &ib, NULL);
3248 if (r) {
3249 radeon_ib_free(rdev, &ib);
3250 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3251 return r;
3252 }
3253 r = radeon_fence_wait(ib.fence, false);
3254 if (r) {
3255 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 radeon_ib_free(rdev, &ib);
3256 return r;
3257 }
3258 for (i = 0; i < rdev->usec_timeout; i++) {
3259 tmp = readl(ptr);
3260 if (tmp == 0xDEADBEEF)
3261 break;
3262 DRM_UDELAY(1);
3263 }
3264 if (i < rdev->usec_timeout) {
3265 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3266 } else {
3267 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3268 r = -EINVAL;
3269 }
3270 radeon_ib_free(rdev, &ib);
3271 return r;
3272}
3273
6f2043ce 3274
cc066715 3275static void cik_print_gpu_status_regs(struct radeon_device *rdev)
6f2043ce 3276{
6f2043ce
AD
3277 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3278 RREG32(GRBM_STATUS));
3279 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3280 RREG32(GRBM_STATUS2));
3281 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3282 RREG32(GRBM_STATUS_SE0));
3283 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3284 RREG32(GRBM_STATUS_SE1));
3285 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3286 RREG32(GRBM_STATUS_SE2));
3287 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3288 RREG32(GRBM_STATUS_SE3));
3289 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3290 RREG32(SRBM_STATUS));
3291 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3292 RREG32(SRBM_STATUS2));
cc066715
AD
3293 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3294 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3295 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3296 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
963e81f9
AD
3297 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3298 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3299 RREG32(CP_STALLED_STAT1));
3300 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3301 RREG32(CP_STALLED_STAT2));
3302 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3303 RREG32(CP_STALLED_STAT3));
3304 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3305 RREG32(CP_CPF_BUSY_STAT));
3306 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3307 RREG32(CP_CPF_STALLED_STAT1));
3308 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3309 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3310 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3311 RREG32(CP_CPC_STALLED_STAT1));
3312 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
cc066715 3313}
6f2043ce 3314
cc066715
AD
3315/**
3316 * cik_gpu_check_soft_reset - check which blocks are busy
3317 *
3318 * @rdev: radeon_device pointer
3319 *
3320 * Check which blocks are busy and return the relevant reset
3321 * mask to be used by cik_gpu_soft_reset().
3322 * Returns a mask of the blocks to be reset.
3323 */
3324static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3325{
3326 u32 reset_mask = 0;
3327 u32 tmp;
6f2043ce 3328
cc066715
AD
3329 /* GRBM_STATUS */
3330 tmp = RREG32(GRBM_STATUS);
3331 if (tmp & (PA_BUSY | SC_BUSY |
3332 BCI_BUSY | SX_BUSY |
3333 TA_BUSY | VGT_BUSY |
3334 DB_BUSY | CB_BUSY |
3335 GDS_BUSY | SPI_BUSY |
3336 IA_BUSY | IA_BUSY_NO_DMA))
3337 reset_mask |= RADEON_RESET_GFX;
3338
3339 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3340 reset_mask |= RADEON_RESET_CP;
3341
3342 /* GRBM_STATUS2 */
3343 tmp = RREG32(GRBM_STATUS2);
3344 if (tmp & RLC_BUSY)
3345 reset_mask |= RADEON_RESET_RLC;
3346
3347 /* SDMA0_STATUS_REG */
3348 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3349 if (!(tmp & SDMA_IDLE))
3350 reset_mask |= RADEON_RESET_DMA;
3351
3352 /* SDMA1_STATUS_REG */
3353 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3354 if (!(tmp & SDMA_IDLE))
3355 reset_mask |= RADEON_RESET_DMA1;
3356
3357 /* SRBM_STATUS2 */
3358 tmp = RREG32(SRBM_STATUS2);
3359 if (tmp & SDMA_BUSY)
3360 reset_mask |= RADEON_RESET_DMA;
3361
3362 if (tmp & SDMA1_BUSY)
3363 reset_mask |= RADEON_RESET_DMA1;
3364
3365 /* SRBM_STATUS */
3366 tmp = RREG32(SRBM_STATUS);
3367
3368 if (tmp & IH_BUSY)
3369 reset_mask |= RADEON_RESET_IH;
3370
3371 if (tmp & SEM_BUSY)
3372 reset_mask |= RADEON_RESET_SEM;
3373
3374 if (tmp & GRBM_RQ_PENDING)
3375 reset_mask |= RADEON_RESET_GRBM;
3376
3377 if (tmp & VMC_BUSY)
3378 reset_mask |= RADEON_RESET_VMC;
3379
3380 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3381 MCC_BUSY | MCD_BUSY))
3382 reset_mask |= RADEON_RESET_MC;
3383
3384 if (evergreen_is_display_hung(rdev))
3385 reset_mask |= RADEON_RESET_DISPLAY;
3386
3387 /* Skip MC reset as it's most likely not hung, just busy */
3388 if (reset_mask & RADEON_RESET_MC) {
3389 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3390 reset_mask &= ~RADEON_RESET_MC;
3391 }
3392
3393 return reset_mask;
6f2043ce
AD
3394}
3395
3396/**
cc066715 3397 * cik_gpu_soft_reset - soft reset GPU
6f2043ce
AD
3398 *
3399 * @rdev: radeon_device pointer
cc066715 3400 * @reset_mask: mask of which blocks to reset
6f2043ce 3401 *
cc066715 3402 * Soft reset the blocks specified in @reset_mask.
6f2043ce 3403 */
cc066715 3404static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
6f2043ce
AD
3405{
3406 struct evergreen_mc_save save;
cc066715
AD
3407 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3408 u32 tmp;
3409
3410 if (reset_mask == 0)
3411 return;
3412
3413 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3414
3415 cik_print_gpu_status_regs(rdev);
3416 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3417 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3418 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3419 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3420
3421 /* stop the rlc */
3422 cik_rlc_stop(rdev);
3423
3424 /* Disable GFX parsing/prefetching */
3425 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3426
3427 /* Disable MEC parsing/prefetching */
3428 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3429
3430 if (reset_mask & RADEON_RESET_DMA) {
3431 /* sdma0 */
3432 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3433 tmp |= SDMA_HALT;
3434 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3435 }
3436 if (reset_mask & RADEON_RESET_DMA1) {
3437 /* sdma1 */
3438 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3439 tmp |= SDMA_HALT;
3440 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3441 }
6f2043ce 3442
6f2043ce 3443 evergreen_mc_stop(rdev, &save);
cc066715 3444 if (evergreen_mc_wait_for_idle(rdev)) {
6f2043ce
AD
3445 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3446 }
6f2043ce 3447
cc066715
AD
3448 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3449 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3450
3451 if (reset_mask & RADEON_RESET_CP) {
3452 grbm_soft_reset |= SOFT_RESET_CP;
3453
3454 srbm_soft_reset |= SOFT_RESET_GRBM;
3455 }
3456
3457 if (reset_mask & RADEON_RESET_DMA)
3458 srbm_soft_reset |= SOFT_RESET_SDMA;
3459
3460 if (reset_mask & RADEON_RESET_DMA1)
3461 srbm_soft_reset |= SOFT_RESET_SDMA1;
3462
3463 if (reset_mask & RADEON_RESET_DISPLAY)
3464 srbm_soft_reset |= SOFT_RESET_DC;
3465
3466 if (reset_mask & RADEON_RESET_RLC)
3467 grbm_soft_reset |= SOFT_RESET_RLC;
3468
3469 if (reset_mask & RADEON_RESET_SEM)
3470 srbm_soft_reset |= SOFT_RESET_SEM;
3471
3472 if (reset_mask & RADEON_RESET_IH)
3473 srbm_soft_reset |= SOFT_RESET_IH;
3474
3475 if (reset_mask & RADEON_RESET_GRBM)
3476 srbm_soft_reset |= SOFT_RESET_GRBM;
3477
3478 if (reset_mask & RADEON_RESET_VMC)
3479 srbm_soft_reset |= SOFT_RESET_VMC;
3480
3481 if (!(rdev->flags & RADEON_IS_IGP)) {
3482 if (reset_mask & RADEON_RESET_MC)
3483 srbm_soft_reset |= SOFT_RESET_MC;
3484 }
3485
3486 if (grbm_soft_reset) {
3487 tmp = RREG32(GRBM_SOFT_RESET);
3488 tmp |= grbm_soft_reset;
3489 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3490 WREG32(GRBM_SOFT_RESET, tmp);
3491 tmp = RREG32(GRBM_SOFT_RESET);
3492
3493 udelay(50);
3494
3495 tmp &= ~grbm_soft_reset;
3496 WREG32(GRBM_SOFT_RESET, tmp);
3497 tmp = RREG32(GRBM_SOFT_RESET);
3498 }
3499
3500 if (srbm_soft_reset) {
3501 tmp = RREG32(SRBM_SOFT_RESET);
3502 tmp |= srbm_soft_reset;
3503 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3504 WREG32(SRBM_SOFT_RESET, tmp);
3505 tmp = RREG32(SRBM_SOFT_RESET);
3506
3507 udelay(50);
3508
3509 tmp &= ~srbm_soft_reset;
3510 WREG32(SRBM_SOFT_RESET, tmp);
3511 tmp = RREG32(SRBM_SOFT_RESET);
3512 }
6f2043ce 3513
6f2043ce
AD
3514 /* Wait a little for things to settle down */
3515 udelay(50);
cc066715 3516
6f2043ce 3517 evergreen_mc_resume(rdev, &save);
cc066715
AD
3518 udelay(50);
3519
3520 cik_print_gpu_status_regs(rdev);
6f2043ce
AD
3521}
3522
3523/**
cc066715 3524 * cik_asic_reset - soft reset GPU
6f2043ce
AD
3525 *
3526 * @rdev: radeon_device pointer
3527 *
cc066715
AD
3528 * Look up which blocks are hung and attempt
3529 * to reset them.
6f2043ce
AD
3530 * Returns 0 for success.
3531 */
3532int cik_asic_reset(struct radeon_device *rdev)
3533{
cc066715 3534 u32 reset_mask;
6f2043ce 3535
cc066715
AD
3536 reset_mask = cik_gpu_check_soft_reset(rdev);
3537
3538 if (reset_mask)
3539 r600_set_bios_scratch_engine_hung(rdev, true);
3540
3541 cik_gpu_soft_reset(rdev, reset_mask);
6f2043ce 3542
cc066715
AD
3543 reset_mask = cik_gpu_check_soft_reset(rdev);
3544
3545 if (!reset_mask)
3546 r600_set_bios_scratch_engine_hung(rdev, false);
3547
3548 return 0;
3549}
3550
3551/**
3552 * cik_gfx_is_lockup - check if the 3D engine is locked up
3553 *
3554 * @rdev: radeon_device pointer
3555 * @ring: radeon_ring structure holding ring information
3556 *
3557 * Check if the 3D engine is locked up (CIK).
3558 * Returns true if the engine is locked, false if not.
3559 */
3560bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3561{
3562 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3563
3564 if (!(reset_mask & (RADEON_RESET_GFX |
3565 RADEON_RESET_COMPUTE |
3566 RADEON_RESET_CP))) {
3567 radeon_ring_lockup_update(ring);
3568 return false;
3569 }
3570 /* force CP activities */
3571 radeon_ring_force_activity(rdev, ring);
3572 return radeon_ring_test_lockup(rdev, ring);
6f2043ce 3573}
1c49165d 3574
21a93e13
AD
3575/**
3576 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3577 *
3578 * @rdev: radeon_device pointer
3579 * @ring: radeon_ring structure holding ring information
3580 *
3581 * Check if the async DMA engine is locked up (CIK).
3582 * Returns true if the engine appears to be locked up, false if not.
3583 */
3584bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3585{
cc066715
AD
3586 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3587 u32 mask;
21a93e13
AD
3588
3589 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
cc066715 3590 mask = RADEON_RESET_DMA;
21a93e13 3591 else
cc066715
AD
3592 mask = RADEON_RESET_DMA1;
3593
3594 if (!(reset_mask & mask)) {
21a93e13
AD
3595 radeon_ring_lockup_update(ring);
3596 return false;
3597 }
3598 /* force ring activities */
3599 radeon_ring_force_activity(rdev, ring);
3600 return radeon_ring_test_lockup(rdev, ring);
3601}
3602
1c49165d
AD
3603/* MC */
3604/**
3605 * cik_mc_program - program the GPU memory controller
3606 *
3607 * @rdev: radeon_device pointer
3608 *
3609 * Set the location of vram, gart, and AGP in the GPU's
3610 * physical address space (CIK).
3611 */
3612static void cik_mc_program(struct radeon_device *rdev)
3613{
3614 struct evergreen_mc_save save;
3615 u32 tmp;
3616 int i, j;
3617
3618 /* Initialize HDP */
3619 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3620 WREG32((0x2c14 + j), 0x00000000);
3621 WREG32((0x2c18 + j), 0x00000000);
3622 WREG32((0x2c1c + j), 0x00000000);
3623 WREG32((0x2c20 + j), 0x00000000);
3624 WREG32((0x2c24 + j), 0x00000000);
3625 }
3626 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3627
3628 evergreen_mc_stop(rdev, &save);
3629 if (radeon_mc_wait_for_idle(rdev)) {
3630 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3631 }
3632 /* Lock out access through the VGA aperture */
3633 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3634 /* Update configuration */
3635 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3636 rdev->mc.vram_start >> 12);
3637 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3638 rdev->mc.vram_end >> 12);
3639 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3640 rdev->vram_scratch.gpu_addr >> 12);
3641 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3642 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3643 WREG32(MC_VM_FB_LOCATION, tmp);
3644 /* XXX double check these! */
3645 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3646 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3647 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3648 WREG32(MC_VM_AGP_BASE, 0);
3649 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3650 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3651 if (radeon_mc_wait_for_idle(rdev)) {
3652 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3653 }
3654 evergreen_mc_resume(rdev, &save);
3655 /* we need to own VRAM, so turn off the VGA renderer here
3656 * to stop it overwriting our objects */
3657 rv515_vga_render_disable(rdev);
3658}
3659
3660/**
3661 * cik_mc_init - initialize the memory controller driver params
3662 *
3663 * @rdev: radeon_device pointer
3664 *
3665 * Look up the amount of vram, vram width, and decide how to place
3666 * vram and gart within the GPU's physical address space (CIK).
3667 * Returns 0 for success.
3668 */
3669static int cik_mc_init(struct radeon_device *rdev)
3670{
3671 u32 tmp;
3672 int chansize, numchan;
3673
3674 /* Get VRAM information */
3675 rdev->mc.vram_is_ddr = true;
3676 tmp = RREG32(MC_ARB_RAMCFG);
3677 if (tmp & CHANSIZE_MASK) {
3678 chansize = 64;
3679 } else {
3680 chansize = 32;
3681 }
3682 tmp = RREG32(MC_SHARED_CHMAP);
3683 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3684 case 0:
3685 default:
3686 numchan = 1;
3687 break;
3688 case 1:
3689 numchan = 2;
3690 break;
3691 case 2:
3692 numchan = 4;
3693 break;
3694 case 3:
3695 numchan = 8;
3696 break;
3697 case 4:
3698 numchan = 3;
3699 break;
3700 case 5:
3701 numchan = 6;
3702 break;
3703 case 6:
3704 numchan = 10;
3705 break;
3706 case 7:
3707 numchan = 12;
3708 break;
3709 case 8:
3710 numchan = 16;
3711 break;
3712 }
3713 rdev->mc.vram_width = numchan * chansize;
3714 /* Could aper size report 0 ? */
3715 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3716 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3717 /* CONFIG_MEMSIZE reports the size in MB on CIK */
3718 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3719 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3720 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3721 si_vram_gtt_location(rdev, &rdev->mc);
3722 radeon_update_bandwidth_info(rdev);
3723
3724 return 0;
3725}
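
/*
 * Illustrative sketch: cik_mc_init() above derives the VRAM bus width as
 * channel count times channel size, e.g. 4 channels of 64 bits give a 256-bit
 * interface.  The helper below is a standalone restatement of that arithmetic,
 * not driver code.
 */
#include <stdint.h>

static uint32_t vram_width_bits(uint32_t numchan, uint32_t chansize)
{
	return numchan * chansize;	/* e.g. 4 * 64 = 256-bit interface */
}
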
3726
3727/*
3728 * GART
3729 * VMID 0 holds the physical GPU addresses used by the kernel.
3730 * VMIDs 1-15 are used for userspace clients and are handled
3731 * by the radeon vm/hsa code.
3732 */
3733/**
3734 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3735 *
3736 * @rdev: radeon_device pointer
3737 *
3738 * Flush the TLB for the VMID 0 page table (CIK).
3739 */
3740void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3741{
3742 /* flush hdp cache */
3743 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3744
3745 /* bits 0-15 are the VM contexts0-15 */
3746 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3747}
3748
3749/**
3750 * cik_pcie_gart_enable - gart enable
3751 *
3752 * @rdev: radeon_device pointer
3753 *
3754 * This sets up the TLBs, programs the page tables for VMID0,
3755 * sets up the hw for VMIDs 1-15 which are allocated on
3756 * demand, and sets up the global locations for the LDS, GDS,
3757 * and GPUVM for FSA64 clients (CIK).
3758 * Returns 0 for success, errors for failure.
3759 */
3760static int cik_pcie_gart_enable(struct radeon_device *rdev)
3761{
3762 int r, i;
3763
3764 if (rdev->gart.robj == NULL) {
3765 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3766 return -EINVAL;
3767 }
3768 r = radeon_gart_table_vram_pin(rdev);
3769 if (r)
3770 return r;
3771 radeon_gart_restore(rdev);
3772 /* Setup TLB control */
3773 WREG32(MC_VM_MX_L1_TLB_CNTL,
3774 (0xA << 7) |
3775 ENABLE_L1_TLB |
3776 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3777 ENABLE_ADVANCED_DRIVER_MODEL |
3778 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3779 /* Setup L2 cache */
3780 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3781 ENABLE_L2_FRAGMENT_PROCESSING |
3782 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3783 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3784 EFFECTIVE_L2_QUEUE_SIZE(7) |
3785 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3786 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3787 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3788 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3789 /* setup context0 */
3790 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3791 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3792 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3793 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3794 (u32)(rdev->dummy_page.addr >> 12));
3795 WREG32(VM_CONTEXT0_CNTL2, 0);
3796 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3797 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3798
3799 WREG32(0x15D4, 0);
3800 WREG32(0x15D8, 0);
3801 WREG32(0x15DC, 0);
3802
3803 /* empty context1-15 */
3804 /* FIXME start with 4G, once using 2 level pt switch to full
3805 * vm size space
3806 */
3807 /* set vm size, must be a multiple of 4 */
3808 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3809 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3810 for (i = 1; i < 16; i++) {
3811 if (i < 8)
3812 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3813 rdev->gart.table_addr >> 12);
3814 else
3815 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3816 rdev->gart.table_addr >> 12);
3817 }
3818
3819 /* enable context1-15 */
3820 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3821 (u32)(rdev->dummy_page.addr >> 12));
a00024b0 3822 WREG32(VM_CONTEXT1_CNTL2, 4);
1c49165d 3823 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
a00024b0
AD
3824 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3825 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3826 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3827 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3828 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3829 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3830 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3831 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3832 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3833 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3834 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3835 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1c49165d
AD
3836
3837 /* TC cache setup ??? */
3838 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3839 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3840 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3841
3842 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3843 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3844 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3845 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3846 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3847
3848 WREG32(TC_CFG_L1_VOLATILE, 0);
3849 WREG32(TC_CFG_L2_VOLATILE, 0);
3850
3851 if (rdev->family == CHIP_KAVERI) {
3852 u32 tmp = RREG32(CHUB_CONTROL);
3853 tmp &= ~BYPASS_VM;
3854 WREG32(CHUB_CONTROL, tmp);
3855 }
3856
3857 /* XXX SH_MEM regs */
3858 /* where to put LDS, scratch, GPUVM in FSA64 space */
3859 for (i = 0; i < 16; i++) {
b556b12e 3860 cik_srbm_select(rdev, 0, 0, 0, i);
21a93e13 3861 /* CP and shaders */
1c49165d
AD
3862 WREG32(SH_MEM_CONFIG, 0);
3863 WREG32(SH_MEM_APE1_BASE, 1);
3864 WREG32(SH_MEM_APE1_LIMIT, 0);
3865 WREG32(SH_MEM_BASES, 0);
21a93e13
AD
3866 /* SDMA GFX */
3867 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3868 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3869 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3870 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3871 /* XXX SDMA RLC - todo */
1c49165d 3872 }
b556b12e 3873 cik_srbm_select(rdev, 0, 0, 0, 0);
1c49165d
AD
3874
3875 cik_pcie_gart_tlb_flush(rdev);
3876 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3877 (unsigned)(rdev->mc.gtt_size >> 20),
3878 (unsigned long long)rdev->gart.table_addr);
3879 rdev->gart.ready = true;
3880 return 0;
3881}
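
/*
 * Illustrative sketch: the per-context loop above (and cik_vm_flush()/
 * cik_dma_vm_flush() below) picks the page-table base register from one of
 * two banks of eight contexts.  The base offsets here are placeholders, not
 * the real cikd.h values; only the indexing scheme is taken from the code.
 */
#include <stdint.h>

#define CTX0_PT_BASE_ADDR  0x1000	/* hypothetical VM_CONTEXT0_PAGE_TABLE_BASE_ADDR */
#define CTX8_PT_BASE_ADDR  0x2000	/* hypothetical VM_CONTEXT8_PAGE_TABLE_BASE_ADDR */

static uint32_t vm_pt_base_reg(unsigned int vmid)
{
	if (vmid < 8)
		return CTX0_PT_BASE_ADDR + (vmid << 2);		/* contexts 0-7 */
	return CTX8_PT_BASE_ADDR + ((vmid - 8) << 2);		/* contexts 8-15 */
}
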
3882
3883/**
3884 * cik_pcie_gart_disable - gart disable
3885 *
3886 * @rdev: radeon_device pointer
3887 *
3888 * This disables all VM page tables (CIK).
3889 */
3890static void cik_pcie_gart_disable(struct radeon_device *rdev)
3891{
3892 /* Disable all tables */
3893 WREG32(VM_CONTEXT0_CNTL, 0);
3894 WREG32(VM_CONTEXT1_CNTL, 0);
3895 /* Setup TLB control */
3896 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3897 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3898 /* Setup L2 cache */
3899 WREG32(VM_L2_CNTL,
3900 ENABLE_L2_FRAGMENT_PROCESSING |
3901 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3902 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3903 EFFECTIVE_L2_QUEUE_SIZE(7) |
3904 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3905 WREG32(VM_L2_CNTL2, 0);
3906 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3907 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3908 radeon_gart_table_vram_unpin(rdev);
3909}
3910
3911/**
3912 * cik_pcie_gart_fini - vm fini callback
3913 *
3914 * @rdev: radeon_device pointer
3915 *
3916 * Tears down the driver GART/VM setup (CIK).
3917 */
3918static void cik_pcie_gart_fini(struct radeon_device *rdev)
3919{
3920 cik_pcie_gart_disable(rdev);
3921 radeon_gart_table_vram_free(rdev);
3922 radeon_gart_fini(rdev);
3923}
3924
3925/* vm parser */
3926/**
3927 * cik_ib_parse - vm ib_parse callback
3928 *
3929 * @rdev: radeon_device pointer
3930 * @ib: indirect buffer pointer
3931 *
3932 * CIK uses hw IB checking so this is a nop (CIK).
3933 */
3934int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3935{
3936 return 0;
3937}
3938
3939/*
3940 * vm
3941 * VMID 0 holds the physical GPU addresses used by the kernel.
3942 * VMIDs 1-15 are used for userspace clients and are handled
3943 * by the radeon vm/hsa code.
3944 */
3945/**
3946 * cik_vm_init - cik vm init callback
3947 *
3948 * @rdev: radeon_device pointer
3949 *
3950 * Inits cik specific vm parameters (number of VMs, base of vram for
3951 * VMIDs 1-15) (CIK).
3952 * Returns 0 for success.
3953 */
3954int cik_vm_init(struct radeon_device *rdev)
3955{
3956 /* number of VMs */
3957 rdev->vm_manager.nvm = 16;
3958 /* base offset of vram pages */
3959 if (rdev->flags & RADEON_IS_IGP) {
3960 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3961 tmp <<= 22;
3962 rdev->vm_manager.vram_base_offset = tmp;
3963 } else
3964 rdev->vm_manager.vram_base_offset = 0;
3965
3966 return 0;
3967}
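
/*
 * Illustrative sketch: on IGPs cik_vm_init() converts MC_VM_FB_OFFSET, which
 * the shift treats as 4 MB units, into a byte offset by shifting left 22 bits.
 * fb_offset_reg below is a hypothetical raw register value.
 */
#include <stdint.h>

static uint64_t igp_vram_base_offset(uint32_t fb_offset_reg)
{
	return (uint64_t)fb_offset_reg << 22;	/* 4 MB units -> bytes */
}
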
3968
3969/**
3970 * cik_vm_fini - cik vm fini callback
3971 *
3972 * @rdev: radeon_device pointer
3973 *
3974 * Tear down any asic specific VM setup (CIK).
3975 */
3976void cik_vm_fini(struct radeon_device *rdev)
3977{
3978}
3979
f96ab484
AD
3980/**
3981 * cik_vm_flush - cik vm flush using the CP
3982 *
3983 * @rdev: radeon_device pointer
3984 *
3985 * Update the page table base and flush the VM TLB
3986 * using the CP (CIK).
3987 */
3988void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3989{
3990 struct radeon_ring *ring = &rdev->ring[ridx];
3991
3992 if (vm == NULL)
3993 return;
3994
3995 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3996 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3997 WRITE_DATA_DST_SEL(0)));
3998 if (vm->id < 8) {
3999 radeon_ring_write(ring,
4000 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4001 } else {
4002 radeon_ring_write(ring,
4003 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4004 }
4005 radeon_ring_write(ring, 0);
4006 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4007
4008 /* update SH_MEM_* regs */
4009 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4010 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4011 WRITE_DATA_DST_SEL(0)));
4012 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4013 radeon_ring_write(ring, 0);
4014 radeon_ring_write(ring, VMID(vm->id));
4015
4016 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4017 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4018 WRITE_DATA_DST_SEL(0)));
4019 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4020 radeon_ring_write(ring, 0);
4021
4022 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4023 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4024 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4025 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4026
4027 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4028 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4029 WRITE_DATA_DST_SEL(0)));
4030 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4031 radeon_ring_write(ring, 0);
4032 radeon_ring_write(ring, VMID(0));
4033
4034 /* HDP flush */
4035 /* We should be using the WAIT_REG_MEM packet here like in
4036 * cik_fence_ring_emit(), but it causes the CP to hang in this
4037 * context...
4038 */
4039 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4040 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4041 WRITE_DATA_DST_SEL(0)));
4042 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4043 radeon_ring_write(ring, 0);
4044 radeon_ring_write(ring, 0);
4045
4046 /* bits 0-15 are the VM contexts0-15 */
4047 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4048 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4049 WRITE_DATA_DST_SEL(0)));
4050 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4051 radeon_ring_write(ring, 0);
4052 radeon_ring_write(ring, 1 << vm->id);
4053
4054 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4055 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4056 radeon_ring_write(ring, 0x0);
4057}
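
/*
 * Illustrative sketch: cik_vm_flush() above repeatedly emits a
 * PACKET3_WRITE_DATA packet that writes one dword to one register.  The helper
 * below shows the five-dword shape of such a packet in a plain buffer;
 * PKT3_WRITE_DATA_HDR and the control encoding are simplified placeholders,
 * not the exact cikd.h definitions.
 */
#include <stdint.h>
#include <stddef.h>

#define PKT3_WRITE_DATA_HDR  0xC0003700u	/* placeholder for PACKET3(WRITE_DATA, 3) */

static size_t emit_reg_write(uint32_t *buf, uint32_t reg_dword_offset, uint32_t value)
{
	buf[0] = PKT3_WRITE_DATA_HDR;	/* packet header, three payload dwords follow the control word */
	buf[1] = 0;			/* control: ME engine, register destination */
	buf[2] = reg_dword_offset;	/* register address in dwords (byte address >> 2) */
	buf[3] = 0;			/* upper address bits, unused for registers */
	buf[4] = value;			/* the value to write */
	return 5;			/* dwords consumed */
}
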
4058
d0e092d9
AD
4059/**
4060 * cik_vm_set_page - update the page tables using sDMA
4061 *
4062 * @rdev: radeon_device pointer
4063 * @ib: indirect buffer to fill with commands
4064 * @pe: addr of the page entry
4065 * @addr: dst addr to write into pe
4066 * @count: number of page entries to update
4067 * @incr: increase next addr by incr bytes
4068 * @flags: access flags
4069 *
4070 * Update the page tables using CP or sDMA (CIK).
4071 */
4072void cik_vm_set_page(struct radeon_device *rdev,
4073 struct radeon_ib *ib,
4074 uint64_t pe,
4075 uint64_t addr, unsigned count,
4076 uint32_t incr, uint32_t flags)
4077{
4078 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4079 uint64_t value;
4080 unsigned ndw;
4081
4082 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4083 /* CP */
4084 while (count) {
4085 ndw = 2 + count * 2;
4086 if (ndw > 0x3FFE)
4087 ndw = 0x3FFE;
4088
4089 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4090 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4091 WRITE_DATA_DST_SEL(1));
4092 ib->ptr[ib->length_dw++] = pe;
4093 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4094 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4095 if (flags & RADEON_VM_PAGE_SYSTEM) {
4096 value = radeon_vm_map_gart(rdev, addr);
4097 value &= 0xFFFFFFFFFFFFF000ULL;
4098 } else if (flags & RADEON_VM_PAGE_VALID) {
4099 value = addr;
4100 } else {
4101 value = 0;
4102 }
4103 addr += incr;
4104 value |= r600_flags;
4105 ib->ptr[ib->length_dw++] = value;
4106 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4107 }
4108 }
4109 } else {
4110 /* DMA */
4111 if (flags & RADEON_VM_PAGE_SYSTEM) {
4112 while (count) {
4113 ndw = count * 2;
4114 if (ndw > 0xFFFFE)
4115 ndw = 0xFFFFE;
4116
4117 /* for non-physically contiguous pages (system) */
4118 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4119 ib->ptr[ib->length_dw++] = pe;
4120 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4121 ib->ptr[ib->length_dw++] = ndw;
4122 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4123 if (flags & RADEON_VM_PAGE_SYSTEM) {
4124 value = radeon_vm_map_gart(rdev, addr);
4125 value &= 0xFFFFFFFFFFFFF000ULL;
4126 } else if (flags & RADEON_VM_PAGE_VALID) {
4127 value = addr;
4128 } else {
4129 value = 0;
4130 }
4131 addr += incr;
4132 value |= r600_flags;
4133 ib->ptr[ib->length_dw++] = value;
4134 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4135 }
4136 }
4137 } else {
4138 while (count) {
4139 ndw = count;
4140 if (ndw > 0x7FFFF)
4141 ndw = 0x7FFFF;
4142
4143 if (flags & RADEON_VM_PAGE_VALID)
4144 value = addr;
4145 else
4146 value = 0;
4147 /* for physically contiguous pages (vram) */
4148 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4149 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4150 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4151 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4152 ib->ptr[ib->length_dw++] = 0;
4153 ib->ptr[ib->length_dw++] = value; /* value */
4154 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4155 ib->ptr[ib->length_dw++] = incr; /* increment size */
4156 ib->ptr[ib->length_dw++] = 0;
4157 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4158 pe += ndw * 8;
4159 addr += ndw * incr;
4160 count -= ndw;
4161 }
4162 }
4163 while (ib->length_dw & 0x7)
4164 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4165 }
4166}
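
/*
 * Illustrative sketch: in the CP path above each WRITE_DATA packet carries at
 * most 0x3FFE dwords, i.e. two header/address dwords plus two dwords per page
 * table entry, so one packet updates at most (0x3FFE - 2) / 2 = 8190 entries.
 * This standalone helper just reproduces that chunking arithmetic.
 */
#include <stdint.h>

static unsigned int pte_chunks_needed(unsigned int count)
{
	const unsigned int max_entries = (0x3FFE - 2) / 2;	/* 8190 PTEs per packet */

	return (count + max_entries - 1) / max_entries;		/* round up */
}
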
4167
605de6b9
AD
4168/**
4169 * cik_dma_vm_flush - cik vm flush using sDMA
4170 *
4171 * @rdev: radeon_device pointer
4172 *
4173 * Update the page table base and flush the VM TLB
4174 * using sDMA (CIK).
4175 */
4176void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4177{
4178 struct radeon_ring *ring = &rdev->ring[ridx];
4179 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4180 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4181 u32 ref_and_mask;
4182
4183 if (vm == NULL)
4184 return;
4185
4186 if (ridx == R600_RING_TYPE_DMA_INDEX)
4187 ref_and_mask = SDMA0;
4188 else
4189 ref_and_mask = SDMA1;
4190
4191 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4192 if (vm->id < 8) {
4193 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4194 } else {
4195 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4196 }
4197 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4198
4199 /* update SH_MEM_* regs */
4200 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4201 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4202 radeon_ring_write(ring, VMID(vm->id));
4203
4204 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4205 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4206 radeon_ring_write(ring, 0);
4207
4208 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4209 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4210 radeon_ring_write(ring, 0);
4211
4212 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4213 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4214 radeon_ring_write(ring, 1);
4215
4216 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4217 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4218 radeon_ring_write(ring, 0);
4219
4220 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4221 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4222 radeon_ring_write(ring, VMID(0));
4223
4224 /* flush HDP */
4225 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4226 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4227 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4228 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4229 radeon_ring_write(ring, ref_and_mask); /* MASK */
4230 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4231
4232 /* flush TLB */
4233 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4234 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4235 radeon_ring_write(ring, 1 << vm->id);
4236}
4237
f6796cae
AD
4238/*
4239 * RLC
4240 * The RLC is a multi-purpose microengine that handles a
4241 * variety of functions, the most important of which is
4242 * the interrupt controller.
4243 */
4244/**
4245 * cik_rlc_stop - stop the RLC ME
4246 *
4247 * @rdev: radeon_device pointer
4248 *
4249 * Halt the RLC ME (MicroEngine) (CIK).
4250 */
4251static void cik_rlc_stop(struct radeon_device *rdev)
4252{
4253 int i, j, k;
4254 u32 mask, tmp;
4255
4256 tmp = RREG32(CP_INT_CNTL_RING0);
4257 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4258 WREG32(CP_INT_CNTL_RING0, tmp);
4259
4260 RREG32(CB_CGTT_SCLK_CTRL);
4261 RREG32(CB_CGTT_SCLK_CTRL);
4262 RREG32(CB_CGTT_SCLK_CTRL);
4263 RREG32(CB_CGTT_SCLK_CTRL);
4264
4265 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4266 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4267
4268 WREG32(RLC_CNTL, 0);
4269
4270 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4271 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4272 cik_select_se_sh(rdev, i, j);
4273 for (k = 0; k < rdev->usec_timeout; k++) {
4274 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4275 break;
4276 udelay(1);
4277 }
4278 }
4279 }
4280 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4281
4282 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4283 for (k = 0; k < rdev->usec_timeout; k++) {
4284 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4285 break;
4286 udelay(1);
4287 }
4288}
4289
4290/**
4291 * cik_rlc_start - start the RLC ME
4292 *
4293 * @rdev: radeon_device pointer
4294 *
4295 * Unhalt the RLC ME (MicroEngine) (CIK).
4296 */
4297static void cik_rlc_start(struct radeon_device *rdev)
4298{
4299 u32 tmp;
4300
4301 WREG32(RLC_CNTL, RLC_ENABLE);
4302
4303 tmp = RREG32(CP_INT_CNTL_RING0);
4304 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4305 WREG32(CP_INT_CNTL_RING0, tmp);
4306
4307 udelay(50);
4308}
4309
4310/**
4311 * cik_rlc_resume - setup the RLC hw
4312 *
4313 * @rdev: radeon_device pointer
4314 *
4315 * Initialize the RLC registers, load the ucode,
4316 * and start the RLC (CIK).
4317 * Returns 0 for success, -EINVAL if the ucode is not available.
4318 */
4319static int cik_rlc_resume(struct radeon_device *rdev)
4320{
4321 u32 i, size;
4322 u32 clear_state_info[3];
4323 const __be32 *fw_data;
4324
4325 if (!rdev->rlc_fw)
4326 return -EINVAL;
4327
4328 switch (rdev->family) {
4329 case CHIP_BONAIRE:
4330 default:
4331 size = BONAIRE_RLC_UCODE_SIZE;
4332 break;
4333 case CHIP_KAVERI:
4334 size = KV_RLC_UCODE_SIZE;
4335 break;
4336 case CHIP_KABINI:
4337 size = KB_RLC_UCODE_SIZE;
4338 break;
4339 }
4340
4341 cik_rlc_stop(rdev);
4342
4343 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4344 RREG32(GRBM_SOFT_RESET);
4345 udelay(50);
4346 WREG32(GRBM_SOFT_RESET, 0);
4347 RREG32(GRBM_SOFT_RESET);
4348 udelay(50);
4349
4350 WREG32(RLC_LB_CNTR_INIT, 0);
4351 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4352
4353 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4354 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4355 WREG32(RLC_LB_PARAMS, 0x00600408);
4356 WREG32(RLC_LB_CNTL, 0x80000004);
4357
4358 WREG32(RLC_MC_CNTL, 0);
4359 WREG32(RLC_UCODE_CNTL, 0);
4360
4361 fw_data = (const __be32 *)rdev->rlc_fw->data;
4362 WREG32(RLC_GPM_UCODE_ADDR, 0);
4363 for (i = 0; i < size; i++)
4364 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4365 WREG32(RLC_GPM_UCODE_ADDR, 0);
4366
4367 /* XXX */
4368 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4369 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4370 clear_state_info[2] = 0;//cik_default_size;
4371 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4372 for (i = 0; i < 3; i++)
4373 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4374 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4375
4376 cik_rlc_start(rdev);
4377
4378 return 0;
4379}
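
/*
 * Illustrative sketch: the RLC ucode upload above follows the usual indexed
 * register pattern: reset the address register, stream the big-endian
 * firmware dwords into the data register, then reset the address again.
 * write_reg() and the two register offsets are hypothetical stand-ins for
 * WREG32 and the RLC_GPM_UCODE_* registers.
 */
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>		/* ntohl(), since the firmware words are stored big-endian */

typedef void (*write_reg_t)(uint32_t reg, uint32_t val);

static void upload_ucode(write_reg_t write_reg, uint32_t addr_reg, uint32_t data_reg,
			 const uint32_t *fw_be, size_t ndwords)
{
	size_t i;

	write_reg(addr_reg, 0);				/* start writing at offset 0 */
	for (i = 0; i < ndwords; i++)
		write_reg(data_reg, ntohl(fw_be[i]));	/* convert each word to host order */
	write_reg(addr_reg, 0);				/* rewind the address register */
}
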
a59781bb
AD
4380
4381/*
4382 * Interrupts
4383 * Starting with r6xx, interrupts are handled via a ring buffer.
4384 * Ring buffers are areas of GPU accessible memory that the GPU
4385 * writes interrupt vectors into and the host reads vectors out of.
4386 * There is a rptr (read pointer) that determines where the
4387 * host is currently reading, and a wptr (write pointer)
4388 * which determines where the GPU has written. When the
4389 * pointers are equal, the ring is idle. When the GPU
4390 * writes vectors to the ring buffer, it increments the
4391 * wptr. When there is an interrupt, the host then starts
4392 * fetching vectors and processing them until the pointers are
4393 * equal again, at which point it updates the rptr.
4394 */
4395
4396/**
4397 * cik_enable_interrupts - Enable the interrupt ring buffer
4398 *
4399 * @rdev: radeon_device pointer
4400 *
4401 * Enable the interrupt ring buffer (CIK).
4402 */
4403static void cik_enable_interrupts(struct radeon_device *rdev)
4404{
4405 u32 ih_cntl = RREG32(IH_CNTL);
4406 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4407
4408 ih_cntl |= ENABLE_INTR;
4409 ih_rb_cntl |= IH_RB_ENABLE;
4410 WREG32(IH_CNTL, ih_cntl);
4411 WREG32(IH_RB_CNTL, ih_rb_cntl);
4412 rdev->ih.enabled = true;
4413}
4414
4415/**
4416 * cik_disable_interrupts - Disable the interrupt ring buffer
4417 *
4418 * @rdev: radeon_device pointer
4419 *
4420 * Disable the interrupt ring buffer (CIK).
4421 */
4422static void cik_disable_interrupts(struct radeon_device *rdev)
4423{
4424 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4425 u32 ih_cntl = RREG32(IH_CNTL);
4426
4427 ih_rb_cntl &= ~IH_RB_ENABLE;
4428 ih_cntl &= ~ENABLE_INTR;
4429 WREG32(IH_RB_CNTL, ih_rb_cntl);
4430 WREG32(IH_CNTL, ih_cntl);
4431 /* set rptr, wptr to 0 */
4432 WREG32(IH_RB_RPTR, 0);
4433 WREG32(IH_RB_WPTR, 0);
4434 rdev->ih.enabled = false;
4435 rdev->ih.rptr = 0;
4436}
4437
4438/**
4439 * cik_disable_interrupt_state - Disable all interrupt sources
4440 *
4441 * @rdev: radeon_device pointer
4442 *
4443 * Clear all interrupt enable bits used by the driver (CIK).
4444 */
4445static void cik_disable_interrupt_state(struct radeon_device *rdev)
4446{
4447 u32 tmp;
4448
4449 /* gfx ring */
4450 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
21a93e13
AD
4451 /* sdma */
4452 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4453 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4454 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4455 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
a59781bb
AD
4456 /* compute queues */
4457 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4458 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4459 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4460 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4461 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4462 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4463 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4464 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4465 /* grbm */
4466 WREG32(GRBM_INT_CNTL, 0);
4467 /* vline/vblank, etc. */
4468 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4469 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4470 if (rdev->num_crtc >= 4) {
4471 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4472 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4473 }
4474 if (rdev->num_crtc >= 6) {
4475 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4476 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4477 }
4478
4479 /* dac hotplug */
4480 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4481
4482 /* digital hotplug */
4483 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4484 WREG32(DC_HPD1_INT_CONTROL, tmp);
4485 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4486 WREG32(DC_HPD2_INT_CONTROL, tmp);
4487 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4488 WREG32(DC_HPD3_INT_CONTROL, tmp);
4489 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4490 WREG32(DC_HPD4_INT_CONTROL, tmp);
4491 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4492 WREG32(DC_HPD5_INT_CONTROL, tmp);
4493 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4494 WREG32(DC_HPD6_INT_CONTROL, tmp);
4495
4496}
4497
4498/**
4499 * cik_irq_init - init and enable the interrupt ring
4500 *
4501 * @rdev: radeon_device pointer
4502 *
4503 * Allocate a ring buffer for the interrupt controller,
4504 * enable the RLC, disable interrupts, set up the IH
4505 * ring buffer, and enable it (CIK).
4506 * Called at device load and resume.
4507 * Returns 0 for success, errors for failure.
4508 */
4509static int cik_irq_init(struct radeon_device *rdev)
4510{
4511 int ret = 0;
4512 int rb_bufsz;
4513 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4514
4515 /* allocate ring */
4516 ret = r600_ih_ring_alloc(rdev);
4517 if (ret)
4518 return ret;
4519
4520 /* disable irqs */
4521 cik_disable_interrupts(rdev);
4522
4523 /* init rlc */
4524 ret = cik_rlc_resume(rdev);
4525 if (ret) {
4526 r600_ih_ring_fini(rdev);
4527 return ret;
4528 }
4529
4530 /* setup interrupt control */
4531 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4532 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4533 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4534 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4535 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4536 */
4537 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4538 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4539 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4540 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4541
4542 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4543 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4544
4545 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4546 IH_WPTR_OVERFLOW_CLEAR |
4547 (rb_bufsz << 1));
4548
4549 if (rdev->wb.enabled)
4550 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4551
4552 /* set the writeback address whether it's enabled or not */
4553 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4554 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4555
4556 WREG32(IH_RB_CNTL, ih_rb_cntl);
4557
4558 /* set rptr, wptr to 0 */
4559 WREG32(IH_RB_RPTR, 0);
4560 WREG32(IH_RB_WPTR, 0);
4561
4562 /* Default settings for IH_CNTL (disabled at first) */
4563 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4564 /* RPTR_REARM only works if msi's are enabled */
4565 if (rdev->msi_enabled)
4566 ih_cntl |= RPTR_REARM;
4567 WREG32(IH_CNTL, ih_cntl);
4568
4569 /* force the active interrupt state to all disabled */
4570 cik_disable_interrupt_state(rdev);
4571
4572 pci_set_master(rdev->pdev);
4573
4574 /* enable irqs */
4575 cik_enable_interrupts(rdev);
4576
4577 return ret;
4578}
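
/*
 * Illustrative sketch: cik_irq_init() above programs the IH ring size as an
 * order (power-of-two exponent) of the number of ring dwords, shifted into
 * IH_RB_CNTL.  The helper below is a plain round-up log2, roughly what
 * drm_order() provides; it is an assumption for this example, not the DRM
 * implementation.
 */
#include <stdint.h>

static unsigned int ring_size_order(uint32_t ring_size_bytes)
{
	uint32_t dwords = ring_size_bytes / 4;
	unsigned int order = 0;

	while ((1u << order) < dwords)	/* smallest order with 2^order >= dwords */
		order++;
	return order;
}
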
4579
4580/**
4581 * cik_irq_set - enable/disable interrupt sources
4582 *
4583 * @rdev: radeon_device pointer
4584 *
4585 * Enable interrupt sources on the GPU (vblanks, hpd,
4586 * etc.) (CIK).
4587 * Returns 0 for success, errors for failure.
4588 */
4589int cik_irq_set(struct radeon_device *rdev)
4590{
4591 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4592 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4593 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4594 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4595 u32 grbm_int_cntl = 0;
21a93e13 4596 u32 dma_cntl, dma_cntl1;
a59781bb
AD
4597
4598 if (!rdev->irq.installed) {
4599 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4600 return -EINVAL;
4601 }
4602 /* don't enable anything if the ih is disabled */
4603 if (!rdev->ih.enabled) {
4604 cik_disable_interrupts(rdev);
4605 /* force the active interrupt state to all disabled */
4606 cik_disable_interrupt_state(rdev);
4607 return 0;
4608 }
4609
4610 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4611 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4612 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4613 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4614 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4615 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4616
21a93e13
AD
4617 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4618 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4619
a59781bb
AD
4620 /* enable CP interrupts on all rings */
4621 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4622 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4623 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4624 }
4625 /* TODO: compute queues! */
4626 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4627
21a93e13
AD
4628 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4629 DRM_DEBUG("cik_irq_set: sw int dma\n");
4630 dma_cntl |= TRAP_ENABLE;
4631 }
4632
4633 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4634 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4635 dma_cntl1 |= TRAP_ENABLE;
4636 }
4637
a59781bb
AD
4638 if (rdev->irq.crtc_vblank_int[0] ||
4639 atomic_read(&rdev->irq.pflip[0])) {
4640 DRM_DEBUG("cik_irq_set: vblank 0\n");
4641 crtc1 |= VBLANK_INTERRUPT_MASK;
4642 }
4643 if (rdev->irq.crtc_vblank_int[1] ||
4644 atomic_read(&rdev->irq.pflip[1])) {
4645 DRM_DEBUG("cik_irq_set: vblank 1\n");
4646 crtc2 |= VBLANK_INTERRUPT_MASK;
4647 }
4648 if (rdev->irq.crtc_vblank_int[2] ||
4649 atomic_read(&rdev->irq.pflip[2])) {
4650 DRM_DEBUG("cik_irq_set: vblank 2\n");
4651 crtc3 |= VBLANK_INTERRUPT_MASK;
4652 }
4653 if (rdev->irq.crtc_vblank_int[3] ||
4654 atomic_read(&rdev->irq.pflip[3])) {
4655 DRM_DEBUG("cik_irq_set: vblank 3\n");
4656 crtc4 |= VBLANK_INTERRUPT_MASK;
4657 }
4658 if (rdev->irq.crtc_vblank_int[4] ||
4659 atomic_read(&rdev->irq.pflip[4])) {
4660 DRM_DEBUG("cik_irq_set: vblank 4\n");
4661 crtc5 |= VBLANK_INTERRUPT_MASK;
4662 }
4663 if (rdev->irq.crtc_vblank_int[5] ||
4664 atomic_read(&rdev->irq.pflip[5])) {
4665 DRM_DEBUG("cik_irq_set: vblank 5\n");
4666 crtc6 |= VBLANK_INTERRUPT_MASK;
4667 }
4668 if (rdev->irq.hpd[0]) {
4669 DRM_DEBUG("cik_irq_set: hpd 1\n");
4670 hpd1 |= DC_HPDx_INT_EN;
4671 }
4672 if (rdev->irq.hpd[1]) {
4673 DRM_DEBUG("cik_irq_set: hpd 2\n");
4674 hpd2 |= DC_HPDx_INT_EN;
4675 }
4676 if (rdev->irq.hpd[2]) {
4677 DRM_DEBUG("cik_irq_set: hpd 3\n");
4678 hpd3 |= DC_HPDx_INT_EN;
4679 }
4680 if (rdev->irq.hpd[3]) {
4681 DRM_DEBUG("cik_irq_set: hpd 4\n");
4682 hpd4 |= DC_HPDx_INT_EN;
4683 }
4684 if (rdev->irq.hpd[4]) {
4685 DRM_DEBUG("cik_irq_set: hpd 5\n");
4686 hpd5 |= DC_HPDx_INT_EN;
4687 }
4688 if (rdev->irq.hpd[5]) {
4689 DRM_DEBUG("cik_irq_set: hpd 6\n");
4690 hpd6 |= DC_HPDx_INT_EN;
4691 }
4692
4693 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4694
21a93e13
AD
4695 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4696 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4697
a59781bb
AD
4698 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4699
4700 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4701 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4702 if (rdev->num_crtc >= 4) {
4703 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4704 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4705 }
4706 if (rdev->num_crtc >= 6) {
4707 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4708 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4709 }
4710
4711 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4712 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4713 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4714 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4715 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4716 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4717
4718 return 0;
4719}
4720
4721/**
4722 * cik_irq_ack - ack interrupt sources
4723 *
4724 * @rdev: radeon_device pointer
4725 *
4726 * Ack interrupt sources on the GPU (vblanks, hpd,
4727 * etc.) (CIK). Certain interrupt sources are sw
4728 * generated and do not require an explicit ack.
4729 */
4730static inline void cik_irq_ack(struct radeon_device *rdev)
4731{
4732 u32 tmp;
4733
4734 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4735 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4736 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4737 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4738 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4739 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4740 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4741
4742 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4743 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4744 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4745 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4746 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4747 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4748 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4749 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4750
4751 if (rdev->num_crtc >= 4) {
4752 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4753 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4754 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4755 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4756 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4757 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4758 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4759 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4760 }
4761
4762 if (rdev->num_crtc >= 6) {
4763 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4764 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4765 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4766 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4767 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4768 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4769 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4770 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4771 }
4772
4773 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4774 tmp = RREG32(DC_HPD1_INT_CONTROL);
4775 tmp |= DC_HPDx_INT_ACK;
4776 WREG32(DC_HPD1_INT_CONTROL, tmp);
4777 }
4778 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4779 tmp = RREG32(DC_HPD2_INT_CONTROL);
4780 tmp |= DC_HPDx_INT_ACK;
4781 WREG32(DC_HPD2_INT_CONTROL, tmp);
4782 }
4783 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4784 tmp = RREG32(DC_HPD3_INT_CONTROL);
4785 tmp |= DC_HPDx_INT_ACK;
4786 WREG32(DC_HPD3_INT_CONTROL, tmp);
4787 }
4788 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4789 tmp = RREG32(DC_HPD4_INT_CONTROL);
4790 tmp |= DC_HPDx_INT_ACK;
4791 WREG32(DC_HPD4_INT_CONTROL, tmp);
4792 }
4793 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4794 tmp = RREG32(DC_HPD5_INT_CONTROL);
4795 tmp |= DC_HPDx_INT_ACK;
4796 WREG32(DC_HPD5_INT_CONTROL, tmp);
4797 }
4798 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4799 tmp = RREG32(DC_HPD6_INT_CONTROL);
4800 tmp |= DC_HPDx_INT_ACK;
4801 WREG32(DC_HPD6_INT_CONTROL, tmp);
4802 }
4803}
4804
4805/**
4806 * cik_irq_disable - disable interrupts
4807 *
4808 * @rdev: radeon_device pointer
4809 *
4810 * Disable interrupts on the hw (CIK).
4811 */
4812static void cik_irq_disable(struct radeon_device *rdev)
4813{
4814 cik_disable_interrupts(rdev);
4815 /* Wait and acknowledge irq */
4816 mdelay(1);
4817 cik_irq_ack(rdev);
4818 cik_disable_interrupt_state(rdev);
4819}
4820
4821/**
4822 * cik_irq_suspend - disable interrupts for suspend
4823 *
4824 * @rdev: radeon_device pointer
4825 *
4826 * Disable interrupts and stop the RLC (CIK).
4827 * Used for suspend.
4828 */
4829static void cik_irq_suspend(struct radeon_device *rdev)
4830{
4831 cik_irq_disable(rdev);
4832 cik_rlc_stop(rdev);
4833}
4834
4835/**
4836 * cik_irq_fini - tear down interrupt support
4837 *
4838 * @rdev: radeon_device pointer
4839 *
4840 * Disable interrupts on the hw and free the IH ring
4841 * buffer (CIK).
4842 * Used for driver unload.
4843 */
4844static void cik_irq_fini(struct radeon_device *rdev)
4845{
4846 cik_irq_suspend(rdev);
4847 r600_ih_ring_fini(rdev);
4848}
4849
4850/**
4851 * cik_get_ih_wptr - get the IH ring buffer wptr
4852 *
4853 * @rdev: radeon_device pointer
4854 *
4855 * Get the IH ring buffer wptr from either the register
4856 * or the writeback memory buffer (CIK). Also check for
4857 * ring buffer overflow and deal with it.
4858 * Used by cik_irq_process().
4859 * Returns the value of the wptr.
4860 */
4861static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4862{
4863 u32 wptr, tmp;
4864
4865 if (rdev->wb.enabled)
4866 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4867 else
4868 wptr = RREG32(IH_RB_WPTR);
4869
4870 if (wptr & RB_OVERFLOW) {
4871 /* When a ring buffer overflow happens, start parsing interrupts
4872 * from the last non-overwritten vector (wptr + 16). Hopefully
4873 * this should allow us to catch up.
4874 */
4875 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4876 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4877 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4878 tmp = RREG32(IH_RB_CNTL);
4879 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4880 WREG32(IH_RB_CNTL, tmp);
4881 }
4882 return (wptr & rdev->ih.ptr_mask);
4883}
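
/*
 * Illustrative sketch: each IV entry is 16 bytes, and both rptr and wptr are
 * byte offsets that wrap at the ring size, so advancing the read pointer in
 * cik_irq_process() below is a 16-byte add masked by ptr_mask (ring size - 1).
 * Standalone helper for illustration only.
 */
#include <stdint.h>

static uint32_t ih_advance_rptr(uint32_t rptr, uint32_t ptr_mask)
{
	return (rptr + 16) & ptr_mask;	/* next 128-bit vector, with wrap */
}
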
4884
4885/* CIK IV Ring
4886 * Each IV ring entry is 128 bits:
4887 * [7:0] - interrupt source id
4888 * [31:8] - reserved
4889 * [59:32] - interrupt source data
4890 * [63:60] - reserved
21a93e13
AD
4891 * [71:64] - RINGID
4892 * CP:
4893 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
a59781bb
AD
4894 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4895 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4896 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4897 * PIPE_ID - ME0 0=3D
4898 * - ME1&2 compute dispatcher (4 pipes each)
21a93e13
AD
4899 * SDMA:
4900 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4901 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4902 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
a59781bb
AD
4903 * [79:72] - VMID
4904 * [95:80] - PASID
4905 * [127:96] - reserved
4906 */
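
/*
 * Illustrative sketch: one IV ring entry is four little-endian dwords with the
 * field layout documented above.  This standalone decoder mirrors the masking
 * done in cik_irq_process() below; the struct name and the vmid/pasid fields
 * are for illustration only (the handler itself only consumes the first three
 * fields).
 */
#include <stdint.h>

struct cik_iv_entry {
	uint8_t  src_id;	/* [7:0]   interrupt source id */
	uint32_t src_data;	/* [59:32] interrupt source data (28 bits) */
	uint8_t  ring_id;	/* [71:64] ME/PIPE/QUEUE or SDMA instance/queue */
	uint8_t  vmid;		/* [79:72] */
	uint16_t pasid;		/* [95:80] */
};

static struct cik_iv_entry decode_iv_entry(const uint32_t dw[4])
{
	struct cik_iv_entry e;

	e.src_id   = dw[0] & 0xff;
	e.src_data = dw[1] & 0x0fffffff;
	e.ring_id  = dw[2] & 0xff;
	e.vmid     = (dw[2] >> 8) & 0xff;
	e.pasid    = (dw[2] >> 16) & 0xffff;
	return e;
}
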
4907/**
4908 * cik_irq_process - interrupt handler
4909 *
4910 * @rdev: radeon_device pointer
4911 *
4912 * Interrupt handler (CIK). Walk the IH ring,
4913 * ack interrupts and schedule work to handle
4914 * interrupt events.
4915 * Returns irq process return code.
4916 */
4917int cik_irq_process(struct radeon_device *rdev)
4918{
4919 u32 wptr;
4920 u32 rptr;
4921 u32 src_id, src_data, ring_id;
4922 u8 me_id, pipe_id, queue_id;
4923 u32 ring_index;
4924 bool queue_hotplug = false;
4925 bool queue_reset = false;
4926
4927 if (!rdev->ih.enabled || rdev->shutdown)
4928 return IRQ_NONE;
4929
4930 wptr = cik_get_ih_wptr(rdev);
4931
4932restart_ih:
4933 /* is somebody else already processing irqs? */
4934 if (atomic_xchg(&rdev->ih.lock, 1))
4935 return IRQ_NONE;
4936
4937 rptr = rdev->ih.rptr;
4938 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4939
4940 /* Order reading of wptr vs. reading of IH ring data */
4941 rmb();
4942
4943 /* display interrupts */
4944 cik_irq_ack(rdev);
4945
4946 while (rptr != wptr) {
4947 /* wptr/rptr are in bytes! */
4948 ring_index = rptr / 4;
4949 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4950 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4951 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
a59781bb
AD
4952
4953 switch (src_id) {
4954 case 1: /* D1 vblank/vline */
4955 switch (src_data) {
4956 case 0: /* D1 vblank */
4957 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4958 if (rdev->irq.crtc_vblank_int[0]) {
4959 drm_handle_vblank(rdev->ddev, 0);
4960 rdev->pm.vblank_sync = true;
4961 wake_up(&rdev->irq.vblank_queue);
4962 }
4963 if (atomic_read(&rdev->irq.pflip[0]))
4964 radeon_crtc_handle_flip(rdev, 0);
4965 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4966 DRM_DEBUG("IH: D1 vblank\n");
4967 }
4968 break;
4969 case 1: /* D1 vline */
4970 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4971 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4972 DRM_DEBUG("IH: D1 vline\n");
4973 }
4974 break;
4975 default:
4976 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4977 break;
4978 }
4979 break;
4980 case 2: /* D2 vblank/vline */
4981 switch (src_data) {
4982 case 0: /* D2 vblank */
4983 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4984 if (rdev->irq.crtc_vblank_int[1]) {
4985 drm_handle_vblank(rdev->ddev, 1);
4986 rdev->pm.vblank_sync = true;
4987 wake_up(&rdev->irq.vblank_queue);
4988 }
4989 if (atomic_read(&rdev->irq.pflip[1]))
4990 radeon_crtc_handle_flip(rdev, 1);
4991 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4992 DRM_DEBUG("IH: D2 vblank\n");
4993 }
4994 break;
4995 case 1: /* D2 vline */
4996 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4997 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4998 DRM_DEBUG("IH: D2 vline\n");
4999 }
5000 break;
5001 default:
5002 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5003 break;
5004 }
5005 break;
5006 case 3: /* D3 vblank/vline */
5007 switch (src_data) {
5008 case 0: /* D3 vblank */
5009 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5010 if (rdev->irq.crtc_vblank_int[2]) {
5011 drm_handle_vblank(rdev->ddev, 2);
5012 rdev->pm.vblank_sync = true;
5013 wake_up(&rdev->irq.vblank_queue);
5014 }
5015 if (atomic_read(&rdev->irq.pflip[2]))
5016 radeon_crtc_handle_flip(rdev, 2);
5017 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5018 DRM_DEBUG("IH: D3 vblank\n");
5019 }
5020 break;
5021 case 1: /* D3 vline */
5022 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5023 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5024 DRM_DEBUG("IH: D3 vline\n");
5025 }
5026 break;
5027 default:
5028 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5029 break;
5030 }
5031 break;
5032 case 4: /* D4 vblank/vline */
5033 switch (src_data) {
5034 case 0: /* D4 vblank */
5035 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5036 if (rdev->irq.crtc_vblank_int[3]) {
5037 drm_handle_vblank(rdev->ddev, 3);
5038 rdev->pm.vblank_sync = true;
5039 wake_up(&rdev->irq.vblank_queue);
5040 }
5041 if (atomic_read(&rdev->irq.pflip[3]))
5042 radeon_crtc_handle_flip(rdev, 3);
5043 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5044 DRM_DEBUG("IH: D4 vblank\n");
5045 }
5046 break;
5047 case 1: /* D4 vline */
5048 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5049 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5050 DRM_DEBUG("IH: D4 vline\n");
5051 }
5052 break;
5053 default:
5054 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5055 break;
5056 }
5057 break;
5058 case 5: /* D5 vblank/vline */
5059 switch (src_data) {
5060 case 0: /* D5 vblank */
5061 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5062 if (rdev->irq.crtc_vblank_int[4]) {
5063 drm_handle_vblank(rdev->ddev, 4);
5064 rdev->pm.vblank_sync = true;
5065 wake_up(&rdev->irq.vblank_queue);
5066 }
5067 if (atomic_read(&rdev->irq.pflip[4]))
5068 radeon_crtc_handle_flip(rdev, 4);
5069 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5070 DRM_DEBUG("IH: D5 vblank\n");
5071 }
5072 break;
5073 case 1: /* D5 vline */
5074 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5075 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5076 DRM_DEBUG("IH: D5 vline\n");
5077 }
5078 break;
5079 default:
5080 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5081 break;
5082 }
5083 break;
5084 case 6: /* D6 vblank/vline */
5085 switch (src_data) {
5086 case 0: /* D6 vblank */
5087 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5088 if (rdev->irq.crtc_vblank_int[5]) {
5089 drm_handle_vblank(rdev->ddev, 5);
5090 rdev->pm.vblank_sync = true;
5091 wake_up(&rdev->irq.vblank_queue);
5092 }
5093 if (atomic_read(&rdev->irq.pflip[5]))
5094 radeon_crtc_handle_flip(rdev, 5);
5095 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5096 DRM_DEBUG("IH: D6 vblank\n");
5097 }
5098 break;
5099 case 1: /* D6 vline */
5100 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5101 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5102 DRM_DEBUG("IH: D6 vline\n");
5103 }
5104 break;
5105 default:
5106 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5107 break;
5108 }
5109 break;
5110 case 42: /* HPD hotplug */
5111 switch (src_data) {
5112 case 0:
5113 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5114 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5115 queue_hotplug = true;
5116 DRM_DEBUG("IH: HPD1\n");
5117 }
5118 break;
5119 case 1:
5120 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5121 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5122 queue_hotplug = true;
5123 DRM_DEBUG("IH: HPD2\n");
5124 }
5125 break;
5126 case 2:
5127 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5128 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5129 queue_hotplug = true;
5130 DRM_DEBUG("IH: HPD3\n");
5131 }
5132 break;
5133 case 3:
5134 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5135 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5136 queue_hotplug = true;
5137 DRM_DEBUG("IH: HPD4\n");
5138 }
5139 break;
5140 case 4:
5141 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5142 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5143 queue_hotplug = true;
5144 DRM_DEBUG("IH: HPD5\n");
5145 }
5146 break;
5147 case 5:
5148 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5149 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5150 queue_hotplug = true;
5151 DRM_DEBUG("IH: HPD6\n");
5152 }
5153 break;
5154 default:
5155 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5156 break;
5157 }
5158 break;
9d97c99b
AD
5159 case 146:
5160 case 147:
5161 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5162 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5163 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5164 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5165 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5166 /* reset addr and status */
5167 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5168 break;
a59781bb
AD
5169 case 176: /* GFX RB CP_INT */
5170 case 177: /* GFX IB CP_INT */
5171 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5172 break;
5173 case 181: /* CP EOP event */
5174 DRM_DEBUG("IH: CP EOP\n");
21a93e13
AD
5175 /* XXX check the bitfield order! */
5176 me_id = (ring_id & 0x60) >> 5;
5177 pipe_id = (ring_id & 0x18) >> 3;
5178 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
5179 switch (me_id) {
5180 case 0:
5181 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5182 break;
5183 case 1:
5184 /* XXX compute */
5185 break;
5186 case 2:
5187 /* XXX compute */
5188 break;
5189 }
5190 break;
5191 case 184: /* CP Privileged reg access */
5192 DRM_ERROR("Illegal register access in command stream\n");
5193 /* XXX check the bitfield order! */
5194 me_id = (ring_id & 0x60) >> 5;
5195 pipe_id = (ring_id & 0x18) >> 3;
5196 queue_id = (ring_id & 0x7) >> 0;
5197 switch (me_id) {
5198 case 0:
5199 /* This results in a full GPU reset, but all we need to do is soft
5200 * reset the CP for gfx
5201 */
5202 queue_reset = true;
5203 break;
5204 case 1:
5205 /* XXX compute */
5206 break;
5207 case 2:
5208 /* XXX compute */
5209 break;
5210 }
5211 break;
5212 case 185: /* CP Privileged inst */
5213 DRM_ERROR("Illegal instruction in command stream\n");
21a93e13
AD
5214 /* XXX check the bitfield order! */
5215 me_id = (ring_id & 0x60) >> 5;
5216 pipe_id = (ring_id & 0x18) >> 3;
5217 queue_id = (ring_id & 0x7) >> 0;
a59781bb
AD
5218 switch (me_id) {
5219 case 0:
5220 /* This results in a full GPU reset, but all we need to do is soft
5221 * reset the CP for gfx
5222 */
5223 queue_reset = true;
5224 break;
5225 case 1:
5226 /* XXX compute */
5227 break;
5228 case 2:
5229 /* XXX compute */
5230 break;
5231 }
5232 break;
21a93e13
AD
5233 case 224: /* SDMA trap event */
5234 /* XXX check the bitfield order! */
5235 me_id = (ring_id & 0x3) >> 0;
5236 queue_id = (ring_id & 0xc) >> 2;
5237 DRM_DEBUG("IH: SDMA trap\n");
5238 switch (me_id) {
5239 case 0:
5240 switch (queue_id) {
5241 case 0:
5242 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5243 break;
5244 case 1:
5245 /* XXX compute */
5246 break;
5247 case 2:
5248 /* XXX compute */
5249 break;
5250 }
5251 break;
5252 case 1:
5253 switch (queue_id) {
5254 case 0:
5255 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5256 break;
5257 case 1:
5258 /* XXX compute */
5259 break;
5260 case 2:
5261 /* XXX compute */
5262 break;
5263 }
5264 break;
5265 }
5266 break;
5267 case 241: /* SDMA Privileged inst */
5268 case 247: /* SDMA Privileged inst */
5269 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5270 /* XXX check the bitfield order! */
5271 me_id = (ring_id & 0x3) >> 0;
5272 queue_id = (ring_id & 0xc) >> 2;
5273 switch (me_id) {
5274 case 0:
5275 switch (queue_id) {
5276 case 0:
5277 queue_reset = true;
5278 break;
5279 case 1:
5280 /* XXX compute */
5281 queue_reset = true;
5282 break;
5283 case 2:
5284 /* XXX compute */
5285 queue_reset = true;
5286 break;
5287 }
5288 break;
5289 case 1:
5290 switch (queue_id) {
5291 case 0:
5292 queue_reset = true;
5293 break;
5294 case 1:
5295 /* XXX compute */
5296 queue_reset = true;
5297 break;
5298 case 2:
5299 /* XXX compute */
5300 queue_reset = true;
5301 break;
5302 }
5303 break;
5304 }
5305 break;
a59781bb
AD
5306 case 233: /* GUI IDLE */
5307 DRM_DEBUG("IH: GUI idle\n");
5308 break;
5309 default:
5310 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5311 break;
5312 }
5313
5314 /* wptr/rptr are in bytes! */
5315 rptr += 16;
5316 rptr &= rdev->ih.ptr_mask;
5317 }
5318 if (queue_hotplug)
5319 schedule_work(&rdev->hotplug_work);
5320 if (queue_reset)
5321 schedule_work(&rdev->reset_work);
5322 rdev->ih.rptr = rptr;
5323 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5324 atomic_set(&rdev->ih.lock, 0);
5325
5326 /* make sure wptr hasn't changed while processing */
5327 wptr = cik_get_ih_wptr(rdev);
5328 if (wptr != rptr)
5329 goto restart_ih;
5330
5331 return IRQ_HANDLED;
5332}
7bf94a2c
AD
5333
5334/*
5335 * startup/shutdown callbacks
5336 */
5337/**
5338 * cik_startup - program the asic to a functional state
5339 *
5340 * @rdev: radeon_device pointer
5341 *
5342 * Programs the asic to a functional state (CIK).
5343 * Called by cik_init() and cik_resume().
5344 * Returns 0 for success, error for failure.
5345 */
5346static int cik_startup(struct radeon_device *rdev)
5347{
5348 struct radeon_ring *ring;
5349 int r;
5350
5351 if (rdev->flags & RADEON_IS_IGP) {
5352 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5353 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5354 r = cik_init_microcode(rdev);
5355 if (r) {
5356 DRM_ERROR("Failed to load firmware!\n");
5357 return r;
5358 }
5359 }
5360 } else {
5361 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5362 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5363 !rdev->mc_fw) {
5364 r = cik_init_microcode(rdev);
5365 if (r) {
5366 DRM_ERROR("Failed to load firmware!\n");
5367 return r;
5368 }
5369 }
5370
5371 r = ci_mc_load_microcode(rdev);
5372 if (r) {
5373 DRM_ERROR("Failed to load MC firmware!\n");
5374 return r;
5375 }
5376 }
5377
5378 r = r600_vram_scratch_init(rdev);
5379 if (r)
5380 return r;
5381
5382 cik_mc_program(rdev);
5383 r = cik_pcie_gart_enable(rdev);
5384 if (r)
5385 return r;
5386 cik_gpu_init(rdev);
5387
5388 /* allocate rlc buffers */
5389 r = si_rlc_init(rdev);
5390 if (r) {
5391 DRM_ERROR("Failed to init rlc BOs!\n");
5392 return r;
5393 }
5394
5395 /* allocate wb buffer */
5396 r = radeon_wb_init(rdev);
5397 if (r)
5398 return r;
5399
963e81f9
AD
5400 /* allocate mec buffers */
5401 r = cik_mec_init(rdev);
5402 if (r) {
5403 DRM_ERROR("Failed to init MEC BOs!\n");
5404 return r;
5405 }
5406
7bf94a2c
AD
5407 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5408 if (r) {
5409 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5410 return r;
5411 }
5412
963e81f9
AD
5413 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5414 if (r) {
5415 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5416 return r;
5417 }
5418
5419 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5420 if (r) {
5421 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5422 return r;
5423 }
5424
7bf94a2c
AD
5425 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5426 if (r) {
5427 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5428 return r;
5429 }
5430
5431 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5432 if (r) {
5433 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5434 return r;
5435 }
5436
87167bb1
CK
5437 r = cik_uvd_resume(rdev);
5438 if (!r) {
5439 r = radeon_fence_driver_start_ring(rdev,
5440 R600_RING_TYPE_UVD_INDEX);
5441 if (r)
5442 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5443 }
5444 if (r)
5445 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5446
7bf94a2c
AD
5447 /* Enable IRQ */
5448 if (!rdev->irq.installed) {
5449 r = radeon_irq_kms_init(rdev);
5450 if (r)
5451 return r;
5452 }
5453
5454 r = cik_irq_init(rdev);
5455 if (r) {
5456 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5457 radeon_irq_kms_fini(rdev);
5458 return r;
5459 }
5460 cik_irq_set(rdev);
5461
5462 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5463 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5464 CP_RB0_RPTR, CP_RB0_WPTR,
5465 0, 0xfffff, RADEON_CP_PACKET2);
5466 if (r)
5467 return r;
5468
963e81f9
AD
5469 /* set up the compute queues */
5470 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5471 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5472 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
5473 0, 0xfffff, RADEON_CP_PACKET2);
5474 if (r)
5475 return r;
5476 ring->me = 1; /* first MEC */
5477 ring->pipe = 0; /* first pipe */
5478 ring->queue = 0; /* first queue */
5479 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
5480
5481 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5482 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5483 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
5484 0, 0xffffffff, RADEON_CP_PACKET2);
5485 if (r)
5486 return r;
5487 /* dGPUs only have 1 MEC */
5488 ring->me = 1; /* first MEC */
5489 ring->pipe = 0; /* first pipe */
5490 ring->queue = 1; /* second queue */
5491 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
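/* as set up above, both compute rings sit on MEC 1, pipe 0; they differ
 * only in queue number and write pointer offset
 */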
5492
7bf94a2c
AD
5493 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5494 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5495 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
5496 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
5497 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5498 if (r)
5499 return r;
5500
5501 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5502 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5503 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
5504 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
5505 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5506 if (r)
5507 return r;
5508
5509 r = cik_cp_resume(rdev);
5510 if (r)
5511 return r;
5512
5513 r = cik_sdma_resume(rdev);
5514 if (r)
5515 return r;
5516
87167bb1
CK
5517 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5518 if (ring->ring_size) {
5519 r = radeon_ring_init(rdev, ring, ring->ring_size,
5520 R600_WB_UVD_RPTR_OFFSET,
5521 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5522 0, 0xfffff, RADEON_CP_PACKET2);
5523 if (!r)
5524 r = r600_uvd_init(rdev);
5525 if (r)
5526 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5527 }
5528
7bf94a2c
AD
5529 r = radeon_ib_pool_init(rdev);
5530 if (r) {
5531 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5532 return r;
5533 }
5534
5535 r = radeon_vm_manager_init(rdev);
5536 if (r) {
5537 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5538 return r;
5539 }
5540
5541 return 0;
5542}
5543
5544/**
5545 * cik_resume - resume the asic to a functional state
5546 *
5547 * @rdev: radeon_device pointer
5548 *
5549 * Programs the asic to a functional state (CIK).
5550 * Called at resume.
5551 * Returns 0 for success, error for failure.
5552 */
5553int cik_resume(struct radeon_device *rdev)
5554{
5555 int r;
5556
5557 /* post card */
5558 atom_asic_init(rdev->mode_info.atom_context);
5559
5560 rdev->accel_working = true;
5561 r = cik_startup(rdev);
5562 if (r) {
5563 DRM_ERROR("cik startup failed on resume\n");
5564 rdev->accel_working = false;
5565 return r;
5566 }
5567
5568 return r;
5569
5570}
5571
5572/**
5573 * cik_suspend - suspend the asic
5574 *
5575 * @rdev: radeon_device pointer
5576 *
5577 * Bring the chip into a state suitable for suspend (CIK).
5578 * Called at suspend.
5579 * Returns 0 for success.
5580 */
5581int cik_suspend(struct radeon_device *rdev)
5582{
5583 radeon_vm_manager_fini(rdev);
5584 cik_cp_enable(rdev, false);
5585 cik_sdma_enable(rdev, false);
87167bb1
CK
5586 r600_uvd_rbc_stop(rdev);
5587 radeon_uvd_suspend(rdev);
7bf94a2c
AD
5588 cik_irq_suspend(rdev);
5589 radeon_wb_disable(rdev);
5590 cik_pcie_gart_disable(rdev);
5591 return 0;
5592}
5593
5594 /* Plan is to move initialization into that function and to use
5595 * helper functions so that radeon_device_init does pretty much
5596 * nothing more than call asic-specific functions. This should
5597 * also allow the removal of a bunch of callback functions
5598 * like vram_info.
5599 */
5600/**
5601 * cik_init - asic specific driver and hw init
5602 *
5603 * @rdev: radeon_device pointer
5604 *
5605 * Set up asic-specific driver variables and program the hw
5606 * to a functional state (CIK).
5607 * Called at driver startup.
5608 * Returns 0 for success, errors for failure.
5609 */
5610int cik_init(struct radeon_device *rdev)
5611{
5612 struct radeon_ring *ring;
5613 int r;
5614
5615 /* Read BIOS */
5616 if (!radeon_get_bios(rdev)) {
5617 if (ASIC_IS_AVIVO(rdev))
5618 return -EINVAL;
5619 }
5620 /* Must be an ATOMBIOS */
5621 if (!rdev->is_atom_bios) {
5622 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5623 return -EINVAL;
5624 }
5625 r = radeon_atombios_init(rdev);
5626 if (r)
5627 return r;
5628
5629 /* Post card if necessary */
5630 if (!radeon_card_posted(rdev)) {
5631 if (!rdev->bios) {
5632 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5633 return -EINVAL;
5634 }
5635 DRM_INFO("GPU not posted. posting now...\n");
5636 atom_asic_init(rdev->mode_info.atom_context);
5637 }
5638 /* Initialize scratch registers */
5639 cik_scratch_init(rdev);
5640 /* Initialize surface registers */
5641 radeon_surface_init(rdev);
5642 /* Initialize clocks */
5643 radeon_get_clock_info(rdev->ddev);
5644
5645 /* Fence driver */
5646 r = radeon_fence_driver_init(rdev);
5647 if (r)
5648 return r;
5649
5650 /* initialize memory controller */
5651 r = cik_mc_init(rdev);
5652 if (r)
5653 return r;
5654 /* Memory manager */
5655 r = radeon_bo_init(rdev);
5656 if (r)
5657 return r;
5658
5659 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5660 ring->ring_obj = NULL;
5661 r600_ring_init(rdev, ring, 1024 * 1024);
5662
963e81f9
AD
5663 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5664 ring->ring_obj = NULL;
5665 r600_ring_init(rdev, ring, 1024 * 1024);
5666 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5667 if (r)
5668 return r;
5669
5670 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5671 ring->ring_obj = NULL;
5672 r600_ring_init(rdev, ring, 1024 * 1024);
5673 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5674 if (r)
5675 return r;
5676
7bf94a2c
AD
5677 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5678 ring->ring_obj = NULL;
5679 r600_ring_init(rdev, ring, 256 * 1024);
5680
5681 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5682 ring->ring_obj = NULL;
5683 r600_ring_init(rdev, ring, 256 * 1024);
5684
87167bb1
CK
5685 r = radeon_uvd_init(rdev);
5686 if (!r) {
5687 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5688 ring->ring_obj = NULL;
5689 r600_ring_init(rdev, ring, 4096);
5690 }
5691
7bf94a2c
AD
5692 rdev->ih.ring_obj = NULL;
5693 r600_ih_ring_init(rdev, 64 * 1024);
5694
5695 r = r600_pcie_gart_init(rdev);
5696 if (r)
5697 return r;
5698
5699 rdev->accel_working = true;
5700 r = cik_startup(rdev);
5701 if (r) {
5702 dev_err(rdev->dev, "disabling GPU acceleration\n");
5703 cik_cp_fini(rdev);
5704 cik_sdma_fini(rdev);
5705 cik_irq_fini(rdev);
5706 si_rlc_fini(rdev);
963e81f9 5707 cik_mec_fini(rdev);
7bf94a2c
AD
5708 radeon_wb_fini(rdev);
5709 radeon_ib_pool_fini(rdev);
5710 radeon_vm_manager_fini(rdev);
5711 radeon_irq_kms_fini(rdev);
5712 cik_pcie_gart_fini(rdev);
5713 rdev->accel_working = false;
5714 }
5715
5716 /* Don't start up if the MC ucode is missing.
5717 * The default clocks and voltages before the MC ucode
5718 * is loaded are not sufficient for advanced operations.
5719 */
5720 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5721 DRM_ERROR("radeon: MC ucode required for CIK.\n");
5722 return -EINVAL;
5723 }
5724
5725 return 0;
5726}
5727
5728/**
5729 * cik_fini - asic specific driver and hw fini
5730 *
5731 * @rdev: radeon_device pointer
5732 *
5733 * Tear down the asic specific driver variables and program the hw
5734 * to an idle state (CIK).
5735 * Called at driver unload.
5736 */
5737void cik_fini(struct radeon_device *rdev)
5738{
5739 cik_cp_fini(rdev);
5740 cik_sdma_fini(rdev);
5741 cik_irq_fini(rdev);
5742 si_rlc_fini(rdev);
963e81f9 5743 cik_mec_fini(rdev);
7bf94a2c
AD
5744 radeon_wb_fini(rdev);
5745 radeon_vm_manager_fini(rdev);
5746 radeon_ib_pool_fini(rdev);
5747 radeon_irq_kms_fini(rdev);
87167bb1 5748 radeon_uvd_fini(rdev);
7bf94a2c
AD
5749 cik_pcie_gart_fini(rdev);
5750 r600_vram_scratch_fini(rdev);
5751 radeon_gem_fini(rdev);
5752 radeon_fence_driver_fini(rdev);
5753 radeon_bo_fini(rdev);
5754 radeon_atombios_fini(rdev);
5755 kfree(rdev->bios);
5756 rdev->bios = NULL;
5757}
cd84a27d
AD
5758
5759/* display watermark setup */
5760/**
5761 * dce8_line_buffer_adjust - Set up the line buffer
5762 *
5763 * @rdev: radeon_device pointer
5764 * @radeon_crtc: the selected display controller
5765 * @mode: the current display mode on the selected display
5766 * controller
5767 *
5768 * Set up the line buffer allocation for
5769 * the selected display controller (CIK).
5770 * Returns the line buffer size in pixels.
5771 */
5772static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5773 struct radeon_crtc *radeon_crtc,
5774 struct drm_display_mode *mode)
5775{
5776 u32 tmp;
5777
5778 /*
5779 * Line Buffer Setup
5780 * There are 6 line buffers, one for each display controller.
5781 * There are 3 partitions per LB. Select the number of partitions
5782 * to enable based on the display width. For display widths larger
5783 * than 4096, you need to use 2 display controllers and combine
5784 * them using the stereo blender.
5785 */
5786 if (radeon_crtc->base.enabled && mode) {
5787 if (mode->crtc_hdisplay < 1920)
5788 tmp = 1;
5789 else if (mode->crtc_hdisplay < 2560)
5790 tmp = 2;
5791 else if (mode->crtc_hdisplay < 4096)
5792 tmp = 0;
5793 else {
5794 DRM_DEBUG_KMS("Mode too big for LB!\n");
5795 tmp = 0;
5796 }
5797 } else
5798 tmp = 1;
5799
5800 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5801 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5802
5803 if (radeon_crtc->base.enabled && mode) {
5804 switch (tmp) {
5805 case 0:
5806 default:
5807 return 4096 * 2;
5808 case 1:
5809 return 1920 * 2;
5810 case 2:
5811 return 2560 * 2;
5812 }
5813 }
5814
5815 /* controller not enabled, so no lb used */
5816 return 0;
5817}
5818
5819/**
5820 * cik_get_number_of_dram_channels - get the number of dram channels
5821 *
5822 * @rdev: radeon_device pointer
5823 *
5824 * Look up the number of video ram channels (CIK).
5825 * Used for display watermark bandwidth calculations
5826 * Returns the number of dram channels
5827 */
5828static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5829{
5830 u32 tmp = RREG32(MC_SHARED_CHMAP);
5831
5832 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5833 case 0:
5834 default:
5835 return 1;
5836 case 1:
5837 return 2;
5838 case 2:
5839 return 4;
5840 case 3:
5841 return 8;
5842 case 4:
5843 return 3;
5844 case 5:
5845 return 6;
5846 case 6:
5847 return 10;
5848 case 7:
5849 return 12;
5850 case 8:
5851 return 16;
5852 }
5853}
5854
5855struct dce8_wm_params {
5856 u32 dram_channels; /* number of dram channels */
5857 u32 yclk; /* bandwidth per dram data pin in kHz */
5858 u32 sclk; /* engine clock in kHz */
5859 u32 disp_clk; /* display clock in kHz */
5860 u32 src_width; /* viewport width */
5861 u32 active_time; /* active display time in ns */
5862 u32 blank_time; /* blank time in ns */
5863 bool interlaced; /* mode is interlaced */
5864 fixed20_12 vsc; /* vertical scale ratio */
5865 u32 num_heads; /* number of active crtcs */
5866 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5867 u32 lb_size; /* line buffer allocated to pipe */
5868 u32 vtaps; /* vertical scaler taps */
5869};
5870
5871/**
5872 * dce8_dram_bandwidth - get the dram bandwidth
5873 *
5874 * @wm: watermark calculation data
5875 *
5876 * Calculate the raw dram bandwidth (CIK).
5877 * Used for display watermark bandwidth calculations
5878 * Returns the dram bandwidth in MBytes/s
5879 */
5880static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5881{
5882 /* Calculate raw DRAM Bandwidth */
5883 fixed20_12 dram_efficiency; /* 0.7 */
5884 fixed20_12 yclk, dram_channels, bandwidth;
5885 fixed20_12 a;
5886
5887 a.full = dfixed_const(1000);
5888 yclk.full = dfixed_const(wm->yclk);
5889 yclk.full = dfixed_div(yclk, a);
5890 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5891 a.full = dfixed_const(10);
5892 dram_efficiency.full = dfixed_const(7);
5893 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5894 bandwidth.full = dfixed_mul(dram_channels, yclk);
5895 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
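	/* illustrative numbers only: yclk = 1000000 kHz and 2 channels give
	 * 1000 * (2 * 4) * 0.7 = 5600 MBytes/s of raw dram bandwidth
	 */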
5896
5897 return dfixed_trunc(bandwidth);
5898}
5899
5900/**
5901 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5902 *
5903 * @wm: watermark calculation data
5904 *
5905 * Calculate the dram bandwidth used for display (CIK).
5906 * Used for display watermark bandwidth calculations
5907 * Returns the dram bandwidth for display in MBytes/s
5908 */
5909static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5910{
5911 /* Calculate DRAM Bandwidth and the part allocated to display. */
5912 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5913 fixed20_12 yclk, dram_channels, bandwidth;
5914 fixed20_12 a;
5915
5916 a.full = dfixed_const(1000);
5917 yclk.full = dfixed_const(wm->yclk);
5918 yclk.full = dfixed_div(yclk, a);
5919 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5920 a.full = dfixed_const(10);
5921 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5922 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5923 bandwidth.full = dfixed_mul(dram_channels, yclk);
5924 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5925
5926 return dfixed_trunc(bandwidth);
5927}
5928
5929/**
5930 * dce8_data_return_bandwidth - get the data return bandwidth
5931 *
5932 * @wm: watermark calculation data
5933 *
5934 * Calculate the data return bandwidth used for display (CIK).
5935 * Used for display watermark bandwidth calculations
5936 * Returns the data return bandwidth in MBytes/s
5937 */
5938static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5939{
5940 /* Calculate the display Data return Bandwidth */
5941 fixed20_12 return_efficiency; /* 0.8 */
5942 fixed20_12 sclk, bandwidth;
5943 fixed20_12 a;
5944
5945 a.full = dfixed_const(1000);
5946 sclk.full = dfixed_const(wm->sclk);
5947 sclk.full = dfixed_div(sclk, a);
5948 a.full = dfixed_const(10);
5949 return_efficiency.full = dfixed_const(8);
5950 return_efficiency.full = dfixed_div(return_efficiency, a);
5951 a.full = dfixed_const(32);
5952 bandwidth.full = dfixed_mul(a, sclk);
5953 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5954
5955 return dfixed_trunc(bandwidth);
5956}
5957
5958/**
5959 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5960 *
5961 * @wm: watermark calculation data
5962 *
5963 * Calculate the dmif bandwidth used for display (CIK).
5964 * Used for display watermark bandwidth calculations
5965 * Returns the dmif bandwidth in MBytes/s
5966 */
5967static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5968{
5969 /* Calculate the DMIF Request Bandwidth */
5970 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5971 fixed20_12 disp_clk, bandwidth;
5972 fixed20_12 a, b;
5973
5974 a.full = dfixed_const(1000);
5975 disp_clk.full = dfixed_const(wm->disp_clk);
5976 disp_clk.full = dfixed_div(disp_clk, a);
5977 a.full = dfixed_const(32);
5978 b.full = dfixed_mul(a, disp_clk);
5979
5980 a.full = dfixed_const(10);
5981 disp_clk_request_efficiency.full = dfixed_const(8);
5982 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5983
5984 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5985
5986 return dfixed_trunc(bandwidth);
5987}
5988
5989/**
5990 * dce8_available_bandwidth - get the min available bandwidth
5991 *
5992 * @wm: watermark calculation data
5993 *
5994 * Calculate the min available bandwidth used for display (CIK).
5995 * Used for display watermark bandwidth calculations
5996 * Returns the min available bandwidth in MBytes/s
5997 */
5998static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5999{
6000 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6001 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6002 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6003 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6004
6005 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6006}
6007
6008/**
6009 * dce8_average_bandwidth - get the average available bandwidth
6010 *
6011 * @wm: watermark calculation data
6012 *
6013 * Calculate the average available bandwidth used for display (CIK).
6014 * Used for display watermark bandwidth calculations
6015 * Returns the average available bandwidth in MBytes/s
6016 */
6017static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6018{
6019 /* Calculate the display mode Average Bandwidth
6020 * DisplayMode should contain the source and destination dimensions,
6021 * timing, etc.
6022 */
6023 fixed20_12 bpp;
6024 fixed20_12 line_time;
6025 fixed20_12 src_width;
6026 fixed20_12 bandwidth;
6027 fixed20_12 a;
6028
6029 a.full = dfixed_const(1000);
6030 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6031 line_time.full = dfixed_div(line_time, a);
6032 bpp.full = dfixed_const(wm->bytes_per_pixel);
6033 src_width.full = dfixed_const(wm->src_width);
6034 bandwidth.full = dfixed_mul(src_width, bpp);
6035 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6036 bandwidth.full = dfixed_div(bandwidth, line_time);
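	/* illustrative numbers only: a 1920 pixel wide, 4 bytes/pixel source with
	 * vsc = 1 and a 15000 ns line time needs 1920 * 4 / 15 = 512 MBytes/s
	 */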
6037
6038 return dfixed_trunc(bandwidth);
6039}
6040
6041/**
6042 * dce8_latency_watermark - get the latency watermark
6043 *
6044 * @wm: watermark calculation data
6045 *
6046 * Calculate the latency watermark (CIK).
6047 * Used for display watermark bandwidth calculations
6048 * Returns the latency watermark in ns
6049 */
6050static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6051{
6052 /* First calculate the latency in ns */
6053 u32 mc_latency = 2000; /* 2000 ns. */
6054 u32 available_bandwidth = dce8_available_bandwidth(wm);
6055 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6056 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6057 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6058 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6059 (wm->num_heads * cursor_line_pair_return_time);
6060 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6061 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6062 u32 tmp, dmif_size = 12288;
6063 fixed20_12 a, b, c;
6064
6065 if (wm->num_heads == 0)
6066 return 0;
6067
6068 a.full = dfixed_const(2);
6069 b.full = dfixed_const(1);
6070 if ((wm->vsc.full > a.full) ||
6071 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6072 (wm->vtaps >= 5) ||
6073 ((wm->vsc.full >= a.full) && wm->interlaced))
6074 max_src_lines_per_dst_line = 4;
6075 else
6076 max_src_lines_per_dst_line = 2;
6077
6078 a.full = dfixed_const(available_bandwidth);
6079 b.full = dfixed_const(wm->num_heads);
6080 a.full = dfixed_div(a, b);
6081
6082 b.full = dfixed_const(mc_latency + 512);
6083 c.full = dfixed_const(wm->disp_clk);
6084 b.full = dfixed_div(b, c);
6085
6086 c.full = dfixed_const(dmif_size);
6087 b.full = dfixed_div(c, b);
6088
6089 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6090
6091 b.full = dfixed_const(1000);
6092 c.full = dfixed_const(wm->disp_clk);
6093 b.full = dfixed_div(c, b);
6094 c.full = dfixed_const(wm->bytes_per_pixel);
6095 b.full = dfixed_mul(b, c);
6096
6097 lb_fill_bw = min(tmp, dfixed_trunc(b));
6098
6099 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6100 b.full = dfixed_const(1000);
6101 c.full = dfixed_const(lb_fill_bw);
6102 b.full = dfixed_div(c, b);
6103 a.full = dfixed_div(a, b);
6104 line_fill_time = dfixed_trunc(a);
6105
6106 if (line_fill_time < wm->active_time)
6107 return latency;
6108 else
6109 return latency + (line_fill_time - wm->active_time);
6110
6111}
6112
6113/**
6114 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6115 * average and available dram bandwidth
6116 *
6117 * @wm: watermark calculation data
6118 *
6119 * Check if the display average bandwidth fits in the display
6120 * dram bandwidth (CIK).
6121 * Used for display watermark bandwidth calculations
6122 * Returns true if the display fits, false if not.
6123 */
6124static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6125{
6126 if (dce8_average_bandwidth(wm) <=
6127 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6128 return true;
6129 else
6130 return false;
6131}
6132
6133/**
6134 * dce8_average_bandwidth_vs_available_bandwidth - check
6135 * average and available bandwidth
6136 *
6137 * @wm: watermark calculation data
6138 *
6139 * Check if the display average bandwidth fits in the display
6140 * available bandwidth (CIK).
6141 * Used for display watermark bandwidth calculations
6142 * Returns true if the display fits, false if not.
6143 */
6144static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6145{
6146 if (dce8_average_bandwidth(wm) <=
6147 (dce8_available_bandwidth(wm) / wm->num_heads))
6148 return true;
6149 else
6150 return false;
6151}
6152
6153/**
6154 * dce8_check_latency_hiding - check latency hiding
6155 *
6156 * @wm: watermark calculation data
6157 *
6158 * Check latency hiding (CIK).
6159 * Used for display watermark bandwidth calculations
6160 * Returns true if the display fits, false if not.
6161 */
6162static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6163{
6164 u32 lb_partitions = wm->lb_size / wm->src_width;
6165 u32 line_time = wm->active_time + wm->blank_time;
6166 u32 latency_tolerant_lines;
6167 u32 latency_hiding;
6168 fixed20_12 a;
6169
6170 a.full = dfixed_const(1);
6171 if (wm->vsc.full > a.full)
6172 latency_tolerant_lines = 1;
6173 else {
6174 if (lb_partitions <= (wm->vtaps + 1))
6175 latency_tolerant_lines = 1;
6176 else
6177 latency_tolerant_lines = 2;
6178 }
6179
6180 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6181
6182 if (dce8_latency_watermark(wm) <= latency_hiding)
6183 return true;
6184 else
6185 return false;
6186}
6187
6188/**
6189 * dce8_program_watermarks - program display watermarks
6190 *
6191 * @rdev: radeon_device pointer
6192 * @radeon_crtc: the selected display controller
6193 * @lb_size: line buffer size
6194 * @num_heads: number of display controllers in use
6195 *
6196 * Calculate and program the display watermarks for the
6197 * selected display controller (CIK).
6198 */
6199static void dce8_program_watermarks(struct radeon_device *rdev,
6200 struct radeon_crtc *radeon_crtc,
6201 u32 lb_size, u32 num_heads)
6202{
6203 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6204 struct dce8_wm_params wm;
6205 u32 pixel_period;
6206 u32 line_time = 0;
6207 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6208 u32 tmp, wm_mask;
6209
6210 if (radeon_crtc->base.enabled && num_heads && mode) {
6211 pixel_period = 1000000 / (u32)mode->clock;
6212 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6213
6214 wm.yclk = rdev->pm.current_mclk * 10;
6215 wm.sclk = rdev->pm.current_sclk * 10;
6216 wm.disp_clk = mode->clock;
6217 wm.src_width = mode->crtc_hdisplay;
6218 wm.active_time = mode->crtc_hdisplay * pixel_period;
6219 wm.blank_time = line_time - wm.active_time;
6220 wm.interlaced = false;
6221 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6222 wm.interlaced = true;
6223 wm.vsc = radeon_crtc->vsc;
6224 wm.vtaps = 1;
6225 if (radeon_crtc->rmx_type != RMX_OFF)
6226 wm.vtaps = 2;
6227 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6228 wm.lb_size = lb_size;
6229 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6230 wm.num_heads = num_heads;
6231
6232 /* set for high clocks */
6233 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6234 /* set for low clocks */
6235 /* wm.yclk = low clk; wm.sclk = low clk */
6236 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
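		/* note: no low clock values are plugged in above, so watermark B
		 * currently ends up identical to watermark A
		 */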
6237
6238 /* possibly force display priority to high */
6239 /* should really do this at mode validation time... */
6240 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6241 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6242 !dce8_check_latency_hiding(&wm) ||
6243 (rdev->disp_priority == 2)) {
6244 DRM_DEBUG_KMS("force priority to high\n");
6245 }
6246 }
6247
6248 /* select wm A */
6249 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6250 tmp = wm_mask;
6251 tmp &= ~LATENCY_WATERMARK_MASK(3);
6252 tmp |= LATENCY_WATERMARK_MASK(1);
6253 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6254 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6255 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6256 LATENCY_HIGH_WATERMARK(line_time)));
6257 /* select wm B */
6258 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6259 tmp &= ~LATENCY_WATERMARK_MASK(3);
6260 tmp |= LATENCY_WATERMARK_MASK(2);
6261 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6262 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6263 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6264 LATENCY_HIGH_WATERMARK(line_time)));
6265 /* restore original selection */
6266 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6267}
6268
6269/**
6270 * dce8_bandwidth_update - program display watermarks
6271 *
6272 * @rdev: radeon_device pointer
6273 *
6274 * Calculate and program the display watermarks and line
6275 * buffer allocation (CIK).
6276 */
6277void dce8_bandwidth_update(struct radeon_device *rdev)
6278{
6279 struct drm_display_mode *mode = NULL;
6280 u32 num_heads = 0, lb_size;
6281 int i;
6282
6283 radeon_update_display_priority(rdev);
6284
6285 for (i = 0; i < rdev->num_crtc; i++) {
6286 if (rdev->mode_info.crtcs[i]->base.enabled)
6287 num_heads++;
6288 }
6289 for (i = 0; i < rdev->num_crtc; i++) {
6290 mode = &rdev->mode_info.crtcs[i]->base.mode;
6291 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6292 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6293 }
6294}
44fa346f
AD
6295
6296/**
6297 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6298 *
6299 * @rdev: radeon_device pointer
6300 *
6301 * Fetches a GPU clock counter snapshot (CIK).
6302 * Returns the 64 bit clock counter snapshot.
6303 */
6304uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6305{
6306 uint64_t clock;
6307
6308 mutex_lock(&rdev->gpu_clock_mutex);
6309 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6310 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6311 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6312 mutex_unlock(&rdev->gpu_clock_mutex);
6313 return clock;
6314}
6315
87167bb1
CK
6316static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6317 u32 cntl_reg, u32 status_reg)
6318{
6319 int r, i;
6320 struct atom_clock_dividers dividers;
6321 uint32_t tmp;
6322
6323 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6324 clock, false, &dividers);
6325 if (r)
6326 return r;
6327
6328 tmp = RREG32_SMC(cntl_reg);
6329 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6330 tmp |= dividers.post_divider;
6331 WREG32_SMC(cntl_reg, tmp);
6332
6333 for (i = 0; i < 100; i++) {
6334 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6335 break;
6336 mdelay(10);
6337 }
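	/* the loop above polls for up to 100 * 10 ms = 1 second for the
	 * DCLK_STATUS bit before giving up with -ETIMEDOUT below
	 */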
6338 if (i == 100)
6339 return -ETIMEDOUT;
6340
6341 return 0;
6342}
6343
6344int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6345{
6346 int r = 0;
6347
6348 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6349 if (r)
6350 return r;
6351
6352 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6353 return r;
6354}
6355
6356int cik_uvd_resume(struct radeon_device *rdev)
6357{
6358 uint64_t addr;
6359 uint32_t size;
6360 int r;
6361
6362 r = radeon_uvd_resume(rdev);
6363 if (r)
6364 return r;
6365
6366 /* program the VCPU memory controller bits 0-27 */
6367 addr = rdev->uvd.gpu_addr >> 3;
6368 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6369 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6370 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6371
6372 addr += size;
6373 size = RADEON_UVD_STACK_SIZE >> 3;
6374 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6375 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6376
6377 addr += size;
6378 size = RADEON_UVD_HEAP_SIZE >> 3;
6379 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6380 WREG32(UVD_VCPU_CACHE_SIZE2, size);
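	/* the VCPU address space programmed above is laid out back to back from
	 * rdev->uvd.gpu_addr: [firmware image][stack][heap], with all offsets and
	 * sizes expressed in 8-byte units (hence the >> 3)
	 */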
6381
6382 /* bits 28-31 */
6383 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6384 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6385
6386 /* bits 32-39 */
6387 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6388 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6389
6390 return 0;
6391}