drivers/gpu/drm/radeon/cik.c
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
 30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
 33#include "cik_blit_shaders.h"
 34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
 57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
 63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
 69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
 70
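/* Note: the MODULE_FIRMWARE() tags above do not load anything by themselves.
 * They only record the firmware file names in the module metadata so that
 * packaging tools (initramfs generators, etc.) know which blobs under
 * /lib/firmware this driver may request at runtime via request_firmware()
 * in cik_init_microcode() below.
 */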
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
 79static void cik_rlc_stop(struct radeon_device *rdev);
 80
81/*
 82 * Indirect register accessors
83 */
84u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85{
86 u32 r;
87
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
91 return r;
92}
93
94void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95{
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
98 WREG32(PCIE_DATA, v);
99 (void)RREG32(PCIE_DATA);
100}
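/* The index/data pattern above is the usual way to reach registers that are
 * not directly mapped: write the target offset to PCIE_INDEX, read it back
 * once to flush the posted write, then access PCIE_DATA.  These two helpers
 * are normally not called directly; they are wired up as the asic's
 * pciep_rreg/pciep_wreg callbacks and reached through wrapper macros in
 * radeon.h, roughly:
 *
 *	val = RREG32_PCIE_PORT(reg);
 *	WREG32_PCIE_PORT(reg, val | SOME_BIT);
 *
 * (Macro names here are illustrative; see radeon.h for the exact wrappers.)
 */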
101
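/* The "golden register" tables below are consumed by
 * radeon_program_register_sequence().  Each row is a triplet of
 * { register offset, and-mask, or-value }: when the mask is 0xffffffff the
 * value is written as-is, otherwise the helper does a read-modify-write,
 * clearing the masked bits before OR-ing in the new value, roughly:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 *
 * (Sketch only; see radeon_program_register_sequence() for the exact logic.)
 */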
102static const u32 bonaire_golden_spm_registers[] =
103{
104 0x30800, 0xe0ffffff, 0xe0000000
105};
106
107static const u32 bonaire_golden_common_registers[] =
108{
109 0xc770, 0xffffffff, 0x00000800,
110 0xc774, 0xffffffff, 0x00000800,
111 0xc798, 0xffffffff, 0x00007fbf,
112 0xc79c, 0xffffffff, 0x00007faf
113};
114
115static const u32 bonaire_golden_registers[] =
116{
117 0x3354, 0x00000333, 0x00000333,
118 0x3350, 0x000c0fc0, 0x00040200,
119 0x9a10, 0x00010000, 0x00058208,
120 0x3c000, 0xffff1fff, 0x00140000,
121 0x3c200, 0xfdfc0fff, 0x00000100,
122 0x3c234, 0x40000000, 0x40000200,
123 0x9830, 0xffffffff, 0x00000000,
124 0x9834, 0xf00fffff, 0x00000400,
125 0x9838, 0x0002021c, 0x00020200,
126 0xc78, 0x00000080, 0x00000000,
127 0x5bb0, 0x000000f0, 0x00000070,
128 0x5bc0, 0xf0311fff, 0x80300000,
129 0x98f8, 0x73773777, 0x12010001,
130 0x350c, 0x00810000, 0x408af000,
131 0x7030, 0x31000111, 0x00000011,
132 0x2f48, 0x73773777, 0x12010001,
133 0x220c, 0x00007fb6, 0x0021a1b1,
134 0x2210, 0x00007fb6, 0x002021b1,
135 0x2180, 0x00007fb6, 0x00002191,
136 0x2218, 0x00007fb6, 0x002121b1,
137 0x221c, 0x00007fb6, 0x002021b1,
138 0x21dc, 0x00007fb6, 0x00002191,
139 0x21e0, 0x00007fb6, 0x00002191,
140 0x3628, 0x0000003f, 0x0000000a,
141 0x362c, 0x0000003f, 0x0000000a,
142 0x2ae4, 0x00073ffe, 0x000022a2,
143 0x240c, 0x000007ff, 0x00000000,
144 0x8a14, 0xf000003f, 0x00000007,
145 0x8bf0, 0x00002001, 0x00000001,
146 0x8b24, 0xffffffff, 0x00ffffff,
147 0x30a04, 0x0000ff0f, 0x00000000,
148 0x28a4c, 0x07ffffff, 0x06000000,
149 0x4d8, 0x00000fff, 0x00000100,
150 0x3e78, 0x00000001, 0x00000002,
151 0x9100, 0x03000000, 0x0362c688,
152 0x8c00, 0x000000ff, 0x00000001,
153 0xe40, 0x00001fff, 0x00001fff,
154 0x9060, 0x0000007f, 0x00000020,
155 0x9508, 0x00010000, 0x00010000,
156 0xac14, 0x000003ff, 0x000000f3,
157 0xac0c, 0xffffffff, 0x00001032
158};
159
160static const u32 bonaire_mgcg_cgcg_init[] =
161{
162 0xc420, 0xffffffff, 0xfffffffc,
163 0x30800, 0xffffffff, 0xe0000000,
164 0x3c2a0, 0xffffffff, 0x00000100,
165 0x3c208, 0xffffffff, 0x00000100,
166 0x3c2c0, 0xffffffff, 0xc0000100,
167 0x3c2c8, 0xffffffff, 0xc0000100,
168 0x3c2c4, 0xffffffff, 0xc0000100,
169 0x55e4, 0xffffffff, 0x00600100,
170 0x3c280, 0xffffffff, 0x00000100,
171 0x3c214, 0xffffffff, 0x06000100,
172 0x3c220, 0xffffffff, 0x00000100,
173 0x3c218, 0xffffffff, 0x06000100,
174 0x3c204, 0xffffffff, 0x00000100,
175 0x3c2e0, 0xffffffff, 0x00000100,
176 0x3c224, 0xffffffff, 0x00000100,
177 0x3c200, 0xffffffff, 0x00000100,
178 0x3c230, 0xffffffff, 0x00000100,
179 0x3c234, 0xffffffff, 0x00000100,
180 0x3c250, 0xffffffff, 0x00000100,
181 0x3c254, 0xffffffff, 0x00000100,
182 0x3c258, 0xffffffff, 0x00000100,
183 0x3c25c, 0xffffffff, 0x00000100,
184 0x3c260, 0xffffffff, 0x00000100,
185 0x3c27c, 0xffffffff, 0x00000100,
186 0x3c278, 0xffffffff, 0x00000100,
187 0x3c210, 0xffffffff, 0x06000100,
188 0x3c290, 0xffffffff, 0x00000100,
189 0x3c274, 0xffffffff, 0x00000100,
190 0x3c2b4, 0xffffffff, 0x00000100,
191 0x3c2b0, 0xffffffff, 0x00000100,
192 0x3c270, 0xffffffff, 0x00000100,
193 0x30800, 0xffffffff, 0xe0000000,
194 0x3c020, 0xffffffff, 0x00010000,
195 0x3c024, 0xffffffff, 0x00030002,
196 0x3c028, 0xffffffff, 0x00040007,
197 0x3c02c, 0xffffffff, 0x00060005,
198 0x3c030, 0xffffffff, 0x00090008,
199 0x3c034, 0xffffffff, 0x00010000,
200 0x3c038, 0xffffffff, 0x00030002,
201 0x3c03c, 0xffffffff, 0x00040007,
202 0x3c040, 0xffffffff, 0x00060005,
203 0x3c044, 0xffffffff, 0x00090008,
204 0x3c048, 0xffffffff, 0x00010000,
205 0x3c04c, 0xffffffff, 0x00030002,
206 0x3c050, 0xffffffff, 0x00040007,
207 0x3c054, 0xffffffff, 0x00060005,
208 0x3c058, 0xffffffff, 0x00090008,
209 0x3c05c, 0xffffffff, 0x00010000,
210 0x3c060, 0xffffffff, 0x00030002,
211 0x3c064, 0xffffffff, 0x00040007,
212 0x3c068, 0xffffffff, 0x00060005,
213 0x3c06c, 0xffffffff, 0x00090008,
214 0x3c070, 0xffffffff, 0x00010000,
215 0x3c074, 0xffffffff, 0x00030002,
216 0x3c078, 0xffffffff, 0x00040007,
217 0x3c07c, 0xffffffff, 0x00060005,
218 0x3c080, 0xffffffff, 0x00090008,
219 0x3c084, 0xffffffff, 0x00010000,
220 0x3c088, 0xffffffff, 0x00030002,
221 0x3c08c, 0xffffffff, 0x00040007,
222 0x3c090, 0xffffffff, 0x00060005,
223 0x3c094, 0xffffffff, 0x00090008,
224 0x3c098, 0xffffffff, 0x00010000,
225 0x3c09c, 0xffffffff, 0x00030002,
226 0x3c0a0, 0xffffffff, 0x00040007,
227 0x3c0a4, 0xffffffff, 0x00060005,
228 0x3c0a8, 0xffffffff, 0x00090008,
229 0x3c000, 0xffffffff, 0x96e00200,
230 0x8708, 0xffffffff, 0x00900100,
231 0xc424, 0xffffffff, 0x0020003f,
232 0x38, 0xffffffff, 0x0140001c,
233 0x3c, 0x000f0000, 0x000f0000,
234 0x220, 0xffffffff, 0xC060000C,
235 0x224, 0xc0000fff, 0x00000100,
236 0xf90, 0xffffffff, 0x00000100,
237 0xf98, 0x00000101, 0x00000000,
238 0x20a8, 0xffffffff, 0x00000104,
239 0x55e4, 0xff000fff, 0x00000100,
240 0x30cc, 0xc0000fff, 0x00000104,
241 0xc1e4, 0x00000001, 0x00000001,
242 0xd00c, 0xff000ff0, 0x00000100,
243 0xd80c, 0xff000ff0, 0x00000100
244};
245
246static const u32 spectre_golden_spm_registers[] =
247{
248 0x30800, 0xe0ffffff, 0xe0000000
249};
250
251static const u32 spectre_golden_common_registers[] =
252{
253 0xc770, 0xffffffff, 0x00000800,
254 0xc774, 0xffffffff, 0x00000800,
255 0xc798, 0xffffffff, 0x00007fbf,
256 0xc79c, 0xffffffff, 0x00007faf
257};
258
259static const u32 spectre_golden_registers[] =
260{
261 0x3c000, 0xffff1fff, 0x96940200,
262 0x3c00c, 0xffff0001, 0xff000000,
263 0x3c200, 0xfffc0fff, 0x00000100,
264 0x6ed8, 0x00010101, 0x00010000,
265 0x9834, 0xf00fffff, 0x00000400,
266 0x9838, 0xfffffffc, 0x00020200,
267 0x5bb0, 0x000000f0, 0x00000070,
268 0x5bc0, 0xf0311fff, 0x80300000,
269 0x98f8, 0x73773777, 0x12010001,
270 0x9b7c, 0x00ff0000, 0x00fc0000,
271 0x2f48, 0x73773777, 0x12010001,
272 0x8a14, 0xf000003f, 0x00000007,
273 0x8b24, 0xffffffff, 0x00ffffff,
274 0x28350, 0x3f3f3fff, 0x00000082,
275 0x28355, 0x0000003f, 0x00000000,
276 0x3e78, 0x00000001, 0x00000002,
277 0x913c, 0xffff03df, 0x00000004,
278 0xc768, 0x00000008, 0x00000008,
279 0x8c00, 0x000008ff, 0x00000800,
280 0x9508, 0x00010000, 0x00010000,
281 0xac0c, 0xffffffff, 0x54763210,
282 0x214f8, 0x01ff01ff, 0x00000002,
283 0x21498, 0x007ff800, 0x00200000,
284 0x2015c, 0xffffffff, 0x00000f40,
285 0x30934, 0xffffffff, 0x00000001
286};
287
288static const u32 spectre_mgcg_cgcg_init[] =
289{
290 0xc420, 0xffffffff, 0xfffffffc,
291 0x30800, 0xffffffff, 0xe0000000,
292 0x3c2a0, 0xffffffff, 0x00000100,
293 0x3c208, 0xffffffff, 0x00000100,
294 0x3c2c0, 0xffffffff, 0x00000100,
295 0x3c2c8, 0xffffffff, 0x00000100,
296 0x3c2c4, 0xffffffff, 0x00000100,
297 0x55e4, 0xffffffff, 0x00600100,
298 0x3c280, 0xffffffff, 0x00000100,
299 0x3c214, 0xffffffff, 0x06000100,
300 0x3c220, 0xffffffff, 0x00000100,
301 0x3c218, 0xffffffff, 0x06000100,
302 0x3c204, 0xffffffff, 0x00000100,
303 0x3c2e0, 0xffffffff, 0x00000100,
304 0x3c224, 0xffffffff, 0x00000100,
305 0x3c200, 0xffffffff, 0x00000100,
306 0x3c230, 0xffffffff, 0x00000100,
307 0x3c234, 0xffffffff, 0x00000100,
308 0x3c250, 0xffffffff, 0x00000100,
309 0x3c254, 0xffffffff, 0x00000100,
310 0x3c258, 0xffffffff, 0x00000100,
311 0x3c25c, 0xffffffff, 0x00000100,
312 0x3c260, 0xffffffff, 0x00000100,
313 0x3c27c, 0xffffffff, 0x00000100,
314 0x3c278, 0xffffffff, 0x00000100,
315 0x3c210, 0xffffffff, 0x06000100,
316 0x3c290, 0xffffffff, 0x00000100,
317 0x3c274, 0xffffffff, 0x00000100,
318 0x3c2b4, 0xffffffff, 0x00000100,
319 0x3c2b0, 0xffffffff, 0x00000100,
320 0x3c270, 0xffffffff, 0x00000100,
321 0x30800, 0xffffffff, 0xe0000000,
322 0x3c020, 0xffffffff, 0x00010000,
323 0x3c024, 0xffffffff, 0x00030002,
324 0x3c028, 0xffffffff, 0x00040007,
325 0x3c02c, 0xffffffff, 0x00060005,
326 0x3c030, 0xffffffff, 0x00090008,
327 0x3c034, 0xffffffff, 0x00010000,
328 0x3c038, 0xffffffff, 0x00030002,
329 0x3c03c, 0xffffffff, 0x00040007,
330 0x3c040, 0xffffffff, 0x00060005,
331 0x3c044, 0xffffffff, 0x00090008,
332 0x3c048, 0xffffffff, 0x00010000,
333 0x3c04c, 0xffffffff, 0x00030002,
334 0x3c050, 0xffffffff, 0x00040007,
335 0x3c054, 0xffffffff, 0x00060005,
336 0x3c058, 0xffffffff, 0x00090008,
337 0x3c05c, 0xffffffff, 0x00010000,
338 0x3c060, 0xffffffff, 0x00030002,
339 0x3c064, 0xffffffff, 0x00040007,
340 0x3c068, 0xffffffff, 0x00060005,
341 0x3c06c, 0xffffffff, 0x00090008,
342 0x3c070, 0xffffffff, 0x00010000,
343 0x3c074, 0xffffffff, 0x00030002,
344 0x3c078, 0xffffffff, 0x00040007,
345 0x3c07c, 0xffffffff, 0x00060005,
346 0x3c080, 0xffffffff, 0x00090008,
347 0x3c084, 0xffffffff, 0x00010000,
348 0x3c088, 0xffffffff, 0x00030002,
349 0x3c08c, 0xffffffff, 0x00040007,
350 0x3c090, 0xffffffff, 0x00060005,
351 0x3c094, 0xffffffff, 0x00090008,
352 0x3c098, 0xffffffff, 0x00010000,
353 0x3c09c, 0xffffffff, 0x00030002,
354 0x3c0a0, 0xffffffff, 0x00040007,
355 0x3c0a4, 0xffffffff, 0x00060005,
356 0x3c0a8, 0xffffffff, 0x00090008,
357 0x3c0ac, 0xffffffff, 0x00010000,
358 0x3c0b0, 0xffffffff, 0x00030002,
359 0x3c0b4, 0xffffffff, 0x00040007,
360 0x3c0b8, 0xffffffff, 0x00060005,
361 0x3c0bc, 0xffffffff, 0x00090008,
362 0x3c000, 0xffffffff, 0x96e00200,
363 0x8708, 0xffffffff, 0x00900100,
364 0xc424, 0xffffffff, 0x0020003f,
365 0x38, 0xffffffff, 0x0140001c,
366 0x3c, 0x000f0000, 0x000f0000,
367 0x220, 0xffffffff, 0xC060000C,
368 0x224, 0xc0000fff, 0x00000100,
369 0xf90, 0xffffffff, 0x00000100,
370 0xf98, 0x00000101, 0x00000000,
371 0x20a8, 0xffffffff, 0x00000104,
372 0x55e4, 0xff000fff, 0x00000100,
373 0x30cc, 0xc0000fff, 0x00000104,
374 0xc1e4, 0x00000001, 0x00000001,
375 0xd00c, 0xff000ff0, 0x00000100,
376 0xd80c, 0xff000ff0, 0x00000100
377};
378
379static const u32 kalindi_golden_spm_registers[] =
380{
381 0x30800, 0xe0ffffff, 0xe0000000
382};
383
384static const u32 kalindi_golden_common_registers[] =
385{
386 0xc770, 0xffffffff, 0x00000800,
387 0xc774, 0xffffffff, 0x00000800,
388 0xc798, 0xffffffff, 0x00007fbf,
389 0xc79c, 0xffffffff, 0x00007faf
390};
391
392static const u32 kalindi_golden_registers[] =
393{
394 0x3c000, 0xffffdfff, 0x6e944040,
395 0x55e4, 0xff607fff, 0xfc000100,
396 0x3c220, 0xff000fff, 0x00000100,
397 0x3c224, 0xff000fff, 0x00000100,
398 0x3c200, 0xfffc0fff, 0x00000100,
399 0x6ed8, 0x00010101, 0x00010000,
400 0x9830, 0xffffffff, 0x00000000,
401 0x9834, 0xf00fffff, 0x00000400,
402 0x5bb0, 0x000000f0, 0x00000070,
403 0x5bc0, 0xf0311fff, 0x80300000,
404 0x98f8, 0x73773777, 0x12010001,
405 0x98fc, 0xffffffff, 0x00000010,
406 0x9b7c, 0x00ff0000, 0x00fc0000,
407 0x8030, 0x00001f0f, 0x0000100a,
408 0x2f48, 0x73773777, 0x12010001,
409 0x2408, 0x000fffff, 0x000c007f,
410 0x8a14, 0xf000003f, 0x00000007,
411 0x8b24, 0x3fff3fff, 0x00ffcfff,
412 0x30a04, 0x0000ff0f, 0x00000000,
413 0x28a4c, 0x07ffffff, 0x06000000,
414 0x4d8, 0x00000fff, 0x00000100,
415 0x3e78, 0x00000001, 0x00000002,
416 0xc768, 0x00000008, 0x00000008,
417 0x8c00, 0x000000ff, 0x00000003,
418 0x214f8, 0x01ff01ff, 0x00000002,
419 0x21498, 0x007ff800, 0x00200000,
420 0x2015c, 0xffffffff, 0x00000f40,
421 0x88c4, 0x001f3ae3, 0x00000082,
422 0x88d4, 0x0000001f, 0x00000010,
423 0x30934, 0xffffffff, 0x00000000
424};
425
426static const u32 kalindi_mgcg_cgcg_init[] =
427{
428 0xc420, 0xffffffff, 0xfffffffc,
429 0x30800, 0xffffffff, 0xe0000000,
430 0x3c2a0, 0xffffffff, 0x00000100,
431 0x3c208, 0xffffffff, 0x00000100,
432 0x3c2c0, 0xffffffff, 0x00000100,
433 0x3c2c8, 0xffffffff, 0x00000100,
434 0x3c2c4, 0xffffffff, 0x00000100,
435 0x55e4, 0xffffffff, 0x00600100,
436 0x3c280, 0xffffffff, 0x00000100,
437 0x3c214, 0xffffffff, 0x06000100,
438 0x3c220, 0xffffffff, 0x00000100,
439 0x3c218, 0xffffffff, 0x06000100,
440 0x3c204, 0xffffffff, 0x00000100,
441 0x3c2e0, 0xffffffff, 0x00000100,
442 0x3c224, 0xffffffff, 0x00000100,
443 0x3c200, 0xffffffff, 0x00000100,
444 0x3c230, 0xffffffff, 0x00000100,
445 0x3c234, 0xffffffff, 0x00000100,
446 0x3c250, 0xffffffff, 0x00000100,
447 0x3c254, 0xffffffff, 0x00000100,
448 0x3c258, 0xffffffff, 0x00000100,
449 0x3c25c, 0xffffffff, 0x00000100,
450 0x3c260, 0xffffffff, 0x00000100,
451 0x3c27c, 0xffffffff, 0x00000100,
452 0x3c278, 0xffffffff, 0x00000100,
453 0x3c210, 0xffffffff, 0x06000100,
454 0x3c290, 0xffffffff, 0x00000100,
455 0x3c274, 0xffffffff, 0x00000100,
456 0x3c2b4, 0xffffffff, 0x00000100,
457 0x3c2b0, 0xffffffff, 0x00000100,
458 0x3c270, 0xffffffff, 0x00000100,
459 0x30800, 0xffffffff, 0xe0000000,
460 0x3c020, 0xffffffff, 0x00010000,
461 0x3c024, 0xffffffff, 0x00030002,
462 0x3c028, 0xffffffff, 0x00040007,
463 0x3c02c, 0xffffffff, 0x00060005,
464 0x3c030, 0xffffffff, 0x00090008,
465 0x3c034, 0xffffffff, 0x00010000,
466 0x3c038, 0xffffffff, 0x00030002,
467 0x3c03c, 0xffffffff, 0x00040007,
468 0x3c040, 0xffffffff, 0x00060005,
469 0x3c044, 0xffffffff, 0x00090008,
470 0x3c000, 0xffffffff, 0x96e00200,
471 0x8708, 0xffffffff, 0x00900100,
472 0xc424, 0xffffffff, 0x0020003f,
473 0x38, 0xffffffff, 0x0140001c,
474 0x3c, 0x000f0000, 0x000f0000,
475 0x220, 0xffffffff, 0xC060000C,
476 0x224, 0xc0000fff, 0x00000100,
477 0x20a8, 0xffffffff, 0x00000104,
478 0x55e4, 0xff000fff, 0x00000100,
479 0x30cc, 0xc0000fff, 0x00000104,
480 0xc1e4, 0x00000001, 0x00000001,
481 0xd00c, 0xff000ff0, 0x00000100,
482 0xd80c, 0xff000ff0, 0x00000100
483};
484
485static void cik_init_golden_registers(struct radeon_device *rdev)
486{
487 switch (rdev->family) {
488 case CHIP_BONAIRE:
489 radeon_program_register_sequence(rdev,
490 bonaire_mgcg_cgcg_init,
491 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
492 radeon_program_register_sequence(rdev,
493 bonaire_golden_registers,
494 (const u32)ARRAY_SIZE(bonaire_golden_registers));
495 radeon_program_register_sequence(rdev,
496 bonaire_golden_common_registers,
497 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
498 radeon_program_register_sequence(rdev,
499 bonaire_golden_spm_registers,
500 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
501 break;
502 case CHIP_KABINI:
503 radeon_program_register_sequence(rdev,
504 kalindi_mgcg_cgcg_init,
505 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
506 radeon_program_register_sequence(rdev,
507 kalindi_golden_registers,
508 (const u32)ARRAY_SIZE(kalindi_golden_registers));
509 radeon_program_register_sequence(rdev,
510 kalindi_golden_common_registers,
511 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
512 radeon_program_register_sequence(rdev,
513 kalindi_golden_spm_registers,
514 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
515 break;
516 case CHIP_KAVERI:
517 radeon_program_register_sequence(rdev,
518 spectre_mgcg_cgcg_init,
519 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
520 radeon_program_register_sequence(rdev,
521 spectre_golden_registers,
522 (const u32)ARRAY_SIZE(spectre_golden_registers));
523 radeon_program_register_sequence(rdev,
524 spectre_golden_common_registers,
525 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
526 radeon_program_register_sequence(rdev,
527 spectre_golden_spm_registers,
528 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
529 break;
530 default:
531 break;
532 }
533}
534
535/**
536 * cik_get_xclk - get the xclk
537 *
538 * @rdev: radeon_device pointer
539 *
540 * Returns the reference clock used by the gfx engine
541 * (CIK).
542 */
543u32 cik_get_xclk(struct radeon_device *rdev)
544{
545 u32 reference_clock = rdev->clock.spll.reference_freq;
546
547 if (rdev->flags & RADEON_IS_IGP) {
548 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
549 return reference_clock / 2;
550 } else {
551 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
552 return reference_clock / 4;
553 }
554 return reference_clock;
555}
556
557/**
558 * cik_mm_rdoorbell - read a doorbell dword
559 *
560 * @rdev: radeon_device pointer
561 * @offset: byte offset into the aperture
562 *
563 * Returns the value in the doorbell aperture at the
564 * requested offset (CIK).
565 */
566u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
567{
568 if (offset < rdev->doorbell.size) {
569 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
570 } else {
571 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
572 return 0;
573 }
574}
575
576/**
577 * cik_mm_wdoorbell - write a doorbell dword
578 *
579 * @rdev: radeon_device pointer
580 * @offset: byte offset into the aperture
581 * @v: value to write
582 *
583 * Writes @v to the doorbell aperture at the
584 * requested offset (CIK).
585 */
586void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
587{
588 if (offset < rdev->doorbell.size) {
589 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
590 } else {
591 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
592 }
593}
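/* Ring code does not usually call these doorbell helpers directly; radeon.h
 * provides thin wrappers (RDOORBELL32()/WDOORBELL32()) around them, so
 * bumping a ring's write pointer from the CPU ends up as something like:
 *
 *	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 *
 * (Wrapper and field names are illustrative; the point is that a doorbell
 * write is just a 32-bit store into the doorbell aperture at the ring's
 * offset.)
 */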
594
595#define BONAIRE_IO_MC_REGS_SIZE 36
596
597static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
598{
599 {0x00000070, 0x04400000},
600 {0x00000071, 0x80c01803},
601 {0x00000072, 0x00004004},
602 {0x00000073, 0x00000100},
603 {0x00000074, 0x00ff0000},
604 {0x00000075, 0x34000000},
605 {0x00000076, 0x08000014},
606 {0x00000077, 0x00cc08ec},
607 {0x00000078, 0x00000400},
608 {0x00000079, 0x00000000},
609 {0x0000007a, 0x04090000},
610 {0x0000007c, 0x00000000},
611 {0x0000007e, 0x4408a8e8},
612 {0x0000007f, 0x00000304},
613 {0x00000080, 0x00000000},
614 {0x00000082, 0x00000001},
615 {0x00000083, 0x00000002},
616 {0x00000084, 0xf3e4f400},
617 {0x00000085, 0x052024e3},
618 {0x00000087, 0x00000000},
619 {0x00000088, 0x01000000},
620 {0x0000008a, 0x1c0a0000},
621 {0x0000008b, 0xff010000},
622 {0x0000008d, 0xffffefff},
623 {0x0000008e, 0xfff3efff},
624 {0x0000008f, 0xfff3efbf},
625 {0x00000092, 0xf7ffffff},
626 {0x00000093, 0xffffff7f},
627 {0x00000095, 0x00101101},
628 {0x00000096, 0x00000fff},
629 {0x00000097, 0x00116fff},
630 {0x00000098, 0x60010000},
631 {0x00000099, 0x10010000},
632 {0x0000009a, 0x00006000},
633 {0x0000009b, 0x00001000},
634 {0x0000009f, 0x00b48000}
635};
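/* Each entry above is a { MC_SEQ_IO_DEBUG index, data } pair.
 * ci_mc_load_microcode() below walks this table and programs it through the
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA register pair before streaming
 * in the MC ucode itself.
 */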
636
637/**
638 * cik_srbm_select - select specific register instances
639 *
640 * @rdev: radeon_device pointer
641 * @me: selected ME (micro engine)
642 * @pipe: pipe
643 * @queue: queue
644 * @vmid: VMID
645 *
 646 * Switches the currently active register instances. Some
647 * registers are instanced per VMID, others are instanced per
648 * me/pipe/queue combination.
649 */
650static void cik_srbm_select(struct radeon_device *rdev,
651 u32 me, u32 pipe, u32 queue, u32 vmid)
652{
653 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
654 MEID(me & 0x3) |
655 VMID(vmid & 0xf) |
656 QUEUEID(queue & 0x7));
657 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
658}
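/* Typical usage: select the specific me/pipe/queue/vmid whose instanced
 * registers need to be touched, program them, then switch back to the
 * defaults so later register accesses are not silently redirected:
 *
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */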
659
660/* ucode loading */
661/**
662 * ci_mc_load_microcode - load MC ucode into the hw
663 *
664 * @rdev: radeon_device pointer
665 *
666 * Load the GDDR MC ucode into the hw (CIK).
667 * Returns 0 on success, error on failure.
668 */
669static int ci_mc_load_microcode(struct radeon_device *rdev)
670{
671 const __be32 *fw_data;
672 u32 running, blackout = 0;
673 u32 *io_mc_regs;
674 int i, ucode_size, regs_size;
675
676 if (!rdev->mc_fw)
677 return -EINVAL;
678
679 switch (rdev->family) {
680 case CHIP_BONAIRE:
681 default:
682 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
683 ucode_size = CIK_MC_UCODE_SIZE;
684 regs_size = BONAIRE_IO_MC_REGS_SIZE;
685 break;
686 }
687
688 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
689
690 if (running == 0) {
691 if (running) {
692 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
693 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
694 }
695
696 /* reset the engine and set to writable */
697 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
698 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
699
700 /* load mc io regs */
701 for (i = 0; i < regs_size; i++) {
702 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
703 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
704 }
705 /* load the MC ucode */
706 fw_data = (const __be32 *)rdev->mc_fw->data;
707 for (i = 0; i < ucode_size; i++)
708 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
709
710 /* put the engine back into the active state */
711 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
712 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
713 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
714
715 /* wait for training to complete */
716 for (i = 0; i < rdev->usec_timeout; i++) {
717 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
718 break;
719 udelay(1);
720 }
721 for (i = 0; i < rdev->usec_timeout; i++) {
722 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
723 break;
724 udelay(1);
725 }
726
727 if (running)
728 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
729 }
730
731 return 0;
732}
733
734/**
735 * cik_init_microcode - load ucode images from disk
736 *
737 * @rdev: radeon_device pointer
738 *
739 * Use the firmware interface to load the ucode images into
740 * the driver (not loaded into hw).
741 * Returns 0 on success, error on failure.
742 */
743static int cik_init_microcode(struct radeon_device *rdev)
744{
745 struct platform_device *pdev;
746 const char *chip_name;
747 size_t pfp_req_size, me_req_size, ce_req_size,
748 mec_req_size, rlc_req_size, mc_req_size,
749 sdma_req_size;
750 char fw_name[30];
751 int err;
752
753 DRM_DEBUG("\n");
754
755 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
756 err = IS_ERR(pdev);
757 if (err) {
758 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
759 return -EINVAL;
760 }
761
762 switch (rdev->family) {
763 case CHIP_BONAIRE:
764 chip_name = "BONAIRE";
765 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
766 me_req_size = CIK_ME_UCODE_SIZE * 4;
767 ce_req_size = CIK_CE_UCODE_SIZE * 4;
768 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
769 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
770 mc_req_size = CIK_MC_UCODE_SIZE * 4;
 771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 break;
773 case CHIP_KAVERI:
774 chip_name = "KAVERI";
775 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 me_req_size = CIK_ME_UCODE_SIZE * 4;
777 ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
 780 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
781 break;
782 case CHIP_KABINI:
783 chip_name = "KABINI";
784 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
785 me_req_size = CIK_ME_UCODE_SIZE * 4;
786 ce_req_size = CIK_CE_UCODE_SIZE * 4;
787 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
788 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
 789 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
790 break;
791 default: BUG();
792 }
793
794 DRM_INFO("Loading %s Microcode\n", chip_name);
795
796 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
797 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
798 if (err)
799 goto out;
800 if (rdev->pfp_fw->size != pfp_req_size) {
801 printk(KERN_ERR
802 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
803 rdev->pfp_fw->size, fw_name);
804 err = -EINVAL;
805 goto out;
806 }
807
808 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
809 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
810 if (err)
811 goto out;
812 if (rdev->me_fw->size != me_req_size) {
813 printk(KERN_ERR
814 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
815 rdev->me_fw->size, fw_name);
816 err = -EINVAL;
817 }
818
819 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
820 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
821 if (err)
822 goto out;
823 if (rdev->ce_fw->size != ce_req_size) {
824 printk(KERN_ERR
825 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
826 rdev->ce_fw->size, fw_name);
827 err = -EINVAL;
828 }
829
830 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
831 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
832 if (err)
833 goto out;
834 if (rdev->mec_fw->size != mec_req_size) {
835 printk(KERN_ERR
836 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
837 rdev->mec_fw->size, fw_name);
838 err = -EINVAL;
839 }
840
841 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
842 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
843 if (err)
844 goto out;
845 if (rdev->rlc_fw->size != rlc_req_size) {
846 printk(KERN_ERR
847 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
848 rdev->rlc_fw->size, fw_name);
849 err = -EINVAL;
850 }
851
852 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
853 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
854 if (err)
855 goto out;
856 if (rdev->sdma_fw->size != sdma_req_size) {
857 printk(KERN_ERR
858 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
859 rdev->sdma_fw->size, fw_name);
860 err = -EINVAL;
861 }
862
863 /* No MC ucode on APUs */
864 if (!(rdev->flags & RADEON_IS_IGP)) {
865 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
866 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
867 if (err)
868 goto out;
869 if (rdev->mc_fw->size != mc_req_size) {
870 printk(KERN_ERR
871 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
872 rdev->mc_fw->size, fw_name);
873 err = -EINVAL;
874 }
875 }
876
877out:
878 platform_device_unregister(pdev);
879
880 if (err) {
881 if (err != -EINVAL)
882 printk(KERN_ERR
883 "cik_cp: Failed to load firmware \"%s\"\n",
884 fw_name);
885 release_firmware(rdev->pfp_fw);
886 rdev->pfp_fw = NULL;
887 release_firmware(rdev->me_fw);
888 rdev->me_fw = NULL;
889 release_firmware(rdev->ce_fw);
890 rdev->ce_fw = NULL;
891 release_firmware(rdev->rlc_fw);
892 rdev->rlc_fw = NULL;
893 release_firmware(rdev->mc_fw);
894 rdev->mc_fw = NULL;
895 }
896 return err;
897}
898
899/*
900 * Core functions
901 */
902/**
903 * cik_tiling_mode_table_init - init the hw tiling table
904 *
905 * @rdev: radeon_device pointer
906 *
907 * Starting with SI, the tiling setup is done globally in a
908 * set of 32 tiling modes. Rather than selecting each set of
909 * parameters per surface as on older asics, we just select
910 * which index in the tiling table we want to use, and the
911 * surface uses those parameters (CIK).
912 */
913static void cik_tiling_mode_table_init(struct radeon_device *rdev)
914{
915 const u32 num_tile_mode_states = 32;
916 const u32 num_secondary_tile_mode_states = 16;
917 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
918 u32 num_pipe_configs;
919 u32 num_rbs = rdev->config.cik.max_backends_per_se *
920 rdev->config.cik.max_shader_engines;
921
922 switch (rdev->config.cik.mem_row_size_in_kb) {
923 case 1:
924 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
925 break;
926 case 2:
927 default:
928 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
929 break;
930 case 4:
931 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
932 break;
933 }
934
935 num_pipe_configs = rdev->config.cik.max_tile_pipes;
936 if (num_pipe_configs > 8)
937 num_pipe_configs = 8; /* ??? */
938
939 if (num_pipe_configs == 8) {
940 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
941 switch (reg_offset) {
942 case 0:
943 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
947 break;
948 case 1:
949 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
951 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
953 break;
954 case 2:
955 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
958 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
959 break;
960 case 3:
961 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
965 break;
966 case 4:
967 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 TILE_SPLIT(split_equal_to_row_size));
971 break;
972 case 5:
973 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
975 break;
976 case 6:
977 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
981 break;
982 case 7:
983 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
986 TILE_SPLIT(split_equal_to_row_size));
987 break;
988 case 8:
989 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
991 break;
992 case 9:
993 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
995 break;
996 case 10:
997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1001 break;
1002 case 11:
1003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1007 break;
1008 case 12:
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1013 break;
1014 case 13:
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1017 break;
1018 case 14:
1019 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1020 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1023 break;
1024 case 16:
1025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1029 break;
1030 case 17:
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1032 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1035 break;
1036 case 27:
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1039 break;
1040 case 28:
1041 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1042 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1045 break;
1046 case 29:
1047 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1048 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1049 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1051 break;
1052 case 30:
1053 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1054 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1055 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1057 break;
1058 default:
1059 gb_tile_moden = 0;
1060 break;
1061 }
 1062 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1063 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1064 }
1065 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1066 switch (reg_offset) {
1067 case 0:
1068 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1071 NUM_BANKS(ADDR_SURF_16_BANK));
1072 break;
1073 case 1:
1074 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1077 NUM_BANKS(ADDR_SURF_16_BANK));
1078 break;
1079 case 2:
1080 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1083 NUM_BANKS(ADDR_SURF_16_BANK));
1084 break;
1085 case 3:
1086 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1089 NUM_BANKS(ADDR_SURF_16_BANK));
1090 break;
1091 case 4:
1092 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1095 NUM_BANKS(ADDR_SURF_8_BANK));
1096 break;
1097 case 5:
1098 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1101 NUM_BANKS(ADDR_SURF_4_BANK));
1102 break;
1103 case 6:
1104 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1107 NUM_BANKS(ADDR_SURF_2_BANK));
1108 break;
1109 case 8:
1110 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1113 NUM_BANKS(ADDR_SURF_16_BANK));
1114 break;
1115 case 9:
1116 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1119 NUM_BANKS(ADDR_SURF_16_BANK));
1120 break;
1121 case 10:
1122 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1125 NUM_BANKS(ADDR_SURF_16_BANK));
1126 break;
1127 case 11:
1128 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1131 NUM_BANKS(ADDR_SURF_16_BANK));
1132 break;
1133 case 12:
1134 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1137 NUM_BANKS(ADDR_SURF_8_BANK));
1138 break;
1139 case 13:
1140 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1143 NUM_BANKS(ADDR_SURF_4_BANK));
1144 break;
1145 case 14:
1146 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1149 NUM_BANKS(ADDR_SURF_2_BANK));
1150 break;
1151 default:
1152 gb_tile_moden = 0;
1153 break;
1154 }
1155 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1156 }
1157 } else if (num_pipe_configs == 4) {
1158 if (num_rbs == 4) {
1159 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1160 switch (reg_offset) {
1161 case 0:
1162 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1164 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1166 break;
1167 case 1:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1172 break;
1173 case 2:
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1177 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1178 break;
1179 case 3:
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1184 break;
1185 case 4:
1186 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 TILE_SPLIT(split_equal_to_row_size));
1190 break;
1191 case 5:
1192 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1193 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1194 break;
1195 case 6:
1196 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1200 break;
1201 case 7:
1202 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1204 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1205 TILE_SPLIT(split_equal_to_row_size));
1206 break;
1207 case 8:
1208 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1209 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1210 break;
1211 case 9:
1212 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1214 break;
1215 case 10:
1216 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1217 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1218 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1220 break;
1221 case 11:
1222 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1223 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1226 break;
1227 case 12:
1228 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1230 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232 break;
1233 case 13:
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1236 break;
1237 case 14:
1238 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1240 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1242 break;
1243 case 16:
1244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1245 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1246 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248 break;
1249 case 17:
1250 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1252 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254 break;
1255 case 27:
1256 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1258 break;
1259 case 28:
1260 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1261 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1262 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1264 break;
1265 case 29:
1266 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1267 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1268 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1270 break;
1271 case 30:
1272 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1274 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1276 break;
1277 default:
1278 gb_tile_moden = 0;
1279 break;
1280 }
 1281 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1282 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1283 }
1284 } else if (num_rbs < 4) {
1285 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1286 switch (reg_offset) {
1287 case 0:
1288 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1290 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1292 break;
1293 case 1:
1294 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1296 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1298 break;
1299 case 2:
1300 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1302 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1304 break;
1305 case 3:
1306 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1310 break;
1311 case 4:
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 TILE_SPLIT(split_equal_to_row_size));
1316 break;
1317 case 5:
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1319 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1320 break;
1321 case 6:
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1326 break;
1327 case 7:
1328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1330 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1331 TILE_SPLIT(split_equal_to_row_size));
1332 break;
1333 case 8:
1334 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1335 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1336 break;
1337 case 9:
1338 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1340 break;
1341 case 10:
1342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1344 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1346 break;
1347 case 11:
1348 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1349 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352 break;
1353 case 12:
1354 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358 break;
1359 case 13:
1360 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1362 break;
1363 case 14:
1364 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1365 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1366 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1368 break;
1369 case 16:
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374 break;
1375 case 17:
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1378 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380 break;
1381 case 27:
1382 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1384 break;
1385 case 28:
1386 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1387 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1388 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1390 break;
1391 case 29:
1392 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1393 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1394 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1396 break;
1397 case 30:
1398 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1399 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1400 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1402 break;
1403 default:
1404 gb_tile_moden = 0;
1405 break;
1406 }
 1407 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1408 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1409 }
1410 }
1411 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1412 switch (reg_offset) {
1413 case 0:
1414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1417 NUM_BANKS(ADDR_SURF_16_BANK));
1418 break;
1419 case 1:
1420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1423 NUM_BANKS(ADDR_SURF_16_BANK));
1424 break;
1425 case 2:
1426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1429 NUM_BANKS(ADDR_SURF_16_BANK));
1430 break;
1431 case 3:
1432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1435 NUM_BANKS(ADDR_SURF_16_BANK));
1436 break;
1437 case 4:
1438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1441 NUM_BANKS(ADDR_SURF_16_BANK));
1442 break;
1443 case 5:
1444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1447 NUM_BANKS(ADDR_SURF_8_BANK));
1448 break;
1449 case 6:
1450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1453 NUM_BANKS(ADDR_SURF_4_BANK));
1454 break;
1455 case 8:
1456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1459 NUM_BANKS(ADDR_SURF_16_BANK));
1460 break;
1461 case 9:
1462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1465 NUM_BANKS(ADDR_SURF_16_BANK));
1466 break;
1467 case 10:
1468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1471 NUM_BANKS(ADDR_SURF_16_BANK));
1472 break;
1473 case 11:
1474 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1477 NUM_BANKS(ADDR_SURF_16_BANK));
1478 break;
1479 case 12:
1480 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1483 NUM_BANKS(ADDR_SURF_16_BANK));
1484 break;
1485 case 13:
1486 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1489 NUM_BANKS(ADDR_SURF_8_BANK));
1490 break;
1491 case 14:
1492 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1495 NUM_BANKS(ADDR_SURF_4_BANK));
1496 break;
1497 default:
1498 gb_tile_moden = 0;
1499 break;
1500 }
1501 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1502 }
1503 } else if (num_pipe_configs == 2) {
1504 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1505 switch (reg_offset) {
1506 case 0:
1507 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1508 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1509 PIPE_CONFIG(ADDR_SURF_P2) |
1510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1511 break;
1512 case 1:
1513 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1515 PIPE_CONFIG(ADDR_SURF_P2) |
1516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1517 break;
1518 case 2:
1519 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1520 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1521 PIPE_CONFIG(ADDR_SURF_P2) |
1522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1523 break;
1524 case 3:
1525 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 PIPE_CONFIG(ADDR_SURF_P2) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1529 break;
1530 case 4:
1531 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 PIPE_CONFIG(ADDR_SURF_P2) |
1534 TILE_SPLIT(split_equal_to_row_size));
1535 break;
1536 case 5:
1537 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1539 break;
1540 case 6:
1541 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1543 PIPE_CONFIG(ADDR_SURF_P2) |
1544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1545 break;
1546 case 7:
1547 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1549 PIPE_CONFIG(ADDR_SURF_P2) |
1550 TILE_SPLIT(split_equal_to_row_size));
1551 break;
1552 case 8:
1553 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1554 break;
1555 case 9:
1556 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1558 break;
1559 case 10:
1560 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1562 PIPE_CONFIG(ADDR_SURF_P2) |
1563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1564 break;
1565 case 11:
1566 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1567 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1568 PIPE_CONFIG(ADDR_SURF_P2) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570 break;
1571 case 12:
1572 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1573 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1574 PIPE_CONFIG(ADDR_SURF_P2) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576 break;
1577 case 13:
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1580 break;
1581 case 14:
1582 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1584 PIPE_CONFIG(ADDR_SURF_P2) |
1585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1586 break;
1587 case 16:
1588 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1589 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1590 PIPE_CONFIG(ADDR_SURF_P2) |
1591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592 break;
1593 case 17:
1594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1595 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1596 PIPE_CONFIG(ADDR_SURF_P2) |
1597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598 break;
1599 case 27:
1600 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1602 break;
1603 case 28:
1604 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1606 PIPE_CONFIG(ADDR_SURF_P2) |
1607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1608 break;
1609 case 29:
1610 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1611 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1612 PIPE_CONFIG(ADDR_SURF_P2) |
1613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1614 break;
1615 case 30:
1616 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1617 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1618 PIPE_CONFIG(ADDR_SURF_P2) |
1619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1620 break;
1621 default:
1622 gb_tile_moden = 0;
1623 break;
1624 }
 1625 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1626 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1627 }
1628 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1629 switch (reg_offset) {
1630 case 0:
1631 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1634 NUM_BANKS(ADDR_SURF_16_BANK));
1635 break;
1636 case 1:
1637 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1640 NUM_BANKS(ADDR_SURF_16_BANK));
1641 break;
1642 case 2:
1643 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1646 NUM_BANKS(ADDR_SURF_16_BANK));
1647 break;
1648 case 3:
1649 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1652 NUM_BANKS(ADDR_SURF_16_BANK));
1653 break;
1654 case 4:
1655 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1658 NUM_BANKS(ADDR_SURF_16_BANK));
1659 break;
1660 case 5:
1661 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1664 NUM_BANKS(ADDR_SURF_16_BANK));
1665 break;
1666 case 6:
1667 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1670 NUM_BANKS(ADDR_SURF_8_BANK));
1671 break;
1672 case 8:
1673 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1676 NUM_BANKS(ADDR_SURF_16_BANK));
1677 break;
1678 case 9:
1679 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1682 NUM_BANKS(ADDR_SURF_16_BANK));
1683 break;
1684 case 10:
1685 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1688 NUM_BANKS(ADDR_SURF_16_BANK));
1689 break;
1690 case 11:
1691 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1692 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1693 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1694 NUM_BANKS(ADDR_SURF_16_BANK));
1695 break;
1696 case 12:
1697 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1698 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1699 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1700 NUM_BANKS(ADDR_SURF_16_BANK));
1701 break;
1702 case 13:
1703 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1704 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1705 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1706 NUM_BANKS(ADDR_SURF_16_BANK));
1707 break;
1708 case 14:
1709 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1712 NUM_BANKS(ADDR_SURF_8_BANK));
1713 break;
1714 default:
1715 gb_tile_moden = 0;
1716 break;
1717 }
1718 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1719 }
1720 } else
1721 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1722}
1723
1724/**
1725 * cik_select_se_sh - select which SE, SH to address
1726 *
1727 * @rdev: radeon_device pointer
1728 * @se_num: shader engine to address
1729 * @sh_num: sh block to address
1730 *
1731 * Select which SE, SH combinations to address. Certain
1732 * registers are instanced per SE or SH. 0xffffffff means
1733 * broadcast to all SEs or SHs (CIK).
1734 */
1735static void cik_select_se_sh(struct radeon_device *rdev,
1736 u32 se_num, u32 sh_num)
1737{
1738 u32 data = INSTANCE_BROADCAST_WRITES;
1739
1740 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
b0fe3d39 1741 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
8cc1a532
AD
1742 else if (se_num == 0xffffffff)
1743 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1744 else if (sh_num == 0xffffffff)
1745 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1746 else
1747 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1748 WREG32(GRBM_GFX_INDEX, data);
1749}
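
/* Usage sketch (editorial addition, not part of the original source):
 * instanced register accesses are typically bracketed by a specific
 * select and a return to broadcast, as cik_setup_rb() below does:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */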
1750
1751/**
1752 * cik_create_bitmask - create a bitmask
1753 *
1754 * @bit_width: length of the mask
1755 *
1756 * create a variable length bit mask (CIK).
1757 * Returns the bitmask.
1758 */
1759static u32 cik_create_bitmask(u32 bit_width)
1760{
1761 u32 i, mask = 0;
1762
1763 for (i = 0; i < bit_width; i++) {
1764 mask <<= 1;
1765 mask |= 1;
1766 }
1767 return mask;
1768}
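
/* Worked example (editorial note): cik_create_bitmask(4) returns 0xf;
 * for any bit_width below 32 the loop above is equivalent to
 *
 *	mask = (1U << bit_width) - 1;
 */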
1769
1770/**
1771 * cik_get_rb_disabled - calculate the bitmask of disabled RBs
1772 *
1773 * @rdev: radeon_device pointer
1774 * @max_rb_num: max RBs (render backends) for the asic
1775 * @se_num: number of SEs (shader engines) for the asic
1776 * @sh_per_se: number of SH blocks per SE for the asic
1777 *
1778 * Calculates the bitmask of disabled RBs (CIK).
1779 * Returns the disabled RB bitmask.
1780 */
1781static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1782 u32 max_rb_num, u32 se_num,
1783 u32 sh_per_se)
1784{
1785 u32 data, mask;
1786
1787 data = RREG32(CC_RB_BACKEND_DISABLE);
1788 if (data & 1)
1789 data &= BACKEND_DISABLE_MASK;
1790 else
1791 data = 0;
1792 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1793
1794 data >>= BACKEND_DISABLE_SHIFT;
1795
1796 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1797
1798 return data & mask;
1799}
1800
1801/**
1802 * cik_setup_rb - setup the RBs on the asic
1803 *
1804 * @rdev: radeon_device pointer
1805 * @se_num: number of SEs (shader engines) for the asic
1806 * @sh_per_se: number of SH blocks per SE for the asic
1807 * @max_rb_num: max RBs (render backends) for the asic
1808 *
1809 * Configures per-SE/SH RB registers (CIK).
1810 */
1811static void cik_setup_rb(struct radeon_device *rdev,
1812 u32 se_num, u32 sh_per_se,
1813 u32 max_rb_num)
1814{
1815 int i, j;
1816 u32 data, mask;
1817 u32 disabled_rbs = 0;
1818 u32 enabled_rbs = 0;
1819
1820 for (i = 0; i < se_num; i++) {
1821 for (j = 0; j < sh_per_se; j++) {
1822 cik_select_se_sh(rdev, i, j);
1823 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1824 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1825 }
1826 }
1827 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1828
1829 mask = 1;
1830 for (i = 0; i < max_rb_num; i++) {
1831 if (!(disabled_rbs & mask))
1832 enabled_rbs |= mask;
1833 mask <<= 1;
1834 }
1835
1836 for (i = 0; i < se_num; i++) {
1837 cik_select_se_sh(rdev, i, 0xffffffff);
1838 data = 0;
1839 for (j = 0; j < sh_per_se; j++) {
1840 switch (enabled_rbs & 3) {
1841 case 1:
1842 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1843 break;
1844 case 2:
1845 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1846 break;
1847 case 3:
1848 default:
1849 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1850 break;
1851 }
1852 enabled_rbs >>= 2;
1853 }
1854 WREG32(PA_SC_RASTER_CONFIG, data);
1855 }
1856 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1857}
1858
1859/**
1860 * cik_gpu_init - setup the 3D engine
1861 *
1862 * @rdev: radeon_device pointer
1863 *
1864 * Configures the 3D engine and tiling configuration
1865 * registers so that the 3D engine is usable.
1866 */
1867static void cik_gpu_init(struct radeon_device *rdev)
1868{
1869 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1870 u32 mc_shared_chmap, mc_arb_ramcfg;
1871 u32 hdp_host_path_cntl;
1872 u32 tmp;
1873 int i, j;
1874
1875 switch (rdev->family) {
1876 case CHIP_BONAIRE:
1877 rdev->config.cik.max_shader_engines = 2;
1878 rdev->config.cik.max_tile_pipes = 4;
1879 rdev->config.cik.max_cu_per_sh = 7;
1880 rdev->config.cik.max_sh_per_se = 1;
1881 rdev->config.cik.max_backends_per_se = 2;
1882 rdev->config.cik.max_texture_channel_caches = 4;
1883 rdev->config.cik.max_gprs = 256;
1884 rdev->config.cik.max_gs_threads = 32;
1885 rdev->config.cik.max_hw_contexts = 8;
1886
1887 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1888 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1889 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1890 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1891 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1892 break;
1893 case CHIP_KAVERI:
1894 /* TODO */
1895 break;
1896 case CHIP_KABINI:
1897 default:
1898 rdev->config.cik.max_shader_engines = 1;
1899 rdev->config.cik.max_tile_pipes = 2;
1900 rdev->config.cik.max_cu_per_sh = 2;
1901 rdev->config.cik.max_sh_per_se = 1;
1902 rdev->config.cik.max_backends_per_se = 1;
1903 rdev->config.cik.max_texture_channel_caches = 2;
1904 rdev->config.cik.max_gprs = 256;
1905 rdev->config.cik.max_gs_threads = 16;
1906 rdev->config.cik.max_hw_contexts = 8;
1907
1908 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1909 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1910 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1911 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1912 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1913 break;
1914 }
1915
1916 /* Initialize HDP */
1917 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1918 WREG32((0x2c14 + j), 0x00000000);
1919 WREG32((0x2c18 + j), 0x00000000);
1920 WREG32((0x2c1c + j), 0x00000000);
1921 WREG32((0x2c20 + j), 0x00000000);
1922 WREG32((0x2c24 + j), 0x00000000);
1923 }
1924
1925 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1926
1927 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1928
1929 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1930 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1931
1932 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1933 rdev->config.cik.mem_max_burst_length_bytes = 256;
1934 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1935 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1936 if (rdev->config.cik.mem_row_size_in_kb > 4)
1937 rdev->config.cik.mem_row_size_in_kb = 4;
1938 /* XXX use MC settings? */
1939 rdev->config.cik.shader_engine_tile_size = 32;
1940 rdev->config.cik.num_gpus = 1;
1941 rdev->config.cik.multi_gpu_tile_size = 64;
1942
1943 /* fix up row size */
1944 gb_addr_config &= ~ROW_SIZE_MASK;
1945 switch (rdev->config.cik.mem_row_size_in_kb) {
1946 case 1:
1947 default:
1948 gb_addr_config |= ROW_SIZE(0);
1949 break;
1950 case 2:
1951 gb_addr_config |= ROW_SIZE(1);
1952 break;
1953 case 4:
1954 gb_addr_config |= ROW_SIZE(2);
1955 break;
1956 }
1957
1958 /* setup tiling info dword. gb_addr_config is not adequate since it does
1959 * not have bank info, so create a custom tiling dword.
1960 * bits 3:0 num_pipes
1961 * bits 7:4 num_banks
1962 * bits 11:8 group_size
1963 * bits 15:12 row_size
1964 */
1965 rdev->config.cik.tile_config = 0;
1966 switch (rdev->config.cik.num_tile_pipes) {
1967 case 1:
1968 rdev->config.cik.tile_config |= (0 << 0);
1969 break;
1970 case 2:
1971 rdev->config.cik.tile_config |= (1 << 0);
1972 break;
1973 case 4:
1974 rdev->config.cik.tile_config |= (2 << 0);
1975 break;
1976 case 8:
1977 default:
1978 /* XXX what about 12? */
1979 rdev->config.cik.tile_config |= (3 << 0);
1980 break;
1981 }
1982 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1983 rdev->config.cik.tile_config |= 1 << 4;
1984 else
1985 rdev->config.cik.tile_config |= 0 << 4;
1986 rdev->config.cik.tile_config |=
1987 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1988 rdev->config.cik.tile_config |=
1989 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
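
/* Decoding sketch (editorial; variable names are illustrative): the
 * custom dword assembled above can be unpacked with the same layout
 * described in the comment:
 *
 *	num_pipes  = (tile_config >> 0) & 0xf;
 *	num_banks  = (tile_config >> 4) & 0xf;
 *	group_size = (tile_config >> 8) & 0xf;
 *	row_size   = (tile_config >> 12) & 0xf;
 */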
1990
1991 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1992 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1993 WREG32(DMIF_ADDR_CALC, gb_addr_config);
21a93e13
AD
1994 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1995 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
87167bb1
CK
1996 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1997 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1998 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
8cc1a532
AD
1999
2000 cik_tiling_mode_table_init(rdev);
2001
2002 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2003 rdev->config.cik.max_sh_per_se,
2004 rdev->config.cik.max_backends_per_se);
2005
2006 /* set HW defaults for 3D engine */
2007 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2008
2009 WREG32(SX_DEBUG_1, 0x20);
2010
2011 WREG32(TA_CNTL_AUX, 0x00010000);
2012
2013 tmp = RREG32(SPI_CONFIG_CNTL);
2014 tmp |= 0x03000000;
2015 WREG32(SPI_CONFIG_CNTL, tmp);
2016
2017 WREG32(SQ_CONFIG, 1);
2018
2019 WREG32(DB_DEBUG, 0);
2020
2021 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2022 tmp |= 0x00000400;
2023 WREG32(DB_DEBUG2, tmp);
2024
2025 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2026 tmp |= 0x00020200;
2027 WREG32(DB_DEBUG3, tmp);
2028
2029 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2030 tmp |= 0x00018208;
2031 WREG32(CB_HW_CONTROL, tmp);
2032
2033 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2034
2035 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2036 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2037 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2038 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2039
2040 WREG32(VGT_NUM_INSTANCES, 1);
2041
2042 WREG32(CP_PERFMON_CNTL, 0);
2043
2044 WREG32(SQ_CONFIG, 0);
2045
2046 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2047 FORCE_EOV_MAX_REZ_CNT(255)));
2048
2049 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2050 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2051
2052 WREG32(VGT_GS_VERTEX_REUSE, 16);
2053 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2054
2055 tmp = RREG32(HDP_MISC_CNTL);
2056 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2057 WREG32(HDP_MISC_CNTL, tmp);
2058
2059 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2060 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2061
2062 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2063 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2064
2065 udelay(50);
2066}
2067
2cae3bc3
AD
2068/*
2069 * GPU scratch register helper functions.
2070 */
2071/**
2072 * cik_scratch_init - setup driver info for CP scratch regs
2073 *
2074 * @rdev: radeon_device pointer
2075 *
2076 * Set up the number and offset of the CP scratch registers.
2077 * NOTE: use of CP scratch registers is a legacy interface and
2078 * is not used by default on newer asics (r6xx+). On newer asics,
2079 * memory buffers are used for fences rather than scratch regs.
2080 */
2081static void cik_scratch_init(struct radeon_device *rdev)
2082{
2083 int i;
2084
2085 rdev->scratch.num_reg = 7;
2086 rdev->scratch.reg_base = SCRATCH_REG0;
2087 for (i = 0; i < rdev->scratch.num_reg; i++) {
2088 rdev->scratch.free[i] = true;
2089 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2090 }
2091}
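
/* Worked example (editorial): with reg_base == SCRATCH_REG0 the loop
 * above yields reg[0] = SCRATCH_REG0, reg[1] = SCRATCH_REG0 + 4, ...,
 * reg[6] = SCRATCH_REG0 + 24, i.e. seven consecutive 32-bit registers.
 */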
2092
fbc832c7
AD
2093/**
2094 * cik_ring_test - basic gfx ring test
2095 *
2096 * @rdev: radeon_device pointer
2097 * @ring: radeon_ring structure holding ring information
2098 *
2099 * Allocate a scratch register and write to it using the gfx ring (CIK).
2100 * Provides a basic gfx ring test to verify that the ring is working.
2101 * Used by cik_cp_gfx_resume();
2102 * Returns 0 on success, error on failure.
2103 */
2104int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2105{
2106 uint32_t scratch;
2107 uint32_t tmp = 0;
2108 unsigned i;
2109 int r;
2110
2111 r = radeon_scratch_get(rdev, &scratch);
2112 if (r) {
2113 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2114 return r;
2115 }
2116 WREG32(scratch, 0xCAFEDEAD);
2117 r = radeon_ring_lock(rdev, ring, 3);
2118 if (r) {
2119 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2120 radeon_scratch_free(rdev, scratch);
2121 return r;
2122 }
2123 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2124 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2125 radeon_ring_write(ring, 0xDEADBEEF);
2126 radeon_ring_unlock_commit(rdev, ring);
963e81f9 2127
fbc832c7
AD
2128 for (i = 0; i < rdev->usec_timeout; i++) {
2129 tmp = RREG32(scratch);
2130 if (tmp == 0xDEADBEEF)
2131 break;
2132 DRM_UDELAY(1);
2133 }
2134 if (i < rdev->usec_timeout) {
2135 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2136 } else {
2137 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2138 ring->idx, scratch, tmp);
2139 r = -EINVAL;
2140 }
2141 radeon_scratch_free(rdev, scratch);
2142 return r;
2143}
2144
2cae3bc3 2145/**
b07fdd38 2146 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2cae3bc3
AD
2147 *
2148 * @rdev: radeon_device pointer
2149 * @fence: radeon fence object
2150 *
2151 * Emits a fence sequence number on the gfx ring and flushes
2152 * GPU caches.
2153 */
b07fdd38
AD
2154void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2155 struct radeon_fence *fence)
2cae3bc3
AD
2156{
2157 struct radeon_ring *ring = &rdev->ring[fence->ring];
2158 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2159
2160 /* EVENT_WRITE_EOP - flush caches, send int */
2161 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2162 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2163 EOP_TC_ACTION_EN |
2164 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2165 EVENT_INDEX(5)));
2166 radeon_ring_write(ring, addr & 0xfffffffc);
2167 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2168 radeon_ring_write(ring, fence->seq);
2169 radeon_ring_write(ring, 0);
2170 /* HDP flush */
2171 /* We should be using the new WAIT_REG_MEM special op packet here
2172 * but it causes the CP to hang
2173 */
2174 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2175 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2176 WRITE_DATA_DST_SEL(0)));
2177 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2178 radeon_ring_write(ring, 0);
2179 radeon_ring_write(ring, 0);
2180}
2181
b07fdd38
AD
2182/**
2183 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2184 *
2185 * @rdev: radeon_device pointer
2186 * @fence: radeon fence object
2187 *
2188 * Emits a fence sequence number on the compute ring and flushes
2189 * GPU caches.
2190 */
2191void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2192 struct radeon_fence *fence)
2193{
2194 struct radeon_ring *ring = &rdev->ring[fence->ring];
2195 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2196
2197 /* RELEASE_MEM - flush caches, send int */
2198 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2199 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2200 EOP_TC_ACTION_EN |
2201 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2202 EVENT_INDEX(5)));
2203 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2204 radeon_ring_write(ring, addr & 0xfffffffc);
2205 radeon_ring_write(ring, upper_32_bits(addr));
2206 radeon_ring_write(ring, fence->seq);
2207 radeon_ring_write(ring, 0);
2208 /* HDP flush */
2209 /* We should be using the new WAIT_REG_MEM special op packet here
2210 * but it causes the CP to hang
2211 */
2212 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2213 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2214 WRITE_DATA_DST_SEL(0)));
2215 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2216 radeon_ring_write(ring, 0);
2217 radeon_ring_write(ring, 0);
2218}
2219
2cae3bc3
AD
2220void cik_semaphore_ring_emit(struct radeon_device *rdev,
2221 struct radeon_ring *ring,
2222 struct radeon_semaphore *semaphore,
2223 bool emit_wait)
2224{
2225 uint64_t addr = semaphore->gpu_addr;
2226 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2227
2228 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2229 radeon_ring_write(ring, addr & 0xffffffff);
2230 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2231}
2232
2233/*
2234 * IB stuff
2235 */
2236/**
2237 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2238 *
2239 * @rdev: radeon_device pointer
2240 * @ib: radeon indirect buffer object
2241 *
2242 * Emits a DE (drawing engine) or CE (constant engine) IB
2243 * on the gfx ring. IBs are usually generated by userspace
2244 * acceleration drivers and submitted to the kernel for
2245 * scheduling on the ring. This function schedules the IB
2246 * on the gfx ring for execution by the GPU.
2247 */
2248void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2249{
2250 struct radeon_ring *ring = &rdev->ring[ib->ring];
2251 u32 header, control = INDIRECT_BUFFER_VALID;
2252
2253 if (ib->is_const_ib) {
2254 /* set switch buffer packet before const IB */
2255 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2256 radeon_ring_write(ring, 0);
2257
2258 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2259 } else {
2260 u32 next_rptr;
2261 if (ring->rptr_save_reg) {
2262 next_rptr = ring->wptr + 3 + 4;
2263 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2264 radeon_ring_write(ring, ((ring->rptr_save_reg -
2265 PACKET3_SET_UCONFIG_REG_START) >> 2));
2266 radeon_ring_write(ring, next_rptr);
2267 } else if (rdev->wb.enabled) {
2268 next_rptr = ring->wptr + 5 + 4;
2269 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2270 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2271 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2272 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2273 radeon_ring_write(ring, next_rptr);
2274 }
2275
2276 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2277 }
2278
2279 control |= ib->length_dw |
2280 (ib->vm ? (ib->vm->id << 24) : 0);
2281
2282 radeon_ring_write(ring, header);
2283 radeon_ring_write(ring,
2284#ifdef __BIG_ENDIAN
2285 (2 << 0) |
2286#endif
2287 (ib->gpu_addr & 0xFFFFFFFC));
2288 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2289 radeon_ring_write(ring, control);
2290}
2291
fbc832c7
AD
2292/**
2293 * cik_ib_test - basic gfx ring IB test
2294 *
2295 * @rdev: radeon_device pointer
2296 * @ring: radeon_ring structure holding ring information
2297 *
2298 * Allocate an IB and execute it on the gfx ring (CIK).
2299 * Provides a basic gfx ring test to verify that IBs are working.
2300 * Returns 0 on success, error on failure.
2301 */
2302int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2303{
2304 struct radeon_ib ib;
2305 uint32_t scratch;
2306 uint32_t tmp = 0;
2307 unsigned i;
2308 int r;
2309
2310 r = radeon_scratch_get(rdev, &scratch);
2311 if (r) {
2312 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2313 return r;
2314 }
2315 WREG32(scratch, 0xCAFEDEAD);
2316 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2317 if (r) {
2318 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2319 return r;
2320 }
2321 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2322 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2323 ib.ptr[2] = 0xDEADBEEF;
2324 ib.length_dw = 3;
2325 r = radeon_ib_schedule(rdev, &ib, NULL);
2326 if (r) {
2327 radeon_scratch_free(rdev, scratch);
2328 radeon_ib_free(rdev, &ib);
2329 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2330 return r;
2331 }
2332 r = radeon_fence_wait(ib.fence, false);
2333 if (r) {
2334 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2335 return r;
2336 }
2337 for (i = 0; i < rdev->usec_timeout; i++) {
2338 tmp = RREG32(scratch);
2339 if (tmp == 0xDEADBEEF)
2340 break;
2341 DRM_UDELAY(1);
2342 }
2343 if (i < rdev->usec_timeout) {
2344 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2345 } else {
2346 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2347 scratch, tmp);
2348 r = -EINVAL;
2349 }
2350 radeon_scratch_free(rdev, scratch);
2351 radeon_ib_free(rdev, &ib);
2352 return r;
2353}
2354
841cf442
AD
2355/*
2356 * CP.
2357 * On CIK, gfx and compute now have independent command processors.
2358 *
2359 * GFX
2360 * Gfx consists of a single ring and can process both gfx jobs and
2361 * compute jobs. The gfx CP consists of three microengines (ME):
2362 * PFP - Pre-Fetch Parser
2363 * ME - Micro Engine
2364 * CE - Constant Engine
2365 * The PFP and ME make up what is considered the Drawing Engine (DE).
2366 * The CE is an asynchronous engine used for updating buffer descriptors
2367 * used by the DE so that they can be loaded into cache in parallel
2368 * while the DE is processing state update packets.
2369 *
2370 * Compute
2371 * The compute CP consists of two microengines (ME):
2372 * MEC1 - Compute MicroEngine 1
2373 * MEC2 - Compute MicroEngine 2
2374 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2375 * The queues are exposed to userspace and are programmed directly
2376 * by the compute runtime.
2377 */
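
/* Worked example (editorial, mirrors cik_mec_init() below): with 4 pipes
 * per MEC and 8 queues per pipe, the total number of compute queues is
 *
 *	num_queue = num_mec * 4 * 8;
 *
 * i.e. 64 queues with 2 MECs (KAVERI) and 32 queues with 1 MEC.
 */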
2378/**
2379 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2380 *
2381 * @rdev: radeon_device pointer
2382 * @enable: enable or disable the MEs
2383 *
2384 * Halts or unhalts the gfx MEs.
2385 */
2386static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2387{
2388 if (enable)
2389 WREG32(CP_ME_CNTL, 0);
2390 else {
2391 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2392 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2393 }
2394 udelay(50);
2395}
2396
2397/**
2398 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2399 *
2400 * @rdev: radeon_device pointer
2401 *
2402 * Loads the gfx PFP, ME, and CE ucode.
2403 * Returns 0 for success, -EINVAL if the ucode is not available.
2404 */
2405static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2406{
2407 const __be32 *fw_data;
2408 int i;
2409
2410 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2411 return -EINVAL;
2412
2413 cik_cp_gfx_enable(rdev, false);
2414
2415 /* PFP */
2416 fw_data = (const __be32 *)rdev->pfp_fw->data;
2417 WREG32(CP_PFP_UCODE_ADDR, 0);
2418 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2419 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2420 WREG32(CP_PFP_UCODE_ADDR, 0);
2421
2422 /* CE */
2423 fw_data = (const __be32 *)rdev->ce_fw->data;
2424 WREG32(CP_CE_UCODE_ADDR, 0);
2425 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2426 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2427 WREG32(CP_CE_UCODE_ADDR, 0);
2428
2429 /* ME */
2430 fw_data = (const __be32 *)rdev->me_fw->data;
2431 WREG32(CP_ME_RAM_WADDR, 0);
2432 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2433 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2434 WREG32(CP_ME_RAM_WADDR, 0);
2435
2436 WREG32(CP_PFP_UCODE_ADDR, 0);
2437 WREG32(CP_CE_UCODE_ADDR, 0);
2438 WREG32(CP_ME_RAM_WADDR, 0);
2439 WREG32(CP_ME_RAM_RADDR, 0);
2440 return 0;
2441}
2442
2443/**
2444 * cik_cp_gfx_start - start the gfx ring
2445 *
2446 * @rdev: radeon_device pointer
2447 *
2448 * Enables the ring and loads the clear state context and other
2449 * packets required to init the ring.
2450 * Returns 0 for success, error for failure.
2451 */
2452static int cik_cp_gfx_start(struct radeon_device *rdev)
2453{
2454 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2455 int r, i;
2456
2457 /* init the CP */
2458 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2459 WREG32(CP_ENDIAN_SWAP, 0);
2460 WREG32(CP_DEVICE_ID, 1);
2461
2462 cik_cp_gfx_enable(rdev, true);
2463
2464 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2465 if (r) {
2466 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2467 return r;
2468 }
2469
2470 /* init the CE partitions. CE only used for gfx on CIK */
2471 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2472 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2473 radeon_ring_write(ring, 0xc000);
2474 radeon_ring_write(ring, 0xc000);
2475
2476 /* setup clear context state */
2477 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2478 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2479
2480 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2481 radeon_ring_write(ring, 0x80000000);
2482 radeon_ring_write(ring, 0x80000000);
2483
2484 for (i = 0; i < cik_default_size; i++)
2485 radeon_ring_write(ring, cik_default_state[i]);
2486
2487 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2488 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2489
2490 /* set clear context state */
2491 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2492 radeon_ring_write(ring, 0);
2493
2494 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2495 radeon_ring_write(ring, 0x00000316);
2496 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2497 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2498
2499 radeon_ring_unlock_commit(rdev, ring);
2500
2501 return 0;
2502}
2503
2504/**
2505 * cik_cp_gfx_fini - stop the gfx ring
2506 *
2507 * @rdev: radeon_device pointer
2508 *
2509 * Stop the gfx ring and tear down the driver ring
2510 * info.
2511 */
2512static void cik_cp_gfx_fini(struct radeon_device *rdev)
2513{
2514 cik_cp_gfx_enable(rdev, false);
2515 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2516}
2517
2518/**
2519 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2520 *
2521 * @rdev: radeon_device pointer
2522 *
2523 * Program the location and size of the gfx ring buffer
2524 * and test it to make sure it's working.
2525 * Returns 0 for success, error for failure.
2526 */
2527static int cik_cp_gfx_resume(struct radeon_device *rdev)
2528{
2529 struct radeon_ring *ring;
2530 u32 tmp;
2531 u32 rb_bufsz;
2532 u64 rb_addr;
2533 int r;
2534
2535 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2536 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2537
2538 /* Set the write pointer delay */
2539 WREG32(CP_RB_WPTR_DELAY, 0);
2540
2541 /* set the RB to use vmid 0 */
2542 WREG32(CP_RB_VMID, 0);
2543
2544 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2545
2546 /* ring 0 - compute and gfx */
2547 /* Set ring buffer size */
2548 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2549 rb_bufsz = drm_order(ring->ring_size / 8);
2550 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2551#ifdef __BIG_ENDIAN
2552 tmp |= BUF_SWAP_32BIT;
2553#endif
2554 WREG32(CP_RB0_CNTL, tmp);
2555
2556 /* Initialize the ring buffer's read and write pointers */
2557 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2558 ring->wptr = 0;
2559 WREG32(CP_RB0_WPTR, ring->wptr);
2560
2561 /* set the wb address whether it's enabled or not */
2562 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2563 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2564
2565 /* scratch register shadowing is no longer supported */
2566 WREG32(SCRATCH_UMSK, 0);
2567
2568 if (!rdev->wb.enabled)
2569 tmp |= RB_NO_UPDATE;
2570
2571 mdelay(1);
2572 WREG32(CP_RB0_CNTL, tmp);
2573
2574 rb_addr = ring->gpu_addr >> 8;
2575 WREG32(CP_RB0_BASE, rb_addr);
2576 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2577
2578 ring->rptr = RREG32(CP_RB0_RPTR);
2579
2580 /* start the ring */
2581 cik_cp_gfx_start(rdev);
2582 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2583 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2584 if (r) {
2585 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2586 return r;
2587 }
2588 return 0;
2589}
2590
963e81f9
AD
2591u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2592 struct radeon_ring *ring)
2593{
2594 u32 rptr;
2595
2596
2597
2598 if (rdev->wb.enabled) {
2599 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2600 } else {
2601 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2602 rptr = RREG32(CP_HQD_PQ_RPTR);
2603 cik_srbm_select(rdev, 0, 0, 0, 0);
2604 }
2605 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2606
2607 return rptr;
2608}
2609
2610u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2611 struct radeon_ring *ring)
2612{
2613 u32 wptr;
2614
2615 if (rdev->wb.enabled) {
2616 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2617 } else {
2618 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2619 wptr = RREG32(CP_HQD_PQ_WPTR);
2620 cik_srbm_select(rdev, 0, 0, 0, 0);
2621 }
2622 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2623
2624 return wptr;
2625}
2626
2627void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2628 struct radeon_ring *ring)
2629{
2630 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2631
2632 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2633 WDOORBELL32(ring->doorbell_offset, wptr);
2634}
2635
841cf442
AD
2636/**
2637 * cik_cp_compute_enable - enable/disable the compute CP MEs
2638 *
2639 * @rdev: radeon_device pointer
2640 * @enable: enable or disable the MEs
2641 *
2642 * Halts or unhalts the compute MEs.
2643 */
2644static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2645{
2646 if (enable)
2647 WREG32(CP_MEC_CNTL, 0);
2648 else
2649 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2650 udelay(50);
2651}
2652
2653/**
2654 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2655 *
2656 * @rdev: radeon_device pointer
2657 *
2658 * Loads the compute MEC1&2 ucode.
2659 * Returns 0 for success, -EINVAL if the ucode is not available.
2660 */
2661static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2662{
2663 const __be32 *fw_data;
2664 int i;
2665
2666 if (!rdev->mec_fw)
2667 return -EINVAL;
2668
2669 cik_cp_compute_enable(rdev, false);
2670
2671 /* MEC1 */
2672 fw_data = (const __be32 *)rdev->mec_fw->data;
2673 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2674 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2675 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2676 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2677
2678 if (rdev->family == CHIP_KAVERI) {
2679 /* MEC2 */
2680 fw_data = (const __be32 *)rdev->mec_fw->data;
2681 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2682 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2683 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2684 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2685 }
2686
2687 return 0;
2688}
2689
2690/**
2691 * cik_cp_compute_start - start the compute queues
2692 *
2693 * @rdev: radeon_device pointer
2694 *
2695 * Enable the compute queues.
2696 * Returns 0 for success, error for failure.
2697 */
2698static int cik_cp_compute_start(struct radeon_device *rdev)
2699{
963e81f9
AD
2700 cik_cp_compute_enable(rdev, true);
2701
841cf442
AD
2702 return 0;
2703}
2704
2705/**
2706 * cik_cp_compute_fini - stop the compute queues
2707 *
2708 * @rdev: radeon_device pointer
2709 *
2710 * Stop the compute queues and tear down the driver queue
2711 * info.
2712 */
2713static void cik_cp_compute_fini(struct radeon_device *rdev)
2714{
963e81f9
AD
2715 int i, idx, r;
2716
841cf442 2717 cik_cp_compute_enable(rdev, false);
963e81f9
AD
2718
2719 for (i = 0; i < 2; i++) {
2720 if (i == 0)
2721 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2722 else
2723 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2724
2725 if (rdev->ring[idx].mqd_obj) {
2726 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2727 if (unlikely(r != 0))
2728 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2729
2730 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2731 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2732
2733 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2734 rdev->ring[idx].mqd_obj = NULL;
2735 }
2736 }
841cf442
AD
2737}
2738
963e81f9
AD
2739static void cik_mec_fini(struct radeon_device *rdev)
2740{
2741 int r;
2742
2743 if (rdev->mec.hpd_eop_obj) {
2744 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2745 if (unlikely(r != 0))
2746 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2747 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2748 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2749
2750 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2751 rdev->mec.hpd_eop_obj = NULL;
2752 }
2753}
2754
2755#define MEC_HPD_SIZE 2048
2756
2757static int cik_mec_init(struct radeon_device *rdev)
2758{
2759 int r;
2760 u32 *hpd;
2761
2762 /*
2763 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2764 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2765 */
2766 if (rdev->family == CHIP_KAVERI)
2767 rdev->mec.num_mec = 2;
2768 else
2769 rdev->mec.num_mec = 1;
2770 rdev->mec.num_pipe = 4;
2771 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2772
2773 if (rdev->mec.hpd_eop_obj == NULL) {
2774 r = radeon_bo_create(rdev,
2775 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2776 PAGE_SIZE, true,
2777 RADEON_GEM_DOMAIN_GTT, NULL,
2778 &rdev->mec.hpd_eop_obj);
2779 if (r) {
2780 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2781 return r;
2782 }
2783 }
2784
2785 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2786 if (unlikely(r != 0)) {
2787 cik_mec_fini(rdev);
2788 return r;
2789 }
2790 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2791 &rdev->mec.hpd_eop_gpu_addr);
2792 if (r) {
2793 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2794 cik_mec_fini(rdev);
2795 return r;
2796 }
2797 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2798 if (r) {
2799 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2800 cik_mec_fini(rdev);
2801 return r;
2802 }
2803
2804 /* clear memory. Not sure if this is required or not */
2805 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2806
2807 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2808 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2809
2810 return 0;
2811}
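
/* Sizing note (editorial): the HPD EOP buffer created above is
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, i.e.
 * 2 * 4 * 2048 * 2 = 32 KiB on KAVERI and 1 * 4 * 2048 * 2 = 16 KiB
 * on BONAIRE/KABINI.
 */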
2812
2813struct hqd_registers
2814{
2815 u32 cp_mqd_base_addr;
2816 u32 cp_mqd_base_addr_hi;
2817 u32 cp_hqd_active;
2818 u32 cp_hqd_vmid;
2819 u32 cp_hqd_persistent_state;
2820 u32 cp_hqd_pipe_priority;
2821 u32 cp_hqd_queue_priority;
2822 u32 cp_hqd_quantum;
2823 u32 cp_hqd_pq_base;
2824 u32 cp_hqd_pq_base_hi;
2825 u32 cp_hqd_pq_rptr;
2826 u32 cp_hqd_pq_rptr_report_addr;
2827 u32 cp_hqd_pq_rptr_report_addr_hi;
2828 u32 cp_hqd_pq_wptr_poll_addr;
2829 u32 cp_hqd_pq_wptr_poll_addr_hi;
2830 u32 cp_hqd_pq_doorbell_control;
2831 u32 cp_hqd_pq_wptr;
2832 u32 cp_hqd_pq_control;
2833 u32 cp_hqd_ib_base_addr;
2834 u32 cp_hqd_ib_base_addr_hi;
2835 u32 cp_hqd_ib_rptr;
2836 u32 cp_hqd_ib_control;
2837 u32 cp_hqd_iq_timer;
2838 u32 cp_hqd_iq_rptr;
2839 u32 cp_hqd_dequeue_request;
2840 u32 cp_hqd_dma_offload;
2841 u32 cp_hqd_sema_cmd;
2842 u32 cp_hqd_msg_type;
2843 u32 cp_hqd_atomic0_preop_lo;
2844 u32 cp_hqd_atomic0_preop_hi;
2845 u32 cp_hqd_atomic1_preop_lo;
2846 u32 cp_hqd_atomic1_preop_hi;
2847 u32 cp_hqd_hq_scheduler0;
2848 u32 cp_hqd_hq_scheduler1;
2849 u32 cp_mqd_control;
2850};
2851
2852struct bonaire_mqd
2853{
2854 u32 header;
2855 u32 dispatch_initiator;
2856 u32 dimensions[3];
2857 u32 start_idx[3];
2858 u32 num_threads[3];
2859 u32 pipeline_stat_enable;
2860 u32 perf_counter_enable;
2861 u32 pgm[2];
2862 u32 tba[2];
2863 u32 tma[2];
2864 u32 pgm_rsrc[2];
2865 u32 vmid;
2866 u32 resource_limits;
2867 u32 static_thread_mgmt01[2];
2868 u32 tmp_ring_size;
2869 u32 static_thread_mgmt23[2];
2870 u32 restart[3];
2871 u32 thread_trace_enable;
2872 u32 reserved1;
2873 u32 user_data[16];
2874 u32 vgtcs_invoke_count[2];
2875 struct hqd_registers queue_state;
2876 u32 dequeue_cntr;
2877 u32 interrupt_queue[64];
2878};
2879
841cf442
AD
2880/**
2881 * cik_cp_compute_resume - setup the compute queue registers
2882 *
2883 * @rdev: radeon_device pointer
2884 *
2885 * Program the compute queues and test them to make sure they
2886 * are working.
2887 * Returns 0 for success, error for failure.
2888 */
2889static int cik_cp_compute_resume(struct radeon_device *rdev)
2890{
963e81f9
AD
2891 int r, i, j, idx;
2892 u32 tmp;
2893 bool use_doorbell = true;
2894 u64 hqd_gpu_addr;
2895 u64 mqd_gpu_addr;
2896 u64 eop_gpu_addr;
2897 u64 wb_gpu_addr;
2898 u32 *buf;
2899 struct bonaire_mqd *mqd;
841cf442 2900
841cf442
AD
2901 r = cik_cp_compute_start(rdev);
2902 if (r)
2903 return r;
963e81f9
AD
2904
2905 /* fix up chicken bits */
2906 tmp = RREG32(CP_CPF_DEBUG);
2907 tmp |= (1 << 23);
2908 WREG32(CP_CPF_DEBUG, tmp);
2909
2910 /* init the pipes */
2911 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2912 int me = (i < 4) ? 1 : 2;
2913 int pipe = (i < 4) ? i : (i - 4);
2914
2915 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2916
2917 cik_srbm_select(rdev, me, pipe, 0, 0);
2918
2919 /* write the EOP addr */
2920 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2921 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2922
2923 /* set the VMID assigned */
2924 WREG32(CP_HPD_EOP_VMID, 0);
2925
2926 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2927 tmp = RREG32(CP_HPD_EOP_CONTROL);
2928 tmp &= ~EOP_SIZE_MASK;
2929 tmp |= drm_order(MEC_HPD_SIZE / 8);
2930 WREG32(CP_HPD_EOP_CONTROL, tmp);
2931 }
2932 cik_srbm_select(rdev, 0, 0, 0, 0);
2933
2934 /* init the queues. Just two for now. */
2935 for (i = 0; i < 2; i++) {
2936 if (i == 0)
2937 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2938 else
2939 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2940
2941 if (rdev->ring[idx].mqd_obj == NULL) {
2942 r = radeon_bo_create(rdev,
2943 sizeof(struct bonaire_mqd),
2944 PAGE_SIZE, true,
2945 RADEON_GEM_DOMAIN_GTT, NULL,
2946 &rdev->ring[idx].mqd_obj);
2947 if (r) {
2948 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2949 return r;
2950 }
2951 }
2952
2953 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2954 if (unlikely(r != 0)) {
2955 cik_cp_compute_fini(rdev);
2956 return r;
2957 }
2958 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2959 &mqd_gpu_addr);
2960 if (r) {
2961 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2962 cik_cp_compute_fini(rdev);
2963 return r;
2964 }
2965 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2966 if (r) {
2967 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2968 cik_cp_compute_fini(rdev);
2969 return r;
2970 }
2971
2972 /* doorbell offset */
2973 rdev->ring[idx].doorbell_offset =
2974 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2975
2976 /* init the mqd struct */
2977 memset(buf, 0, sizeof(struct bonaire_mqd));
2978
2979 mqd = (struct bonaire_mqd *)buf;
2980 mqd->header = 0xC0310800;
2981 mqd->static_thread_mgmt01[0] = 0xffffffff;
2982 mqd->static_thread_mgmt01[1] = 0xffffffff;
2983 mqd->static_thread_mgmt23[0] = 0xffffffff;
2984 mqd->static_thread_mgmt23[1] = 0xffffffff;
2985
2986 cik_srbm_select(rdev, rdev->ring[idx].me,
2987 rdev->ring[idx].pipe,
2988 rdev->ring[idx].queue, 0);
2989
2990 /* disable wptr polling */
2991 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2992 tmp &= ~WPTR_POLL_EN;
2993 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2994
2995 /* enable doorbell? */
2996 mqd->queue_state.cp_hqd_pq_doorbell_control =
2997 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2998 if (use_doorbell)
2999 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3000 else
3001 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3002 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3003 mqd->queue_state.cp_hqd_pq_doorbell_control);
3004
3005 /* disable the queue if it's active */
3006 mqd->queue_state.cp_hqd_dequeue_request = 0;
3007 mqd->queue_state.cp_hqd_pq_rptr = 0;
3008 mqd->queue_state.cp_hqd_pq_wptr= 0;
3009 if (RREG32(CP_HQD_ACTIVE) & 1) {
3010 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3011 for (j = 0; j < rdev->usec_timeout; j++) {
3012 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3013 break;
3014 udelay(1);
3015 }
3016 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3017 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3018 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3019 }
3020
3021 /* set the pointer to the MQD */
3022 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3023 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3024 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3025 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3026 /* set MQD vmid to 0 */
3027 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3028 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3029 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3030
3031 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3033 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3034 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3035 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3036 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3037
3038 /* set up the HQD, this is similar to CP_RB0_CNTL */
3039 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3040 mqd->queue_state.cp_hqd_pq_control &=
3041 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3042
3043 mqd->queue_state.cp_hqd_pq_control |=
3044 drm_order(rdev->ring[idx].ring_size / 8);
3045 mqd->queue_state.cp_hqd_pq_control |=
3046 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3047#ifdef __BIG_ENDIAN
3048 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3049#endif
3050 mqd->queue_state.cp_hqd_pq_control &=
3051 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3052 mqd->queue_state.cp_hqd_pq_control |=
3053 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3054 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3055
3056 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3057 if (i == 0)
3058 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3059 else
3060 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3061 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3062 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3063 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3064 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3065 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3066
3067 /* set the wb address whether it's enabled or not */
3068 if (i == 0)
3069 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3070 else
3071 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3072 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3073 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3074 upper_32_bits(wb_gpu_addr) & 0xffff;
3075 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3076 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3077 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3078 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3079
3080 /* enable the doorbell if requested */
3081 if (use_doorbell) {
3082 mqd->queue_state.cp_hqd_pq_doorbell_control =
3083 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3084 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3085 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3086 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3087 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3088 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3089 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3090
3091 } else {
3092 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3093 }
3094 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3095 mqd->queue_state.cp_hqd_pq_doorbell_control);
3096
3097 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3098 rdev->ring[idx].wptr = 0;
3099 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3100 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3101 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3102 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3103
3104 /* set the vmid for the queue */
3105 mqd->queue_state.cp_hqd_vmid = 0;
3106 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3107
3108 /* activate the queue */
3109 mqd->queue_state.cp_hqd_active = 1;
3110 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3111
3112 cik_srbm_select(rdev, 0, 0, 0, 0);
3113
3114 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3115 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3116
3117 rdev->ring[idx].ready = true;
3118 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3119 if (r)
3120 rdev->ring[idx].ready = false;
3121 }
3122
841cf442
AD
3123 return 0;
3124}
3125
841cf442
AD
3126static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3127{
3128 cik_cp_gfx_enable(rdev, enable);
3129 cik_cp_compute_enable(rdev, enable);
3130}
3131
841cf442
AD
3132static int cik_cp_load_microcode(struct radeon_device *rdev)
3133{
3134 int r;
3135
3136 r = cik_cp_gfx_load_microcode(rdev);
3137 if (r)
3138 return r;
3139 r = cik_cp_compute_load_microcode(rdev);
3140 if (r)
3141 return r;
3142
3143 return 0;
3144}
3145
841cf442
AD
3146static void cik_cp_fini(struct radeon_device *rdev)
3147{
3148 cik_cp_gfx_fini(rdev);
3149 cik_cp_compute_fini(rdev);
3150}
3151
841cf442
AD
3152static int cik_cp_resume(struct radeon_device *rdev)
3153{
3154 int r;
3155
3156 /* Reset all cp blocks */
3157 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3158 RREG32(GRBM_SOFT_RESET);
3159 mdelay(15);
3160 WREG32(GRBM_SOFT_RESET, 0);
3161 RREG32(GRBM_SOFT_RESET);
3162
3163 r = cik_cp_load_microcode(rdev);
3164 if (r)
3165 return r;
3166
3167 r = cik_cp_gfx_resume(rdev);
3168 if (r)
3169 return r;
3170 r = cik_cp_compute_resume(rdev);
3171 if (r)
3172 return r;
3173
3174 return 0;
3175}
3176
21a93e13
AD
3177/*
3178 * sDMA - System DMA
3179 * Starting with CIK, the GPU has new asynchronous
3180 * DMA engines. These engines are used for compute
3181 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3182 * and each one supports 1 ring buffer used for gfx
3183 * and 2 queues used for compute.
3184 *
3185 * The programming model is very similar to the CP
3186 * (ring buffer, IBs, etc.), but sDMA has its own
3187 * packet format that is different from the PM4 format
3188 * used by the CP. sDMA supports copying data, writing
3189 * embedded data, solid fills, and a number of other
3190 * things. It also has support for tiling/detiling of
3191 * buffers.
3192 */
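
/* Editorial sketch (mirrors the per-engine loops used below): both
 * engines share one register layout at a fixed per-instance offset,
 * so they are usually programmed in a two-iteration loop such as:
 *
 *	for (i = 0; i < 2; i++) {
 *		u32 reg_offset = (i == 0) ? SDMA0_REGISTER_OFFSET :
 *					    SDMA1_REGISTER_OFFSET;
 *		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
 *	}
 */
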
3193/**
3194 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3195 *
3196 * @rdev: radeon_device pointer
3197 * @ib: IB object to schedule
3198 *
3199 * Schedule an IB in the DMA ring (CIK).
3200 */
3201void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3202 struct radeon_ib *ib)
3203{
3204 struct radeon_ring *ring = &rdev->ring[ib->ring];
3205 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3206
3207 if (rdev->wb.enabled) {
3208 u32 next_rptr = ring->wptr + 5;
3209 while ((next_rptr & 7) != 4)
3210 next_rptr++;
3211 next_rptr += 4;
3212 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3213 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3214 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3215 radeon_ring_write(ring, 1); /* number of DWs to follow */
3216 radeon_ring_write(ring, next_rptr);
3217 }
3218
3219 /* IB packet must end on an 8 DW boundary */
3220 while ((ring->wptr & 7) != 4)
3221 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3222 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3223 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3224 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3225 radeon_ring_write(ring, ib->length_dw);
3226
3227}
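
/* Worked example (editorial): if ring->wptr were 6 when the padding
 * loop above runs, NOPs advance it to 12 (12 & 7 == 4); the 4-dword
 * INDIRECT_BUFFER packet then ends at dword 16, an 8-dword boundary.
 */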
3228
3229/**
3230 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3231 *
3232 * @rdev: radeon_device pointer
3233 * @fence: radeon fence object
3234 *
3235 * Add a DMA fence packet to the ring to write
3236 * the fence seq number and a DMA trap packet to generate
3237 * an interrupt if needed (CIK).
3238 */
3239void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3240 struct radeon_fence *fence)
3241{
3242 struct radeon_ring *ring = &rdev->ring[fence->ring];
3243 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3244 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3245 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3246 u32 ref_and_mask;
3247
3248 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3249 ref_and_mask = SDMA0;
3250 else
3251 ref_and_mask = SDMA1;
3252
3253 /* write the fence */
3254 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3255 radeon_ring_write(ring, addr & 0xffffffff);
3256 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3257 radeon_ring_write(ring, fence->seq);
3258 /* generate an interrupt */
3259 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3260 /* flush HDP */
3261 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3262 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3263 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3264 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3265 radeon_ring_write(ring, ref_and_mask); /* MASK */
3266 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3267}
3268
3269/**
3270 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3271 *
3272 * @rdev: radeon_device pointer
3273 * @ring: radeon_ring structure holding ring information
3274 * @semaphore: radeon semaphore object
3275 * @emit_wait: wait or signal semaphore
3276 *
3277 * Add a DMA semaphore packet to the ring to wait on or signal
3278 * other rings (CIK).
3279 */
3280void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3281 struct radeon_ring *ring,
3282 struct radeon_semaphore *semaphore,
3283 bool emit_wait)
3284{
3285 u64 addr = semaphore->gpu_addr;
3286 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3287
3288 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3289 radeon_ring_write(ring, addr & 0xfffffff8);
3290 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3291}
3292
3293/**
3294 * cik_sdma_gfx_stop - stop the gfx async dma engines
3295 *
3296 * @rdev: radeon_device pointer
3297 *
3298 * Stop the gfx async dma ring buffers (CIK).
3299 */
3300static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3301{
3302 u32 rb_cntl, reg_offset;
3303 int i;
3304
3305 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3306
3307 for (i = 0; i < 2; i++) {
3308 if (i == 0)
3309 reg_offset = SDMA0_REGISTER_OFFSET;
3310 else
3311 reg_offset = SDMA1_REGISTER_OFFSET;
3312 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3313 rb_cntl &= ~SDMA_RB_ENABLE;
3314 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3315 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3316 }
3317}
3318
3319/**
3320 * cik_sdma_rlc_stop - stop the compute async dma engines
3321 *
3322 * @rdev: radeon_device pointer
3323 *
3324 * Stop the compute async dma queues (CIK).
3325 */
3326static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3327{
3328 /* XXX todo */
3329}
3330
3331/**
3332 * cik_sdma_enable - enable/disable the async dma engines
3333 *
3334 * @rdev: radeon_device pointer
3335 * @enable: enable/disable the DMA MEs.
3336 *
3337 * Halt or unhalt the async dma engines (CIK).
3338 */
3339static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3340{
3341 u32 me_cntl, reg_offset;
3342 int i;
3343
3344 for (i = 0; i < 2; i++) {
3345 if (i == 0)
3346 reg_offset = SDMA0_REGISTER_OFFSET;
3347 else
3348 reg_offset = SDMA1_REGISTER_OFFSET;
3349 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3350 if (enable)
3351 me_cntl &= ~SDMA_HALT;
3352 else
3353 me_cntl |= SDMA_HALT;
3354 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3355 }
3356}
3357
3358/**
3359 * cik_sdma_gfx_resume - setup and start the async dma engines
3360 *
3361 * @rdev: radeon_device pointer
3362 *
3363 * Set up the gfx DMA ring buffers and enable them (CIK).
3364 * Returns 0 for success, error for failure.
3365 */
3366static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3367{
3368 struct radeon_ring *ring;
3369 u32 rb_cntl, ib_cntl;
3370 u32 rb_bufsz;
3371 u32 reg_offset, wb_offset;
3372 int i, r;
3373
3374 for (i = 0; i < 2; i++) {
3375 if (i == 0) {
3376 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3377 reg_offset = SDMA0_REGISTER_OFFSET;
3378 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3379 } else {
3380 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3381 reg_offset = SDMA1_REGISTER_OFFSET;
3382 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3383 }
3384
3385 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3386 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3387
3388 /* Set ring buffer size in dwords */
3389 rb_bufsz = drm_order(ring->ring_size / 4);
3390 rb_cntl = rb_bufsz << 1;
3391#ifdef __BIG_ENDIAN
3392 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3393#endif
3394 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3395
3396 /* Initialize the ring buffer's read and write pointers */
3397 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3398 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3399
3400 /* set the wb address whether it's enabled or not */
3401 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3402 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3403 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3404 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3405
3406 if (rdev->wb.enabled)
3407 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3408
3409 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3410 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3411
3412 ring->wptr = 0;
3413 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3414
3415 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3416
3417 /* enable DMA RB */
3418 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3419
3420 ib_cntl = SDMA_IB_ENABLE;
3421#ifdef __BIG_ENDIAN
3422 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3423#endif
3424 /* enable DMA IBs */
3425 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3426
3427 ring->ready = true;
3428
3429 r = radeon_ring_test(rdev, ring->idx, ring);
3430 if (r) {
3431 ring->ready = false;
3432 return r;
3433 }
3434 }
3435
3436 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3437
3438 return 0;
3439}
3440
3441/**
3442 * cik_sdma_rlc_resume - setup and start the async dma engines
3443 *
3444 * @rdev: radeon_device pointer
3445 *
3446 * Set up the compute DMA queues and enable them (CIK).
3447 * Returns 0 for success, error for failure.
3448 */
3449static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3450{
3451 /* XXX todo */
3452 return 0;
3453}
3454
3455/**
3456 * cik_sdma_load_microcode - load the sDMA ME ucode
3457 *
3458 * @rdev: radeon_device pointer
3459 *
3460 * Loads the sDMA0/1 ucode.
3461 * Returns 0 for success, -EINVAL if the ucode is not available.
3462 */
3463static int cik_sdma_load_microcode(struct radeon_device *rdev)
3464{
3465 const __be32 *fw_data;
3466 int i;
3467
3468 if (!rdev->sdma_fw)
3469 return -EINVAL;
3470
3471 /* stop the gfx rings and rlc compute queues */
3472 cik_sdma_gfx_stop(rdev);
3473 cik_sdma_rlc_stop(rdev);
3474
3475 /* halt the MEs */
3476 cik_sdma_enable(rdev, false);
3477
3478 /* sdma0 */
3479 fw_data = (const __be32 *)rdev->sdma_fw->data;
3480 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3481 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3482 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3483 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3484
3485 /* sdma1 */
3486 fw_data = (const __be32 *)rdev->sdma_fw->data;
3487 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3488 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3489 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3490 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3491
3492 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3493 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3494 return 0;
3495}
3496
3497/**
3498 * cik_sdma_resume - setup and start the async dma engines
3499 *
3500 * @rdev: radeon_device pointer
3501 *
3502 * Set up the DMA engines and enable them (CIK).
3503 * Returns 0 for success, error for failure.
3504 */
3505static int cik_sdma_resume(struct radeon_device *rdev)
3506{
3507 int r;
3508
3509 /* Reset dma */
3510 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3511 RREG32(SRBM_SOFT_RESET);
3512 udelay(50);
3513 WREG32(SRBM_SOFT_RESET, 0);
3514 RREG32(SRBM_SOFT_RESET);
3515
3516 r = cik_sdma_load_microcode(rdev);
3517 if (r)
3518 return r;
3519
3520 /* unhalt the MEs */
3521 cik_sdma_enable(rdev, true);
3522
3523 /* start the gfx rings and rlc compute queues */
3524 r = cik_sdma_gfx_resume(rdev);
3525 if (r)
3526 return r;
3527 r = cik_sdma_rlc_resume(rdev);
3528 if (r)
3529 return r;
3530
3531 return 0;
3532}
3533
3534/**
3535 * cik_sdma_fini - tear down the async dma engines
3536 *
3537 * @rdev: radeon_device pointer
3538 *
3539 * Stop the async dma engines and free the rings (CIK).
3540 */
3541static void cik_sdma_fini(struct radeon_device *rdev)
3542{
3543 /* stop the gfx rings and rlc compute queues */
3544 cik_sdma_gfx_stop(rdev);
3545 cik_sdma_rlc_stop(rdev);
3546 /* halt the MEs */
3547 cik_sdma_enable(rdev, false);
3548 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3549 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3550 /* XXX - compute dma queue tear down */
3551}
3552
3553/**
3554 * cik_copy_dma - copy pages using the DMA engine
3555 *
3556 * @rdev: radeon_device pointer
3557 * @src_offset: src GPU address
3558 * @dst_offset: dst GPU address
3559 * @num_gpu_pages: number of GPU pages to xfer
3560 * @fence: radeon fence object
3561 *
3562 * Copy GPU pages using the DMA engine (CIK).
3563 * Used by the radeon ttm implementation to move pages if
3564 * registered as the asic copy callback.
3565 */
3566int cik_copy_dma(struct radeon_device *rdev,
3567 uint64_t src_offset, uint64_t dst_offset,
3568 unsigned num_gpu_pages,
3569 struct radeon_fence **fence)
3570{
3571 struct radeon_semaphore *sem = NULL;
3572 int ring_index = rdev->asic->copy.dma_ring_index;
3573 struct radeon_ring *ring = &rdev->ring[ring_index];
3574 u32 size_in_bytes, cur_size_in_bytes;
3575 int i, num_loops;
3576 int r = 0;
3577
3578 r = radeon_semaphore_create(rdev, &sem);
3579 if (r) {
3580 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 return r;
3582 }
3583
3584 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3585 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
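	/* each COPY_LINEAR packet emitted below is 7 dwords and can move at
	 * most 0x1fffff bytes, hence the split into num_loops chunks; the
	 * extra 14 dwords leave room for the semaphore sync and fence */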
3586 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3587 if (r) {
3588 DRM_ERROR("radeon: moving bo (%d).\n", r);
3589 radeon_semaphore_free(rdev, &sem, NULL);
3590 return r;
3591 }
3592
3593 if (radeon_fence_need_sync(*fence, ring->idx)) {
3594 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3595 ring->idx);
3596 radeon_fence_note_sync(*fence, ring->idx);
3597 } else {
3598 radeon_semaphore_free(rdev, &sem, NULL);
3599 }
3600
3601 for (i = 0; i < num_loops; i++) {
3602 cur_size_in_bytes = size_in_bytes;
3603 if (cur_size_in_bytes > 0x1fffff)
3604 cur_size_in_bytes = 0x1fffff;
3605 size_in_bytes -= cur_size_in_bytes;
3606 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3607 radeon_ring_write(ring, cur_size_in_bytes);
3608 radeon_ring_write(ring, 0); /* src/dst endian swap */
3609 radeon_ring_write(ring, src_offset & 0xffffffff);
3610 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3611 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3612 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3613 src_offset += cur_size_in_bytes;
3614 dst_offset += cur_size_in_bytes;
3615 }
3616
3617 r = radeon_fence_emit(rdev, fence, ring->idx);
3618 if (r) {
3619 radeon_ring_unlock_undo(rdev, ring);
3620 return r;
3621 }
3622
3623 radeon_ring_unlock_commit(rdev, ring);
3624 radeon_semaphore_free(rdev, &sem, *fence);
3625
3626 return r;
3627}
3628
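/* Illustrative only (not part of this file): a minimal sketch of how a
 * caller might use the DMA copy callback above and block until the copy
 * has retired.  The helper name is hypothetical; the calls it makes are
 * the driver's existing copy and fence APIs.
 */
static int __maybe_unused example_cik_dma_move(struct radeon_device *rdev,
					       uint64_t src, uint64_t dst,
					       unsigned num_gpu_pages)
{
	struct radeon_fence *fence = NULL;
	int r;

	r = cik_copy_dma(rdev, src, dst, num_gpu_pages, &fence);
	if (r)
		return r;
	/* block until the SDMA ring has executed the copy */
	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}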
3629/**
3630 * cik_sdma_ring_test - simple async dma engine test
3631 *
3632 * @rdev: radeon_device pointer
3633 * @ring: radeon_ring structure holding ring information
3634 *
3635 * Test the DMA engine by using it to write a
3636 * value to memory (CIK).
3637 * Returns 0 for success, error for failure.
3638 */
3639int cik_sdma_ring_test(struct radeon_device *rdev,
3640 struct radeon_ring *ring)
3641{
3642 unsigned i;
3643 int r;
3644 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3645 u32 tmp;
3646
3647 if (!ptr) {
3648 DRM_ERROR("invalid vram scratch pointer\n");
3649 return -EINVAL;
3650 }
3651
3652 tmp = 0xCAFEDEAD;
3653 writel(tmp, ptr);
3654
3655 r = radeon_ring_lock(rdev, ring, 4);
3656 if (r) {
3657 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3658 return r;
3659 }
3660 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3661 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3662 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3663 radeon_ring_write(ring, 1); /* number of DWs to follow */
3664 radeon_ring_write(ring, 0xDEADBEEF);
3665 radeon_ring_unlock_commit(rdev, ring);
3666
3667 for (i = 0; i < rdev->usec_timeout; i++) {
3668 tmp = readl(ptr);
3669 if (tmp == 0xDEADBEEF)
3670 break;
3671 DRM_UDELAY(1);
3672 }
3673
3674 if (i < rdev->usec_timeout) {
3675 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3676 } else {
3677 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3678 ring->idx, tmp);
3679 r = -EINVAL;
3680 }
3681 return r;
3682}
3683
3684/**
3685 * cik_sdma_ib_test - test an IB on the DMA engine
3686 *
3687 * @rdev: radeon_device pointer
3688 * @ring: radeon_ring structure holding ring information
3689 *
3690 * Test a simple IB in the DMA ring (CIK).
3691 * Returns 0 on success, error on failure.
3692 */
3693int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3694{
3695 struct radeon_ib ib;
3696 unsigned i;
3697 int r;
3698 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3699 u32 tmp = 0;
3700
3701 if (!ptr) {
3702 DRM_ERROR("invalid vram scratch pointer\n");
3703 return -EINVAL;
3704 }
3705
3706 tmp = 0xCAFEDEAD;
3707 writel(tmp, ptr);
3708
3709 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3710 if (r) {
3711 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3712 return r;
3713 }
3714
3715 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3716 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3717 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3718 ib.ptr[3] = 1;
3719 ib.ptr[4] = 0xDEADBEEF;
3720 ib.length_dw = 5;
3721
3722 r = radeon_ib_schedule(rdev, &ib, NULL);
3723 if (r) {
3724 radeon_ib_free(rdev, &ib);
3725 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3726 return r;
3727 }
3728 r = radeon_fence_wait(ib.fence, false);
3729 if (r) {
3730 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3731 return r;
3732 }
3733 for (i = 0; i < rdev->usec_timeout; i++) {
3734 tmp = readl(ptr);
3735 if (tmp == 0xDEADBEEF)
3736 break;
3737 DRM_UDELAY(1);
3738 }
3739 if (i < rdev->usec_timeout) {
3740 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3741 } else {
3742 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3743 r = -EINVAL;
3744 }
3745 radeon_ib_free(rdev, &ib);
3746 return r;
3747}
3748
6f2043ce 3749
cc066715 3750static void cik_print_gpu_status_regs(struct radeon_device *rdev)
6f2043ce 3751{
6f2043ce
AD
3752 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3753 RREG32(GRBM_STATUS));
3754 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3755 RREG32(GRBM_STATUS2));
3756 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3757 RREG32(GRBM_STATUS_SE0));
3758 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3759 RREG32(GRBM_STATUS_SE1));
3760 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3761 RREG32(GRBM_STATUS_SE2));
3762 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3763 RREG32(GRBM_STATUS_SE3));
3764 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3765 RREG32(SRBM_STATUS));
3766 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3767 RREG32(SRBM_STATUS2));
cc066715
AD
3768 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3769 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3770 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3771 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
963e81f9
AD
3772 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3773 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3774 RREG32(CP_STALLED_STAT1));
3775 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3776 RREG32(CP_STALLED_STAT2));
3777 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3778 RREG32(CP_STALLED_STAT3));
3779 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3780 RREG32(CP_CPF_BUSY_STAT));
3781 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3782 RREG32(CP_CPF_STALLED_STAT1));
3783 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3784 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3785 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3786 RREG32(CP_CPC_STALLED_STAT1));
3787 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
cc066715 3788}
6f2043ce 3789
cc066715
AD
3790/**
3791 * cik_gpu_check_soft_reset - check which blocks are busy
3792 *
3793 * @rdev: radeon_device pointer
3794 *
3795 * Check which blocks are busy and return the relevant reset
3796 * mask to be used by cik_gpu_soft_reset().
3797 * Returns a mask of the blocks to be reset.
3798 */
3799static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3800{
3801 u32 reset_mask = 0;
3802 u32 tmp;
6f2043ce 3803
cc066715
AD
3804 /* GRBM_STATUS */
3805 tmp = RREG32(GRBM_STATUS);
3806 if (tmp & (PA_BUSY | SC_BUSY |
3807 BCI_BUSY | SX_BUSY |
3808 TA_BUSY | VGT_BUSY |
3809 DB_BUSY | CB_BUSY |
3810 GDS_BUSY | SPI_BUSY |
3811 IA_BUSY | IA_BUSY_NO_DMA))
3812 reset_mask |= RADEON_RESET_GFX;
3813
3814 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3815 reset_mask |= RADEON_RESET_CP;
3816
3817 /* GRBM_STATUS2 */
3818 tmp = RREG32(GRBM_STATUS2);
3819 if (tmp & RLC_BUSY)
3820 reset_mask |= RADEON_RESET_RLC;
3821
3822 /* SDMA0_STATUS_REG */
3823 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3824 if (!(tmp & SDMA_IDLE))
3825 reset_mask |= RADEON_RESET_DMA;
3826
3827 /* SDMA1_STATUS_REG */
3828 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3829 if (!(tmp & SDMA_IDLE))
3830 reset_mask |= RADEON_RESET_DMA1;
3831
3832 /* SRBM_STATUS2 */
3833 tmp = RREG32(SRBM_STATUS2);
3834 if (tmp & SDMA_BUSY)
3835 reset_mask |= RADEON_RESET_DMA;
3836
3837 if (tmp & SDMA1_BUSY)
3838 reset_mask |= RADEON_RESET_DMA1;
3839
3840 /* SRBM_STATUS */
3841 tmp = RREG32(SRBM_STATUS);
3842
3843 if (tmp & IH_BUSY)
3844 reset_mask |= RADEON_RESET_IH;
3845
3846 if (tmp & SEM_BUSY)
3847 reset_mask |= RADEON_RESET_SEM;
3848
3849 if (tmp & GRBM_RQ_PENDING)
3850 reset_mask |= RADEON_RESET_GRBM;
3851
3852 if (tmp & VMC_BUSY)
3853 reset_mask |= RADEON_RESET_VMC;
3854
3855 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3856 MCC_BUSY | MCD_BUSY))
3857 reset_mask |= RADEON_RESET_MC;
3858
3859 if (evergreen_is_display_hung(rdev))
3860 reset_mask |= RADEON_RESET_DISPLAY;
3861
3862 /* Skip MC reset as it's most likely not hung, just busy */
3863 if (reset_mask & RADEON_RESET_MC) {
3864 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3865 reset_mask &= ~RADEON_RESET_MC;
3866 }
3867
3868 return reset_mask;
6f2043ce
AD
3869}
3870
3871/**
cc066715 3872 * cik_gpu_soft_reset - soft reset GPU
6f2043ce
AD
3873 *
3874 * @rdev: radeon_device pointer
cc066715 3875 * @reset_mask: mask of which blocks to reset
6f2043ce 3876 *
cc066715 3877 * Soft reset the blocks specified in @reset_mask.
6f2043ce 3878 */
cc066715 3879static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
6f2043ce
AD
3880{
3881 struct evergreen_mc_save save;
cc066715
AD
3882 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3883 u32 tmp;
3884
3885 if (reset_mask == 0)
3886 return;
3887
3888 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3889
3890 cik_print_gpu_status_regs(rdev);
3891 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3892 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3893 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3894 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3895
3896 /* stop the rlc */
3897 cik_rlc_stop(rdev);
3898
3899 /* Disable GFX parsing/prefetching */
3900 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3901
3902 /* Disable MEC parsing/prefetching */
3903 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3904
3905 if (reset_mask & RADEON_RESET_DMA) {
3906 /* sdma0 */
3907 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3908 tmp |= SDMA_HALT;
3909 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3910 }
3911 if (reset_mask & RADEON_RESET_DMA1) {
3912 /* sdma1 */
3913 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3914 tmp |= SDMA_HALT;
3915 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3916 }
6f2043ce 3917
6f2043ce 3918 evergreen_mc_stop(rdev, &save);
cc066715 3919 if (evergreen_mc_wait_for_idle(rdev)) {
6f2043ce
AD
3920 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3921 }
6f2043ce 3922
cc066715
AD
3923 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3924 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3925
3926 if (reset_mask & RADEON_RESET_CP) {
3927 grbm_soft_reset |= SOFT_RESET_CP;
3928
3929 srbm_soft_reset |= SOFT_RESET_GRBM;
3930 }
3931
3932 if (reset_mask & RADEON_RESET_DMA)
3933 srbm_soft_reset |= SOFT_RESET_SDMA;
3934
3935 if (reset_mask & RADEON_RESET_DMA1)
3936 srbm_soft_reset |= SOFT_RESET_SDMA1;
3937
3938 if (reset_mask & RADEON_RESET_DISPLAY)
3939 srbm_soft_reset |= SOFT_RESET_DC;
3940
3941 if (reset_mask & RADEON_RESET_RLC)
3942 grbm_soft_reset |= SOFT_RESET_RLC;
3943
3944 if (reset_mask & RADEON_RESET_SEM)
3945 srbm_soft_reset |= SOFT_RESET_SEM;
3946
3947 if (reset_mask & RADEON_RESET_IH)
3948 srbm_soft_reset |= SOFT_RESET_IH;
3949
3950 if (reset_mask & RADEON_RESET_GRBM)
3951 srbm_soft_reset |= SOFT_RESET_GRBM;
3952
3953 if (reset_mask & RADEON_RESET_VMC)
3954 srbm_soft_reset |= SOFT_RESET_VMC;
3955
3956 if (!(rdev->flags & RADEON_IS_IGP)) {
3957 if (reset_mask & RADEON_RESET_MC)
3958 srbm_soft_reset |= SOFT_RESET_MC;
3959 }
3960
3961 if (grbm_soft_reset) {
3962 tmp = RREG32(GRBM_SOFT_RESET);
3963 tmp |= grbm_soft_reset;
3964 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3965 WREG32(GRBM_SOFT_RESET, tmp);
3966 tmp = RREG32(GRBM_SOFT_RESET);
3967
3968 udelay(50);
3969
3970 tmp &= ~grbm_soft_reset;
3971 WREG32(GRBM_SOFT_RESET, tmp);
3972 tmp = RREG32(GRBM_SOFT_RESET);
3973 }
3974
3975 if (srbm_soft_reset) {
3976 tmp = RREG32(SRBM_SOFT_RESET);
3977 tmp |= srbm_soft_reset;
3978 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3979 WREG32(SRBM_SOFT_RESET, tmp);
3980 tmp = RREG32(SRBM_SOFT_RESET);
3981
3982 udelay(50);
3983
3984 tmp &= ~srbm_soft_reset;
3985 WREG32(SRBM_SOFT_RESET, tmp);
3986 tmp = RREG32(SRBM_SOFT_RESET);
3987 }
6f2043ce 3988
6f2043ce
AD
3989 /* Wait a little for things to settle down */
3990 udelay(50);
cc066715 3991
6f2043ce 3992 evergreen_mc_resume(rdev, &save);
cc066715
AD
3993 udelay(50);
3994
3995 cik_print_gpu_status_regs(rdev);
6f2043ce
AD
3996}
3997
3998/**
cc066715 3999 * cik_asic_reset - soft reset GPU
6f2043ce
AD
4000 *
4001 * @rdev: radeon_device pointer
4002 *
cc066715
AD
4003 * Look up which blocks are hung and attempt
4004 * to reset them.
6f2043ce
AD
4005 * Returns 0 for success.
4006 */
4007int cik_asic_reset(struct radeon_device *rdev)
4008{
cc066715 4009 u32 reset_mask;
6f2043ce 4010
cc066715
AD
4011 reset_mask = cik_gpu_check_soft_reset(rdev);
4012
4013 if (reset_mask)
4014 r600_set_bios_scratch_engine_hung(rdev, true);
4015
4016 cik_gpu_soft_reset(rdev, reset_mask);
6f2043ce 4017
cc066715
AD
4018 reset_mask = cik_gpu_check_soft_reset(rdev);
4019
4020 if (!reset_mask)
4021 r600_set_bios_scratch_engine_hung(rdev, false);
4022
4023 return 0;
4024}
4025
4026/**
4027 * cik_gfx_is_lockup - check if the 3D engine is locked up
4028 *
4029 * @rdev: radeon_device pointer
4030 * @ring: radeon_ring structure holding ring information
4031 *
4032 * Check if the 3D engine is locked up (CIK).
4033 * Returns true if the engine is locked, false if not.
4034 */
4035bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4036{
4037 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4038
4039 if (!(reset_mask & (RADEON_RESET_GFX |
4040 RADEON_RESET_COMPUTE |
4041 RADEON_RESET_CP))) {
4042 radeon_ring_lockup_update(ring);
4043 return false;
4044 }
4045 /* force CP activities */
4046 radeon_ring_force_activity(rdev, ring);
4047 return radeon_ring_test_lockup(rdev, ring);
6f2043ce 4048}
1c49165d 4049
21a93e13
AD
4050/**
4051 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4052 *
4053 * @rdev: radeon_device pointer
4054 * @ring: radeon_ring structure holding ring information
4055 *
4056 * Check if the async DMA engine is locked up (CIK).
4057 * Returns true if the engine appears to be locked up, false if not.
4058 */
4059bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4060{
cc066715
AD
4061 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4062 u32 mask;
21a93e13
AD
4063
4064 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
cc066715 4065 mask = RADEON_RESET_DMA;
21a93e13 4066 else
cc066715
AD
4067 mask = RADEON_RESET_DMA1;
4068
4069 if (!(reset_mask & mask)) {
21a93e13
AD
4070 radeon_ring_lockup_update(ring);
4071 return false;
4072 }
4073 /* force ring activities */
4074 radeon_ring_force_activity(rdev, ring);
4075 return radeon_ring_test_lockup(rdev, ring);
4076}
4077
1c49165d
AD
4078/* MC */
4079/**
4080 * cik_mc_program - program the GPU memory controller
4081 *
4082 * @rdev: radeon_device pointer
4083 *
4084 * Set the location of vram, gart, and AGP in the GPU's
4085 * physical address space (CIK).
4086 */
4087static void cik_mc_program(struct radeon_device *rdev)
4088{
4089 struct evergreen_mc_save save;
4090 u32 tmp;
4091 int i, j;
4092
4093 /* Initialize HDP */
4094 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4095 WREG32((0x2c14 + j), 0x00000000);
4096 WREG32((0x2c18 + j), 0x00000000);
4097 WREG32((0x2c1c + j), 0x00000000);
4098 WREG32((0x2c20 + j), 0x00000000);
4099 WREG32((0x2c24 + j), 0x00000000);
4100 }
4101 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4102
4103 evergreen_mc_stop(rdev, &save);
4104 if (radeon_mc_wait_for_idle(rdev)) {
4105 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4106 }
4107 /* Lockout access through VGA aperture*/
4108 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4109 /* Update configuration */
4110 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4111 rdev->mc.vram_start >> 12);
4112 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4113 rdev->mc.vram_end >> 12);
4114 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4115 rdev->vram_scratch.gpu_addr >> 12);
4116 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4117 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4118 WREG32(MC_VM_FB_LOCATION, tmp);
4119 /* XXX double check these! */
4120 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4121 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4122 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4123 WREG32(MC_VM_AGP_BASE, 0);
4124 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4125 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4126 if (radeon_mc_wait_for_idle(rdev)) {
4127 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4128 }
4129 evergreen_mc_resume(rdev, &save);
4130 /* we need to own VRAM, so turn off the VGA renderer here
4131 * to stop it overwriting our objects */
4132 rv515_vga_render_disable(rdev);
4133}
4134
4135/**
4136 * cik_mc_init - initialize the memory controller driver params
4137 *
4138 * @rdev: radeon_device pointer
4139 *
4140 * Look up the amount of vram, vram width, and decide how to place
4141 * vram and gart within the GPU's physical address space (CIK).
4142 * Returns 0 for success.
4143 */
4144static int cik_mc_init(struct radeon_device *rdev)
4145{
4146 u32 tmp;
4147 int chansize, numchan;
4148
4149 /* Get VRAM information */
4150 rdev->mc.vram_is_ddr = true;
4151 tmp = RREG32(MC_ARB_RAMCFG);
4152 if (tmp & CHANSIZE_MASK) {
4153 chansize = 64;
4154 } else {
4155 chansize = 32;
4156 }
4157 tmp = RREG32(MC_SHARED_CHMAP);
4158 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4159 case 0:
4160 default:
4161 numchan = 1;
4162 break;
4163 case 1:
4164 numchan = 2;
4165 break;
4166 case 2:
4167 numchan = 4;
4168 break;
4169 case 3:
4170 numchan = 8;
4171 break;
4172 case 4:
4173 numchan = 3;
4174 break;
4175 case 5:
4176 numchan = 6;
4177 break;
4178 case 6:
4179 numchan = 10;
4180 break;
4181 case 7:
4182 numchan = 12;
4183 break;
4184 case 8:
4185 numchan = 16;
4186 break;
4187 }
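	/* example: NOOFCHAN encoding 2 -> 4 channels; with 64-bit wide
	 * channels that gives a 4 * 64 = 256-bit effective VRAM bus */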
4188 rdev->mc.vram_width = numchan * chansize;
4189 /* Could aper size report 0 ? */
4190 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4191 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4192 /* size in MB on CIK */
4193 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4195 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4196 si_vram_gtt_location(rdev, &rdev->mc);
4197 radeon_update_bandwidth_info(rdev);
4198
4199 return 0;
4200}
4201
4202/*
4203 * GART
4204 * VMID 0 is the physical GPU addresses as used by the kernel.
4205 * VMIDs 1-15 are used for userspace clients and are handled
4206 * by the radeon vm/hsa code.
4207 */
4208/**
4209 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4210 *
4211 * @rdev: radeon_device pointer
4212 *
4213 * Flush the TLB for the VMID 0 page table (CIK).
4214 */
4215void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4216{
4217 /* flush hdp cache */
4218 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4219
4220 /* bits 0-15 are the VM contexts0-15 */
4221 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4222}
4223
4224/**
4225 * cik_pcie_gart_enable - gart enable
4226 *
4227 * @rdev: radeon_device pointer
4228 *
4229 * This sets up the TLBs, programs the page tables for VMID0,
4230 * sets up the hw for VMIDs 1-15 which are allocated on
4231 * demand, and sets up the global locations for the LDS, GDS,
4232 * and GPUVM for FSA64 clients (CIK).
4233 * Returns 0 for success, errors for failure.
4234 */
4235static int cik_pcie_gart_enable(struct radeon_device *rdev)
4236{
4237 int r, i;
4238
4239 if (rdev->gart.robj == NULL) {
4240 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4241 return -EINVAL;
4242 }
4243 r = radeon_gart_table_vram_pin(rdev);
4244 if (r)
4245 return r;
4246 radeon_gart_restore(rdev);
4247 /* Setup TLB control */
4248 WREG32(MC_VM_MX_L1_TLB_CNTL,
4249 (0xA << 7) |
4250 ENABLE_L1_TLB |
4251 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4252 ENABLE_ADVANCED_DRIVER_MODEL |
4253 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4254 /* Setup L2 cache */
4255 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4256 ENABLE_L2_FRAGMENT_PROCESSING |
4257 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4258 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4259 EFFECTIVE_L2_QUEUE_SIZE(7) |
4260 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4261 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4262 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4263 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4264 /* setup context0 */
4265 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4266 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4267 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4268 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4269 (u32)(rdev->dummy_page.addr >> 12));
4270 WREG32(VM_CONTEXT0_CNTL2, 0);
4271 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4272 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4273
4274 WREG32(0x15D4, 0);
4275 WREG32(0x15D8, 0);
4276 WREG32(0x15DC, 0);
4277
4278 /* empty context1-15 */
4279 /* FIXME start with 4G, once using 2 level pt switch to full
4280 * vm size space
4281 */
4282 /* set vm size, must be a multiple of 4 */
4283 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4284 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4285 for (i = 1; i < 16; i++) {
4286 if (i < 8)
4287 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4288 rdev->gart.table_addr >> 12);
4289 else
4290 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4291 rdev->gart.table_addr >> 12);
4292 }
4293
4294 /* enable context1-15 */
4295 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4296 (u32)(rdev->dummy_page.addr >> 12));
a00024b0 4297 WREG32(VM_CONTEXT1_CNTL2, 4);
1c49165d 4298 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
a00024b0
AD
4299 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4303 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4304 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4305 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4306 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4307 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4308 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4309 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
1c49165d
AD
4311
4312 /* TC cache setup ??? */
4313 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4314 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4315 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4316
4317 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4318 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4319 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4320 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4321 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4322
4323 WREG32(TC_CFG_L1_VOLATILE, 0);
4324 WREG32(TC_CFG_L2_VOLATILE, 0);
4325
4326 if (rdev->family == CHIP_KAVERI) {
4327 u32 tmp = RREG32(CHUB_CONTROL);
4328 tmp &= ~BYPASS_VM;
4329 WREG32(CHUB_CONTROL, tmp);
4330 }
4331
4332 /* XXX SH_MEM regs */
4333 /* where to put LDS, scratch, GPUVM in FSA64 space */
4334 for (i = 0; i < 16; i++) {
b556b12e 4335 cik_srbm_select(rdev, 0, 0, 0, i);
21a93e13 4336 /* CP and shaders */
1c49165d
AD
4337 WREG32(SH_MEM_CONFIG, 0);
4338 WREG32(SH_MEM_APE1_BASE, 1);
4339 WREG32(SH_MEM_APE1_LIMIT, 0);
4340 WREG32(SH_MEM_BASES, 0);
21a93e13
AD
4341 /* SDMA GFX */
4342 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4343 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4344 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4345 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4346 /* XXX SDMA RLC - todo */
1c49165d 4347 }
b556b12e 4348 cik_srbm_select(rdev, 0, 0, 0, 0);
1c49165d
AD
4349
4350 cik_pcie_gart_tlb_flush(rdev);
4351 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352 (unsigned)(rdev->mc.gtt_size >> 20),
4353 (unsigned long long)rdev->gart.table_addr);
4354 rdev->gart.ready = true;
4355 return 0;
4356}
4357
4358/**
4359 * cik_pcie_gart_disable - gart disable
4360 *
4361 * @rdev: radeon_device pointer
4362 *
4363 * This disables all VM page tables (CIK).
4364 */
4365static void cik_pcie_gart_disable(struct radeon_device *rdev)
4366{
4367 /* Disable all tables */
4368 WREG32(VM_CONTEXT0_CNTL, 0);
4369 WREG32(VM_CONTEXT1_CNTL, 0);
4370 /* Setup TLB control */
4371 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4372 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4373 /* Setup L2 cache */
4374 WREG32(VM_L2_CNTL,
4375 ENABLE_L2_FRAGMENT_PROCESSING |
4376 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4377 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4378 EFFECTIVE_L2_QUEUE_SIZE(7) |
4379 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4380 WREG32(VM_L2_CNTL2, 0);
4381 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4382 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4383 radeon_gart_table_vram_unpin(rdev);
4384}
4385
4386/**
4387 * cik_pcie_gart_fini - vm fini callback
4388 *
4389 * @rdev: radeon_device pointer
4390 *
4391 * Tears down the driver GART/VM setup (CIK).
4392 */
4393static void cik_pcie_gart_fini(struct radeon_device *rdev)
4394{
4395 cik_pcie_gart_disable(rdev);
4396 radeon_gart_table_vram_free(rdev);
4397 radeon_gart_fini(rdev);
4398}
4399
4400/* vm parser */
4401/**
4402 * cik_ib_parse - vm ib_parse callback
4403 *
4404 * @rdev: radeon_device pointer
4405 * @ib: indirect buffer pointer
4406 *
4407 * CIK uses hw IB checking so this is a nop (CIK).
4408 */
4409int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4410{
4411 return 0;
4412}
4413
4414/*
4415 * vm
4416 * VMID 0 is the physical GPU addresses as used by the kernel.
4417 * VMIDs 1-15 are used for userspace clients and are handled
4418 * by the radeon vm/hsa code.
4419 */
4420/**
4421 * cik_vm_init - cik vm init callback
4422 *
4423 * @rdev: radeon_device pointer
4424 *
4425 * Inits cik specific vm parameters (number of VMs, base of vram for
4426 * VMIDs 1-15) (CIK).
4427 * Returns 0 for success.
4428 */
4429int cik_vm_init(struct radeon_device *rdev)
4430{
4431 /* number of VMs */
4432 rdev->vm_manager.nvm = 16;
4433 /* base offset of vram pages */
4434 if (rdev->flags & RADEON_IS_IGP) {
4435 u64 tmp = RREG32(MC_VM_FB_OFFSET);
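		/* MC_VM_FB_OFFSET is programmed in 4 MiB units; the shift
		 * by 22 below converts it to a byte address */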
4436 tmp <<= 22;
4437 rdev->vm_manager.vram_base_offset = tmp;
4438 } else
4439 rdev->vm_manager.vram_base_offset = 0;
4440
4441 return 0;
4442}
4443
4444/**
4445 * cik_vm_fini - cik vm fini callback
4446 *
4447 * @rdev: radeon_device pointer
4448 *
4449 * Tear down any asic specific VM setup (CIK).
4450 */
4451void cik_vm_fini(struct radeon_device *rdev)
4452{
4453}
4454
f96ab484
AD
4455/**
4456 * cik_vm_flush - cik vm flush using the CP
4457 *
4458 * @rdev: radeon_device pointer
4459 *
4460 * Update the page table base and flush the VM TLB
4461 * using the CP (CIK).
4462 */
4463void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4464{
4465 struct radeon_ring *ring = &rdev->ring[ridx];
4466
4467 if (vm == NULL)
4468 return;
4469
4470 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4471 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4472 WRITE_DATA_DST_SEL(0)));
4473 if (vm->id < 8) {
4474 radeon_ring_write(ring,
4475 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4476 } else {
4477 radeon_ring_write(ring,
4478 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4479 }
4480 radeon_ring_write(ring, 0);
4481 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4482
4483 /* update SH_MEM_* regs */
4484 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4485 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4486 WRITE_DATA_DST_SEL(0)));
4487 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4488 radeon_ring_write(ring, 0);
4489 radeon_ring_write(ring, VMID(vm->id));
4490
4491 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4492 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493 WRITE_DATA_DST_SEL(0)));
4494 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4495 radeon_ring_write(ring, 0);
4496
4497 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4498 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4499 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4500 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4501
4502 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4503 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4504 WRITE_DATA_DST_SEL(0)));
4505 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4506 radeon_ring_write(ring, 0);
4507 radeon_ring_write(ring, VMID(0));
4508
4509 /* HDP flush */
4510 /* We should be using the WAIT_REG_MEM packet here like in
4511 * cik_fence_ring_emit(), but it causes the CP to hang in this
4512 * context...
4513 */
4514 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516 WRITE_DATA_DST_SEL(0)));
4517 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4518 radeon_ring_write(ring, 0);
4519 radeon_ring_write(ring, 0);
4520
4521 /* bits 0-15 are the VM contexts0-15 */
4522 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4523 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4524 WRITE_DATA_DST_SEL(0)));
4525 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4526 radeon_ring_write(ring, 0);
4527 radeon_ring_write(ring, 1 << vm->id);
4528
b07fdd38
AD
4529 /* compute doesn't have PFP */
4530 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4531 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4532 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533 radeon_ring_write(ring, 0x0);
4534 }
f96ab484
AD
4535}
4536
d0e092d9
AD
4537/**
4538 * cik_vm_set_page - update the page tables using CP or sDMA
4539 *
4540 * @rdev: radeon_device pointer
4541 * @ib: indirect buffer to fill with commands
4542 * @pe: addr of the page entry
4543 * @addr: dst addr to write into pe
4544 * @count: number of page entries to update
4545 * @incr: increase next addr by incr bytes
4546 * @flags: access flags
4547 *
4548 * Update the page tables using CP or sDMA (CIK).
4549 */
4550void cik_vm_set_page(struct radeon_device *rdev,
4551 struct radeon_ib *ib,
4552 uint64_t pe,
4553 uint64_t addr, unsigned count,
4554 uint32_t incr, uint32_t flags)
4555{
4556 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4557 uint64_t value;
4558 unsigned ndw;
4559
4560 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4561 /* CP */
4562 while (count) {
4563 ndw = 2 + count * 2;
4564 if (ndw > 0x3FFE)
4565 ndw = 0x3FFE;
4566
4567 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4568 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4569 WRITE_DATA_DST_SEL(1));
4570 ib->ptr[ib->length_dw++] = pe;
4571 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4572 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4573 if (flags & RADEON_VM_PAGE_SYSTEM) {
4574 value = radeon_vm_map_gart(rdev, addr);
4575 value &= 0xFFFFFFFFFFFFF000ULL;
4576 } else if (flags & RADEON_VM_PAGE_VALID) {
4577 value = addr;
4578 } else {
4579 value = 0;
4580 }
4581 addr += incr;
4582 value |= r600_flags;
4583 ib->ptr[ib->length_dw++] = value;
4584 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4585 }
4586 }
4587 } else {
4588 /* DMA */
4589 if (flags & RADEON_VM_PAGE_SYSTEM) {
4590 while (count) {
4591 ndw = count * 2;
4592 if (ndw > 0xFFFFE)
4593 ndw = 0xFFFFE;
4594
4595 /* for non-physically contiguous pages (system) */
4596 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4597 ib->ptr[ib->length_dw++] = pe;
4598 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4599 ib->ptr[ib->length_dw++] = ndw;
4600 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4601 if (flags & RADEON_VM_PAGE_SYSTEM) {
4602 value = radeon_vm_map_gart(rdev, addr);
4603 value &= 0xFFFFFFFFFFFFF000ULL;
4604 } else if (flags & RADEON_VM_PAGE_VALID) {
4605 value = addr;
4606 } else {
4607 value = 0;
4608 }
4609 addr += incr;
4610 value |= r600_flags;
4611 ib->ptr[ib->length_dw++] = value;
4612 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4613 }
4614 }
4615 } else {
4616 while (count) {
4617 ndw = count;
4618 if (ndw > 0x7FFFF)
4619 ndw = 0x7FFFF;
4620
4621 if (flags & RADEON_VM_PAGE_VALID)
4622 value = addr;
4623 else
4624 value = 0;
4625 /* for physically contiguous pages (vram) */
4626 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4627 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4628 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4629 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4630 ib->ptr[ib->length_dw++] = 0;
4631 ib->ptr[ib->length_dw++] = value; /* value */
4632 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4633 ib->ptr[ib->length_dw++] = incr; /* increment size */
4634 ib->ptr[ib->length_dw++] = 0;
4635 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4636 pe += ndw * 8;
4637 addr += ndw * incr;
4638 count -= ndw;
4639 }
4640 }
4641 while (ib->length_dw & 0x7)
4642 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4643 }
4644}
4645
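/* Illustrative only: mapping 512 physically contiguous 4 KiB VRAM pages
 * in a single call.  Assuming the page-table updates go through sDMA (the
 * default ring for this on CIK) and RADEON_VM_PAGE_SYSTEM is not set, the
 * sDMA path above emits one GENERATE_PTE_PDE packet (10 dwords) covering
 * all 512 entries instead of writing each PTE individually:
 *
 *	cik_vm_set_page(rdev, ib, pe, addr, 512, RADEON_GPU_PAGE_SIZE,
 *			RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_READABLE);
 */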
605de6b9
AD
4646/**
4647 * cik_dma_vm_flush - cik vm flush using sDMA
4648 *
4649 * @rdev: radeon_device pointer
4650 *
4651 * Update the page table base and flush the VM TLB
4652 * using sDMA (CIK).
4653 */
4654void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4655{
4656 struct radeon_ring *ring = &rdev->ring[ridx];
4657 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4658 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4659 u32 ref_and_mask;
4660
4661 if (vm == NULL)
4662 return;
4663
4664 if (ridx == R600_RING_TYPE_DMA_INDEX)
4665 ref_and_mask = SDMA0;
4666 else
4667 ref_and_mask = SDMA1;
4668
4669 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4670 if (vm->id < 8) {
4671 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4672 } else {
4673 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4674 }
4675 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4676
4677 /* update SH_MEM_* regs */
4678 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4680 radeon_ring_write(ring, VMID(vm->id));
4681
4682 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4683 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4684 radeon_ring_write(ring, 0);
4685
4686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4688 radeon_ring_write(ring, 0);
4689
4690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4692 radeon_ring_write(ring, 1);
4693
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4696 radeon_ring_write(ring, 0);
4697
4698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4700 radeon_ring_write(ring, VMID(0));
4701
4702 /* flush HDP */
4703 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4704 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4705 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4706 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4707 radeon_ring_write(ring, ref_and_mask); /* MASK */
4708 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4709
4710 /* flush TLB */
4711 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4713 radeon_ring_write(ring, 1 << vm->id);
4714}
4715
f6796cae
AD
4716/*
4717 * RLC
4718 * The RLC is a multi-purpose microengine that handles a
4719 * variety of functions, the most important of which is
4720 * the interrupt controller.
4721 */
4722/**
4723 * cik_rlc_stop - stop the RLC ME
4724 *
4725 * @rdev: radeon_device pointer
4726 *
4727 * Halt the RLC ME (MicroEngine) (CIK).
4728 */
4729static void cik_rlc_stop(struct radeon_device *rdev)
4730{
4731 int i, j, k;
4732 u32 mask, tmp;
4733
4734 tmp = RREG32(CP_INT_CNTL_RING0);
4735 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4736 WREG32(CP_INT_CNTL_RING0, tmp);
4737
4738 RREG32(CB_CGTT_SCLK_CTRL);
4739 RREG32(CB_CGTT_SCLK_CTRL);
4740 RREG32(CB_CGTT_SCLK_CTRL);
4741 RREG32(CB_CGTT_SCLK_CTRL);
4742
4743 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4744 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4745
4746 WREG32(RLC_CNTL, 0);
4747
4748 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4749 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4750 cik_select_se_sh(rdev, i, j);
4751 for (k = 0; k < rdev->usec_timeout; k++) {
4752 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4753 break;
4754 udelay(1);
4755 }
4756 }
4757 }
4758 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4759
4760 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4761 for (k = 0; k < rdev->usec_timeout; k++) {
4762 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4763 break;
4764 udelay(1);
4765 }
4766}
4767
4768/**
4769 * cik_rlc_start - start the RLC ME
4770 *
4771 * @rdev: radeon_device pointer
4772 *
4773 * Unhalt the RLC ME (MicroEngine) (CIK).
4774 */
4775static void cik_rlc_start(struct radeon_device *rdev)
4776{
4777 u32 tmp;
4778
4779 WREG32(RLC_CNTL, RLC_ENABLE);
4780
4781 tmp = RREG32(CP_INT_CNTL_RING0);
4782 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4783 WREG32(CP_INT_CNTL_RING0, tmp);
4784
4785 udelay(50);
4786}
4787
4788/**
4789 * cik_rlc_resume - setup the RLC hw
4790 *
4791 * @rdev: radeon_device pointer
4792 *
4793 * Initialize the RLC registers, load the ucode,
4794 * and start the RLC (CIK).
4795 * Returns 0 for success, -EINVAL if the ucode is not available.
4796 */
4797static int cik_rlc_resume(struct radeon_device *rdev)
4798{
4799 u32 i, size;
4800 u32 clear_state_info[3];
4801 const __be32 *fw_data;
4802
4803 if (!rdev->rlc_fw)
4804 return -EINVAL;
4805
4806 switch (rdev->family) {
4807 case CHIP_BONAIRE:
4808 default:
4809 size = BONAIRE_RLC_UCODE_SIZE;
4810 break;
4811 case CHIP_KAVERI:
4812 size = KV_RLC_UCODE_SIZE;
4813 break;
4814 case CHIP_KABINI:
4815 size = KB_RLC_UCODE_SIZE;
4816 break;
4817 }
4818
4819 cik_rlc_stop(rdev);
4820
4821 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4822 RREG32(GRBM_SOFT_RESET);
4823 udelay(50);
4824 WREG32(GRBM_SOFT_RESET, 0);
4825 RREG32(GRBM_SOFT_RESET);
4826 udelay(50);
4827
4828 WREG32(RLC_LB_CNTR_INIT, 0);
4829 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4830
4831 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4832 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4833 WREG32(RLC_LB_PARAMS, 0x00600408);
4834 WREG32(RLC_LB_CNTL, 0x80000004);
4835
4836 WREG32(RLC_MC_CNTL, 0);
4837 WREG32(RLC_UCODE_CNTL, 0);
4838
4839 fw_data = (const __be32 *)rdev->rlc_fw->data;
4840 WREG32(RLC_GPM_UCODE_ADDR, 0);
4841 for (i = 0; i < size; i++)
4842 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4843 WREG32(RLC_GPM_UCODE_ADDR, 0);
4844
4845 /* XXX */
4846 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4847 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4848 clear_state_info[2] = 0;//cik_default_size;
4849 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4850 for (i = 0; i < 3; i++)
4851 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4852 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4853
4854 cik_rlc_start(rdev);
4855
4856 return 0;
4857}
a59781bb
AD
4858
4859/*
4860 * Interrupts
4861 * Starting with r6xx, interrupts are handled via a ring buffer.
4862 * Ring buffers are areas of GPU accessible memory that the GPU
4863 * writes interrupt vectors into and the host reads vectors out of.
4864 * There is a rptr (read pointer) that determines where the
4865 * host is currently reading, and a wptr (write pointer)
4866 * which determines where the GPU has written. When the
4867 * pointers are equal, the ring is idle. When the GPU
4868 * writes vectors to the ring buffer, it increments the
4869 * wptr. When there is an interrupt, the host then starts
4870 * fetching vectors and processing them until the pointers are
4871 * equal again at which point it updates the rptr.
4872 */
4873
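/* Illustrative only: a minimal sketch of the rptr/wptr scheme described
 * above, with writeback, overflow handling and per-source dispatch left
 * out.  Each IH vector is 16 bytes (4 dwords); the function name is
 * hypothetical and does not exist in the driver.
 */
static void __maybe_unused example_ih_drain(struct radeon_device *rdev)
{
	u32 wptr = RREG32(IH_RB_WPTR);
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		/* dword 0 of each vector carries the source id */
		u32 src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;

		/* ... dispatch on src_id here ... */
		(void)src_id;
		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	/* tell the GPU how far we have read */
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);
}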
4874/**
4875 * cik_enable_interrupts - Enable the interrupt ring buffer
4876 *
4877 * @rdev: radeon_device pointer
4878 *
4879 * Enable the interrupt ring buffer (CIK).
4880 */
4881static void cik_enable_interrupts(struct radeon_device *rdev)
4882{
4883 u32 ih_cntl = RREG32(IH_CNTL);
4884 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4885
4886 ih_cntl |= ENABLE_INTR;
4887 ih_rb_cntl |= IH_RB_ENABLE;
4888 WREG32(IH_CNTL, ih_cntl);
4889 WREG32(IH_RB_CNTL, ih_rb_cntl);
4890 rdev->ih.enabled = true;
4891}
4892
4893/**
4894 * cik_disable_interrupts - Disable the interrupt ring buffer
4895 *
4896 * @rdev: radeon_device pointer
4897 *
4898 * Disable the interrupt ring buffer (CIK).
4899 */
4900static void cik_disable_interrupts(struct radeon_device *rdev)
4901{
4902 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4903 u32 ih_cntl = RREG32(IH_CNTL);
4904
4905 ih_rb_cntl &= ~IH_RB_ENABLE;
4906 ih_cntl &= ~ENABLE_INTR;
4907 WREG32(IH_RB_CNTL, ih_rb_cntl);
4908 WREG32(IH_CNTL, ih_cntl);
4909 /* set rptr, wptr to 0 */
4910 WREG32(IH_RB_RPTR, 0);
4911 WREG32(IH_RB_WPTR, 0);
4912 rdev->ih.enabled = false;
4913 rdev->ih.rptr = 0;
4914}
4915
4916/**
4917 * cik_disable_interrupt_state - Disable all interrupt sources
4918 *
4919 * @rdev: radeon_device pointer
4920 *
4921 * Clear all interrupt enable bits used by the driver (CIK).
4922 */
4923static void cik_disable_interrupt_state(struct radeon_device *rdev)
4924{
4925 u32 tmp;
4926
4927 /* gfx ring */
4928 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
21a93e13
AD
4929 /* sdma */
4930 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4931 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4932 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4933 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
a59781bb
AD
4934 /* compute queues */
4935 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4936 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4937 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4938 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4939 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4940 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4941 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4942 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4943 /* grbm */
4944 WREG32(GRBM_INT_CNTL, 0);
4945 /* vline/vblank, etc. */
4946 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4948 if (rdev->num_crtc >= 4) {
4949 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4950 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4951 }
4952 if (rdev->num_crtc >= 6) {
4953 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4954 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4955 }
4956
4957 /* dac hotplug */
4958 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4959
4960 /* digital hotplug */
4961 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4962 WREG32(DC_HPD1_INT_CONTROL, tmp);
4963 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4964 WREG32(DC_HPD2_INT_CONTROL, tmp);
4965 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4966 WREG32(DC_HPD3_INT_CONTROL, tmp);
4967 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4968 WREG32(DC_HPD4_INT_CONTROL, tmp);
4969 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4970 WREG32(DC_HPD5_INT_CONTROL, tmp);
4971 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4972 WREG32(DC_HPD6_INT_CONTROL, tmp);
4973
4974}
4975
4976/**
4977 * cik_irq_init - init and enable the interrupt ring
4978 *
4979 * @rdev: radeon_device pointer
4980 *
4981 * Allocate a ring buffer for the interrupt controller,
4982 * enable the RLC, disable interrupts, set up the IH
4983 * ring buffer, and enable it (CIK).
4984 * Called at device load and resume.
4985 * Returns 0 for success, errors for failure.
4986 */
4987static int cik_irq_init(struct radeon_device *rdev)
4988{
4989 int ret = 0;
4990 int rb_bufsz;
4991 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4992
4993 /* allocate ring */
4994 ret = r600_ih_ring_alloc(rdev);
4995 if (ret)
4996 return ret;
4997
4998 /* disable irqs */
4999 cik_disable_interrupts(rdev);
5000
5001 /* init rlc */
5002 ret = cik_rlc_resume(rdev);
5003 if (ret) {
5004 r600_ih_ring_fini(rdev);
5005 return ret;
5006 }
5007
5008 /* setup interrupt control */
5009 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5010 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5011 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5012 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5013 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5014 */
5015 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5016 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5017 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5018 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5019
5020 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5021 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5022
5023 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5024 IH_WPTR_OVERFLOW_CLEAR |
5025 (rb_bufsz << 1));
5026
5027 if (rdev->wb.enabled)
5028 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5029
5030 /* set the writeback address whether it's enabled or not */
5031 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5032 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5033
5034 WREG32(IH_RB_CNTL, ih_rb_cntl);
5035
5036 /* set rptr, wptr to 0 */
5037 WREG32(IH_RB_RPTR, 0);
5038 WREG32(IH_RB_WPTR, 0);
5039
5040 /* Default settings for IH_CNTL (disabled at first) */
5041 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5042 /* RPTR_REARM only works if msi's are enabled */
5043 if (rdev->msi_enabled)
5044 ih_cntl |= RPTR_REARM;
5045 WREG32(IH_CNTL, ih_cntl);
5046
5047 /* force the active interrupt state to all disabled */
5048 cik_disable_interrupt_state(rdev);
5049
5050 pci_set_master(rdev->pdev);
5051
5052 /* enable irqs */
5053 cik_enable_interrupts(rdev);
5054
5055 return ret;
5056}
5057
5058/**
5059 * cik_irq_set - enable/disable interrupt sources
5060 *
5061 * @rdev: radeon_device pointer
5062 *
5063 * Enable interrupt sources on the GPU (vblanks, hpd,
5064 * etc.) (CIK).
5065 * Returns 0 for success, errors for failure.
5066 */
5067int cik_irq_set(struct radeon_device *rdev)
5068{
5069 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5070 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
2b0781a6
AD
5071 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5072 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
a59781bb
AD
5073 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5074 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5075 u32 grbm_int_cntl = 0;
21a93e13 5076 u32 dma_cntl, dma_cntl1;
a59781bb
AD
5077
5078 if (!rdev->irq.installed) {
5079 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5080 return -EINVAL;
5081 }
5082 /* don't enable anything if the ih is disabled */
5083 if (!rdev->ih.enabled) {
5084 cik_disable_interrupts(rdev);
5085 /* force the active interrupt state to all disabled */
5086 cik_disable_interrupt_state(rdev);
5087 return 0;
5088 }
5089
5090 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5091 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5092 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5093 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5094 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5095 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5096
21a93e13
AD
5097 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5098 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5099
2b0781a6
AD
5100 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5101 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5102 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5103 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5104 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5105 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5106 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5107 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5108
a59781bb
AD
5109 /* enable CP interrupts on all rings */
5110 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5111 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5112 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5113 }
2b0781a6
AD
5114 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5115 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5116 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5117 if (ring->me == 1) {
5118 switch (ring->pipe) {
5119 case 0:
5120 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5121 break;
5122 case 1:
5123 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5124 break;
5125 case 2:
5126 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5127 break;
5128 case 3:
5129 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5130 break;
5131 default:
5132 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5133 break;
5134 }
5135 } else if (ring->me == 2) {
5136 switch (ring->pipe) {
5137 case 0:
5138 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5139 break;
5140 case 1:
5141 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5142 break;
5143 case 2:
5144 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5145 break;
5146 case 3:
5147 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5148 break;
5149 default:
5150 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5151 break;
5152 }
5153 } else {
5154 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5155 }
5156 }
5157 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5158 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5159 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5160 if (ring->me == 1) {
5161 switch (ring->pipe) {
5162 case 0:
5163 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5164 break;
5165 case 1:
5166 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5167 break;
5168 case 2:
5169 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5170 break;
5171 case 3:
5172 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5173 break;
5174 default:
5175 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5176 break;
5177 }
5178 } else if (ring->me == 2) {
5179 switch (ring->pipe) {
5180 case 0:
5181 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5182 break;
5183 case 1:
5184 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5185 break;
5186 case 2:
5187 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5188 break;
5189 case 3:
5190 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5191 break;
5192 default:
5193 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5194 break;
5195 }
5196 } else {
5197 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5198 }
5199 }
a59781bb 5200
21a93e13
AD
5201 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5202 DRM_DEBUG("cik_irq_set: sw int dma\n");
5203 dma_cntl |= TRAP_ENABLE;
5204 }
5205
5206 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5207 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5208 dma_cntl1 |= TRAP_ENABLE;
5209 }
5210
a59781bb
AD
5211 if (rdev->irq.crtc_vblank_int[0] ||
5212 atomic_read(&rdev->irq.pflip[0])) {
5213 DRM_DEBUG("cik_irq_set: vblank 0\n");
5214 crtc1 |= VBLANK_INTERRUPT_MASK;
5215 }
5216 if (rdev->irq.crtc_vblank_int[1] ||
5217 atomic_read(&rdev->irq.pflip[1])) {
5218 DRM_DEBUG("cik_irq_set: vblank 1\n");
5219 crtc2 |= VBLANK_INTERRUPT_MASK;
5220 }
5221 if (rdev->irq.crtc_vblank_int[2] ||
5222 atomic_read(&rdev->irq.pflip[2])) {
5223 DRM_DEBUG("cik_irq_set: vblank 2\n");
5224 crtc3 |= VBLANK_INTERRUPT_MASK;
5225 }
5226 if (rdev->irq.crtc_vblank_int[3] ||
5227 atomic_read(&rdev->irq.pflip[3])) {
5228 DRM_DEBUG("cik_irq_set: vblank 3\n");
5229 crtc4 |= VBLANK_INTERRUPT_MASK;
5230 }
5231 if (rdev->irq.crtc_vblank_int[4] ||
5232 atomic_read(&rdev->irq.pflip[4])) {
5233 DRM_DEBUG("cik_irq_set: vblank 4\n");
5234 crtc5 |= VBLANK_INTERRUPT_MASK;
5235 }
5236 if (rdev->irq.crtc_vblank_int[5] ||
5237 atomic_read(&rdev->irq.pflip[5])) {
5238 DRM_DEBUG("cik_irq_set: vblank 5\n");
5239 crtc6 |= VBLANK_INTERRUPT_MASK;
5240 }
5241 if (rdev->irq.hpd[0]) {
5242 DRM_DEBUG("cik_irq_set: hpd 1\n");
5243 hpd1 |= DC_HPDx_INT_EN;
5244 }
5245 if (rdev->irq.hpd[1]) {
5246 DRM_DEBUG("cik_irq_set: hpd 2\n");
5247 hpd2 |= DC_HPDx_INT_EN;
5248 }
5249 if (rdev->irq.hpd[2]) {
5250 DRM_DEBUG("cik_irq_set: hpd 3\n");
5251 hpd3 |= DC_HPDx_INT_EN;
5252 }
5253 if (rdev->irq.hpd[3]) {
5254 DRM_DEBUG("cik_irq_set: hpd 4\n");
5255 hpd4 |= DC_HPDx_INT_EN;
5256 }
5257 if (rdev->irq.hpd[4]) {
5258 DRM_DEBUG("cik_irq_set: hpd 5\n");
5259 hpd5 |= DC_HPDx_INT_EN;
5260 }
5261 if (rdev->irq.hpd[5]) {
5262 DRM_DEBUG("cik_irq_set: hpd 6\n");
5263 hpd6 |= DC_HPDx_INT_EN;
5264 }
5265
5266 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5267
5268 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5269 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5270
5271 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5272 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5273 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5274 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5275 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5276 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5277 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5278 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5279
5280 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5281
5282 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5283 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5284 if (rdev->num_crtc >= 4) {
5285 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5286 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5287 }
5288 if (rdev->num_crtc >= 6) {
5289 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5291 }
5292
5293 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5294 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5295 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5296 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5297 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5298 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5299
5300 return 0;
5301}
5302
5303/**
5304 * cik_irq_ack - ack interrupt sources
5305 *
5306 * @rdev: radeon_device pointer
5307 *
5308 * Ack interrupt sources on the GPU (vblanks, hpd,
5309 * etc.) (CIK). Certain interrupt sources are sw
5310 * generated and do not require an explicit ack.
5311 */
5312static inline void cik_irq_ack(struct radeon_device *rdev)
5313{
5314 u32 tmp;
5315
5316 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5317 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5318 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5319 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5320 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5321 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5322 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5323
5324 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5325 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5326 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5327 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5328 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5329 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5330 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5331 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5332
5333 if (rdev->num_crtc >= 4) {
5334 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5335 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5336 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5337 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5338 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5339 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5340 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5341 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5342 }
5343
5344 if (rdev->num_crtc >= 6) {
5345 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5349 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5350 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5351 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5352 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5353 }
5354
5355 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5356 tmp = RREG32(DC_HPD1_INT_CONTROL);
5357 tmp |= DC_HPDx_INT_ACK;
5358 WREG32(DC_HPD1_INT_CONTROL, tmp);
5359 }
5360 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5361 tmp = RREG32(DC_HPD2_INT_CONTROL);
5362 tmp |= DC_HPDx_INT_ACK;
5363 WREG32(DC_HPD2_INT_CONTROL, tmp);
5364 }
5365 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5366 tmp = RREG32(DC_HPD3_INT_CONTROL);
5367 tmp |= DC_HPDx_INT_ACK;
5368 WREG32(DC_HPD3_INT_CONTROL, tmp);
5369 }
5370 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5371 tmp = RREG32(DC_HPD4_INT_CONTROL);
5372 tmp |= DC_HPDx_INT_ACK;
5373 WREG32(DC_HPD4_INT_CONTROL, tmp);
5374 }
5375 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5376 tmp = RREG32(DC_HPD5_INT_CONTROL);
5377 tmp |= DC_HPDx_INT_ACK;
5378 WREG32(DC_HPD5_INT_CONTROL, tmp);
5379 }
5380 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5381 tmp = RREG32(DC_HPD6_INT_CONTROL);
5382 tmp |= DC_HPDx_INT_ACK;
5383 WREG32(DC_HPD6_INT_CONTROL, tmp);
5384 }
5385}
5386
5387/**
5388 * cik_irq_disable - disable interrupts
5389 *
5390 * @rdev: radeon_device pointer
5391 *
5392 * Disable interrupts on the hw (CIK).
5393 */
5394static void cik_irq_disable(struct radeon_device *rdev)
5395{
5396 cik_disable_interrupts(rdev);
5397 /* Wait and acknowledge irq */
5398 mdelay(1);
5399 cik_irq_ack(rdev);
5400 cik_disable_interrupt_state(rdev);
5401}
5402
5403/**
5404 * cik_irq_suspend - disable interrupts for suspend
5405 *
5406 * @rdev: radeon_device pointer
5407 *
5408 * Disable interrupts and stop the RLC (CIK).
5409 * Used for suspend.
5410 */
5411static void cik_irq_suspend(struct radeon_device *rdev)
5412{
5413 cik_irq_disable(rdev);
5414 cik_rlc_stop(rdev);
5415}
5416
5417/**
5418 * cik_irq_fini - tear down interrupt support
5419 *
5420 * @rdev: radeon_device pointer
5421 *
5422 * Disable interrupts on the hw and free the IH ring
5423 * buffer (CIK).
5424 * Used for driver unload.
5425 */
5426static void cik_irq_fini(struct radeon_device *rdev)
5427{
5428 cik_irq_suspend(rdev);
5429 r600_ih_ring_fini(rdev);
5430}
5431
5432/**
5433 * cik_get_ih_wptr - get the IH ring buffer wptr
5434 *
5435 * @rdev: radeon_device pointer
5436 *
5437 * Get the IH ring buffer wptr from either the register
5438 * or the writeback memory buffer (CIK). Also check for
5439 * ring buffer overflow and deal with it.
5440 * Used by cik_irq_process().
5441 * Returns the value of the wptr.
5442 */
5443static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5444{
5445 u32 wptr, tmp;
5446
5447 if (rdev->wb.enabled)
5448 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5449 else
5450 wptr = RREG32(IH_RB_WPTR);
5451
5452 if (wptr & RB_OVERFLOW) {
5453 /* When a ring buffer overflow happens, start parsing interrupts
5454 * from the last not-overwritten vector (wptr + 16). Hopefully
5455 * this should allow us to catch up.
5456 */
5457 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5458 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5459 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5460 tmp = RREG32(IH_RB_CNTL);
5461 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5462 WREG32(IH_RB_CNTL, tmp);
5463 }
5464 return (wptr & rdev->ih.ptr_mask);
5465}
5466
5467/* CIK IV Ring
5468 * Each IV ring entry is 128 bits:
5469 * [7:0] - interrupt source id
5470 * [31:8] - reserved
5471 * [59:32] - interrupt source data
5472 * [63:60] - reserved
5473 * [71:64] - RINGID
5474 * CP:
5475 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5476 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5477 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5478 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5479 * PIPE_ID - ME0 0=3D
5480 * - ME1&2 compute dispatcher (4 pipes each)
5481 * SDMA:
5482 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5483 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5484 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5485 * [79:72] - VMID
5486 * [95:80] - PASID
5487 * [127:96] - reserved
5488 */
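/* A minimal decode sketch for the RINGID byte described above.  The helper
 * names below are illustrative (they do not exist elsewhere in the driver)
 * and the masks simply mirror the ones used in cik_irq_process(), which
 * itself still carries "XXX check the bitfield order!" caveats, so treat
 * the exact layout as an assumption.
 */
static inline u32 cik_iv_cp_me_id(u32 ring_id)
{
	return (ring_id >> 5) & 0x3;	/* CP ME_ID [1:0] */
}

static inline u32 cik_iv_cp_pipe_id(u32 ring_id)
{
	return (ring_id >> 3) & 0x3;	/* CP PIPE_ID [1:0] */
}

static inline u32 cik_iv_cp_queue_id(u32 ring_id)
{
	return ring_id & 0x7;		/* CP QUEUE_ID [2:0] */
}

static inline u32 cik_iv_sdma_instance_id(u32 ring_id)
{
	return ring_id & 0x3;		/* SDMA INSTANCE_ID [1:0] */
}

static inline u32 cik_iv_sdma_queue_id(u32 ring_id)
{
	return (ring_id >> 2) & 0x3;	/* SDMA QUEUE_ID [1:0] */
}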
5489/**
5490 * cik_irq_process - interrupt handler
5491 *
5492 * @rdev: radeon_device pointer
5493 *
5494 * Interrupt handler (CIK). Walk the IH ring,
5495 * ack interrupts and schedule work to handle
5496 * interrupt events.
5497 * Returns irq process return code.
5498 */
5499int cik_irq_process(struct radeon_device *rdev)
5500{
5501 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5502 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5503 u32 wptr;
5504 u32 rptr;
5505 u32 src_id, src_data, ring_id;
5506 u8 me_id, pipe_id, queue_id;
5507 u32 ring_index;
5508 bool queue_hotplug = false;
5509 bool queue_reset = false;
5510
5511 if (!rdev->ih.enabled || rdev->shutdown)
5512 return IRQ_NONE;
5513
5514 wptr = cik_get_ih_wptr(rdev);
5515
5516restart_ih:
5517 /* is somebody else already processing irqs? */
5518 if (atomic_xchg(&rdev->ih.lock, 1))
5519 return IRQ_NONE;
5520
5521 rptr = rdev->ih.rptr;
5522 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5523
5524 /* Order reading of wptr vs. reading of IH ring data */
5525 rmb();
5526
5527 /* display interrupts */
5528 cik_irq_ack(rdev);
5529
5530 while (rptr != wptr) {
5531 /* wptr/rptr are in bytes! */
5532 ring_index = rptr / 4;
5533 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5534 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5535 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5536
5537 switch (src_id) {
5538 case 1: /* D1 vblank/vline */
5539 switch (src_data) {
5540 case 0: /* D1 vblank */
5541 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5542 if (rdev->irq.crtc_vblank_int[0]) {
5543 drm_handle_vblank(rdev->ddev, 0);
5544 rdev->pm.vblank_sync = true;
5545 wake_up(&rdev->irq.vblank_queue);
5546 }
5547 if (atomic_read(&rdev->irq.pflip[0]))
5548 radeon_crtc_handle_flip(rdev, 0);
5549 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5550 DRM_DEBUG("IH: D1 vblank\n");
5551 }
5552 break;
5553 case 1: /* D1 vline */
5554 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5555 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5556 DRM_DEBUG("IH: D1 vline\n");
5557 }
5558 break;
5559 default:
5560 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5561 break;
5562 }
5563 break;
5564 case 2: /* D2 vblank/vline */
5565 switch (src_data) {
5566 case 0: /* D2 vblank */
5567 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5568 if (rdev->irq.crtc_vblank_int[1]) {
5569 drm_handle_vblank(rdev->ddev, 1);
5570 rdev->pm.vblank_sync = true;
5571 wake_up(&rdev->irq.vblank_queue);
5572 }
5573 if (atomic_read(&rdev->irq.pflip[1]))
5574 radeon_crtc_handle_flip(rdev, 1);
5575 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5576 DRM_DEBUG("IH: D2 vblank\n");
5577 }
5578 break;
5579 case 1: /* D2 vline */
5580 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5581 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5582 DRM_DEBUG("IH: D2 vline\n");
5583 }
5584 break;
5585 default:
5586 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5587 break;
5588 }
5589 break;
5590 case 3: /* D3 vblank/vline */
5591 switch (src_data) {
5592 case 0: /* D3 vblank */
5593 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5594 if (rdev->irq.crtc_vblank_int[2]) {
5595 drm_handle_vblank(rdev->ddev, 2);
5596 rdev->pm.vblank_sync = true;
5597 wake_up(&rdev->irq.vblank_queue);
5598 }
5599 if (atomic_read(&rdev->irq.pflip[2]))
5600 radeon_crtc_handle_flip(rdev, 2);
5601 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5602 DRM_DEBUG("IH: D3 vblank\n");
5603 }
5604 break;
5605 case 1: /* D3 vline */
5606 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5607 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5608 DRM_DEBUG("IH: D3 vline\n");
5609 }
5610 break;
5611 default:
5612 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5613 break;
5614 }
5615 break;
5616 case 4: /* D4 vblank/vline */
5617 switch (src_data) {
5618 case 0: /* D4 vblank */
5619 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5620 if (rdev->irq.crtc_vblank_int[3]) {
5621 drm_handle_vblank(rdev->ddev, 3);
5622 rdev->pm.vblank_sync = true;
5623 wake_up(&rdev->irq.vblank_queue);
5624 }
5625 if (atomic_read(&rdev->irq.pflip[3]))
5626 radeon_crtc_handle_flip(rdev, 3);
5627 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5628 DRM_DEBUG("IH: D4 vblank\n");
5629 }
5630 break;
5631 case 1: /* D4 vline */
5632 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5633 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5634 DRM_DEBUG("IH: D4 vline\n");
5635 }
5636 break;
5637 default:
5638 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5639 break;
5640 }
5641 break;
5642 case 5: /* D5 vblank/vline */
5643 switch (src_data) {
5644 case 0: /* D5 vblank */
5645 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5646 if (rdev->irq.crtc_vblank_int[4]) {
5647 drm_handle_vblank(rdev->ddev, 4);
5648 rdev->pm.vblank_sync = true;
5649 wake_up(&rdev->irq.vblank_queue);
5650 }
5651 if (atomic_read(&rdev->irq.pflip[4]))
5652 radeon_crtc_handle_flip(rdev, 4);
5653 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5654 DRM_DEBUG("IH: D5 vblank\n");
5655 }
5656 break;
5657 case 1: /* D5 vline */
5658 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5659 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5660 DRM_DEBUG("IH: D5 vline\n");
5661 }
5662 break;
5663 default:
5664 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5665 break;
5666 }
5667 break;
5668 case 6: /* D6 vblank/vline */
5669 switch (src_data) {
5670 case 0: /* D6 vblank */
5671 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5672 if (rdev->irq.crtc_vblank_int[5]) {
5673 drm_handle_vblank(rdev->ddev, 5);
5674 rdev->pm.vblank_sync = true;
5675 wake_up(&rdev->irq.vblank_queue);
5676 }
5677 if (atomic_read(&rdev->irq.pflip[5]))
5678 radeon_crtc_handle_flip(rdev, 5);
5679 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5680 DRM_DEBUG("IH: D6 vblank\n");
5681 }
5682 break;
5683 case 1: /* D6 vline */
5684 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5685 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5686 DRM_DEBUG("IH: D6 vline\n");
5687 }
5688 break;
5689 default:
5690 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5691 break;
5692 }
5693 break;
5694 case 42: /* HPD hotplug */
5695 switch (src_data) {
5696 case 0:
5697 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5698 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5699 queue_hotplug = true;
5700 DRM_DEBUG("IH: HPD1\n");
5701 }
5702 break;
5703 case 1:
5704 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5705 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5706 queue_hotplug = true;
5707 DRM_DEBUG("IH: HPD2\n");
5708 }
5709 break;
5710 case 2:
5711 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5712 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5713 queue_hotplug = true;
5714 DRM_DEBUG("IH: HPD3\n");
5715 }
5716 break;
5717 case 3:
5718 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5719 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5720 queue_hotplug = true;
5721 DRM_DEBUG("IH: HPD4\n");
5722 }
5723 break;
5724 case 4:
5725 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5726 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5727 queue_hotplug = true;
5728 DRM_DEBUG("IH: HPD5\n");
5729 }
5730 break;
5731 case 5:
5732 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5733 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5734 queue_hotplug = true;
5735 DRM_DEBUG("IH: HPD6\n");
5736 }
5737 break;
5738 default:
5739 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5740 break;
5741 }
5742 break;
5743 case 146:
5744 case 147:
5745 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5746 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5747 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5748 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5749 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5750 /* reset addr and status */
5751 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5752 break;
5753 case 176: /* GFX RB CP_INT */
5754 case 177: /* GFX IB CP_INT */
5755 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5756 break;
5757 case 181: /* CP EOP event */
5758 DRM_DEBUG("IH: CP EOP\n");
5759 /* XXX check the bitfield order! */
5760 me_id = (ring_id & 0x60) >> 5;
5761 pipe_id = (ring_id & 0x18) >> 3;
5762 queue_id = (ring_id & 0x7) >> 0;
5763 switch (me_id) {
5764 case 0:
5765 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5766 break;
5767 case 1:
5768 case 2:
5769 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5770 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5771 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5772 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5773 break;
5774 }
5775 break;
5776 case 184: /* CP Privileged reg access */
5777 DRM_ERROR("Illegal register access in command stream\n");
5778 /* XXX check the bitfield order! */
5779 me_id = (ring_id & 0x60) >> 5;
5780 pipe_id = (ring_id & 0x18) >> 3;
5781 queue_id = (ring_id & 0x7) >> 0;
5782 switch (me_id) {
5783 case 0:
5784 /* This results in a full GPU reset, but all we need to do is soft
5785 * reset the CP for gfx
5786 */
5787 queue_reset = true;
5788 break;
5789 case 1:
5790 /* XXX compute */
5791 queue_reset = true;
5792 break;
5793 case 2:
5794 /* XXX compute */
5795 queue_reset = true;
5796 break;
5797 }
5798 break;
5799 case 185: /* CP Privileged inst */
5800 DRM_ERROR("Illegal instruction in command stream\n");
5801 /* XXX check the bitfield order! */
5802 me_id = (ring_id & 0x60) >> 5;
5803 pipe_id = (ring_id & 0x18) >> 3;
5804 queue_id = (ring_id & 0x7) >> 0;
5805 switch (me_id) {
5806 case 0:
5807 /* This results in a full GPU reset, but all we need to do is soft
5808 * reset the CP for gfx
5809 */
5810 queue_reset = true;
5811 break;
5812 case 1:
5813 /* XXX compute */
5814 queue_reset = true;
5815 break;
5816 case 2:
5817 /* XXX compute */
5818 queue_reset = true;
5819 break;
5820 }
5821 break;
5822 case 224: /* SDMA trap event */
5823 /* XXX check the bitfield order! */
5824 me_id = (ring_id & 0x3) >> 0;
5825 queue_id = (ring_id & 0xc) >> 2;
5826 DRM_DEBUG("IH: SDMA trap\n");
5827 switch (me_id) {
5828 case 0:
5829 switch (queue_id) {
5830 case 0:
5831 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5832 break;
5833 case 1:
5834 /* XXX compute */
5835 break;
5836 case 2:
5837 /* XXX compute */
5838 break;
5839 }
5840 break;
5841 case 1:
5842 switch (queue_id) {
5843 case 0:
5844 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5845 break;
5846 case 1:
5847 /* XXX compute */
5848 break;
5849 case 2:
5850 /* XXX compute */
5851 break;
5852 }
5853 break;
5854 }
5855 break;
5856 case 241: /* SDMA Privileged inst */
5857 case 247: /* SDMA Privileged inst */
5858 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5859 /* XXX check the bitfield order! */
5860 me_id = (ring_id & 0x3) >> 0;
5861 queue_id = (ring_id & 0xc) >> 2;
5862 switch (me_id) {
5863 case 0:
5864 switch (queue_id) {
5865 case 0:
5866 queue_reset = true;
5867 break;
5868 case 1:
5869 /* XXX compute */
5870 queue_reset = true;
5871 break;
5872 case 2:
5873 /* XXX compute */
5874 queue_reset = true;
5875 break;
5876 }
5877 break;
5878 case 1:
5879 switch (queue_id) {
5880 case 0:
5881 queue_reset = true;
5882 break;
5883 case 1:
5884 /* XXX compute */
5885 queue_reset = true;
5886 break;
5887 case 2:
5888 /* XXX compute */
5889 queue_reset = true;
5890 break;
5891 }
5892 break;
5893 }
5894 break;
5895 case 233: /* GUI IDLE */
5896 DRM_DEBUG("IH: GUI idle\n");
5897 break;
5898 default:
5899 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5900 break;
5901 }
5902
5903 /* wptr/rptr are in bytes! */
5904 rptr += 16;
5905 rptr &= rdev->ih.ptr_mask;
5906 }
5907 if (queue_hotplug)
5908 schedule_work(&rdev->hotplug_work);
5909 if (queue_reset)
5910 schedule_work(&rdev->reset_work);
5911 rdev->ih.rptr = rptr;
5912 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5913 atomic_set(&rdev->ih.lock, 0);
5914
5915 /* make sure wptr hasn't changed while processing */
5916 wptr = cik_get_ih_wptr(rdev);
5917 if (wptr != rptr)
5918 goto restart_ih;
5919
5920 return IRQ_HANDLED;
5921}
5922
5923/*
5924 * startup/shutdown callbacks
5925 */
5926/**
5927 * cik_startup - program the asic to a functional state
5928 *
5929 * @rdev: radeon_device pointer
5930 *
5931 * Programs the asic to a functional state (CIK).
5932 * Called by cik_init() and cik_resume().
5933 * Returns 0 for success, error for failure.
5934 */
5935static int cik_startup(struct radeon_device *rdev)
5936{
5937 struct radeon_ring *ring;
5938 int r;
5939
5940 if (rdev->flags & RADEON_IS_IGP) {
5941 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5942 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5943 r = cik_init_microcode(rdev);
5944 if (r) {
5945 DRM_ERROR("Failed to load firmware!\n");
5946 return r;
5947 }
5948 }
5949 } else {
5950 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5951 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5952 !rdev->mc_fw) {
5953 r = cik_init_microcode(rdev);
5954 if (r) {
5955 DRM_ERROR("Failed to load firmware!\n");
5956 return r;
5957 }
5958 }
5959
5960 r = ci_mc_load_microcode(rdev);
5961 if (r) {
5962 DRM_ERROR("Failed to load MC firmware!\n");
5963 return r;
5964 }
5965 }
5966
5967 r = r600_vram_scratch_init(rdev);
5968 if (r)
5969 return r;
5970
5971 cik_mc_program(rdev);
5972 r = cik_pcie_gart_enable(rdev);
5973 if (r)
5974 return r;
5975 cik_gpu_init(rdev);
5976
5977 /* allocate rlc buffers */
5978 r = si_rlc_init(rdev);
5979 if (r) {
5980 DRM_ERROR("Failed to init rlc BOs!\n");
5981 return r;
5982 }
5983
5984 /* allocate wb buffer */
5985 r = radeon_wb_init(rdev);
5986 if (r)
5987 return r;
5988
5989 /* allocate mec buffers */
5990 r = cik_mec_init(rdev);
5991 if (r) {
5992 DRM_ERROR("Failed to init MEC BOs!\n");
5993 return r;
5994 }
5995
5996 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5997 if (r) {
5998 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5999 return r;
6000 }
6001
6002 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6003 if (r) {
6004 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6005 return r;
6006 }
6007
6008 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6009 if (r) {
6010 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6011 return r;
6012 }
6013
6014 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6015 if (r) {
6016 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6017 return r;
6018 }
6019
6020 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6021 if (r) {
6022 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6023 return r;
6024 }
6025
6026 r = cik_uvd_resume(rdev);
6027 if (!r) {
6028 r = radeon_fence_driver_start_ring(rdev,
6029 R600_RING_TYPE_UVD_INDEX);
6030 if (r)
6031 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6032 }
6033 if (r)
6034 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6035
6036 /* Enable IRQ */
6037 if (!rdev->irq.installed) {
6038 r = radeon_irq_kms_init(rdev);
6039 if (r)
6040 return r;
6041 }
6042
6043 r = cik_irq_init(rdev);
6044 if (r) {
6045 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6046 radeon_irq_kms_fini(rdev);
6047 return r;
6048 }
6049 cik_irq_set(rdev);
6050
6051 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6052 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6053 CP_RB0_RPTR, CP_RB0_WPTR,
6054 0, 0xfffff, RADEON_CP_PACKET2);
6055 if (r)
6056 return r;
6057
6058 /* set up the compute queues */
6059 /* type-2 packets are deprecated on MEC, use type-3 instead */
6060 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6061 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6062 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6063 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6064 if (r)
6065 return r;
6066 ring->me = 1; /* first MEC */
6067 ring->pipe = 0; /* first pipe */
6068 ring->queue = 0; /* first queue */
6069 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6070
6071 /* type-2 packets are deprecated on MEC, use type-3 instead */
6072 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6073 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6074 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6075 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6076 if (r)
6077 return r;
6078 /* dGPU only have 1 MEC */
6079 ring->me = 1; /* first MEC */
6080 ring->pipe = 0; /* first pipe */
6081 ring->queue = 1; /* second queue */
6082 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6083
6084 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6085 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6086 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6087 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6088 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6089 if (r)
6090 return r;
6091
6092 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6093 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6094 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6095 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6096 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6097 if (r)
6098 return r;
6099
6100 r = cik_cp_resume(rdev);
6101 if (r)
6102 return r;
6103
6104 r = cik_sdma_resume(rdev);
6105 if (r)
6106 return r;
6107
6108 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6109 if (ring->ring_size) {
6110 r = radeon_ring_init(rdev, ring, ring->ring_size,
6111 R600_WB_UVD_RPTR_OFFSET,
6112 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6113 0, 0xfffff, RADEON_CP_PACKET2);
6114 if (!r)
6115 r = r600_uvd_init(rdev);
6116 if (r)
6117 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6118 }
6119
6120 r = radeon_ib_pool_init(rdev);
6121 if (r) {
6122 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6123 return r;
6124 }
6125
6126 r = radeon_vm_manager_init(rdev);
6127 if (r) {
6128 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6129 return r;
6130 }
6131
6132 return 0;
6133}
6134
6135/**
6136 * cik_resume - resume the asic to a functional state
6137 *
6138 * @rdev: radeon_device pointer
6139 *
6140 * Programs the asic to a functional state (CIK).
6141 * Called at resume.
6142 * Returns 0 for success, error for failure.
6143 */
6144int cik_resume(struct radeon_device *rdev)
6145{
6146 int r;
6147
6148 /* post card */
6149 atom_asic_init(rdev->mode_info.atom_context);
6150
6151 /* init golden registers */
6152 cik_init_golden_registers(rdev);
6153
6154 rdev->accel_working = true;
6155 r = cik_startup(rdev);
6156 if (r) {
6157 DRM_ERROR("cik startup failed on resume\n");
6158 rdev->accel_working = false;
6159 return r;
6160 }
6161
6162 return r;
6163
6164}
6165
6166/**
6167 * cik_suspend - suspend the asic
6168 *
6169 * @rdev: radeon_device pointer
6170 *
6171 * Bring the chip into a state suitable for suspend (CIK).
6172 * Called at suspend.
6173 * Returns 0 for success.
6174 */
6175int cik_suspend(struct radeon_device *rdev)
6176{
6177 radeon_vm_manager_fini(rdev);
6178 cik_cp_enable(rdev, false);
6179 cik_sdma_enable(rdev, false);
6180 r600_uvd_rbc_stop(rdev);
6181 radeon_uvd_suspend(rdev);
6182 cik_irq_suspend(rdev);
6183 radeon_wb_disable(rdev);
6184 cik_pcie_gart_disable(rdev);
6185 return 0;
6186}
6187
6188 /* The plan is to move initialization into this function and use
6189 * helper functions so that radeon_device_init does pretty much
6190 * nothing more than call asic-specific functions. This
6191 * should also allow us to remove a bunch of callbacks
6192 * like vram_info.
6193 */
6194/**
6195 * cik_init - asic specific driver and hw init
6196 *
6197 * @rdev: radeon_device pointer
6198 *
6199 * Setup asic specific driver variables and program the hw
6200 * to a functional state (CIK).
6201 * Called at driver startup.
6202 * Returns 0 for success, errors for failure.
6203 */
6204int cik_init(struct radeon_device *rdev)
6205{
6206 struct radeon_ring *ring;
6207 int r;
6208
6209 /* Read BIOS */
6210 if (!radeon_get_bios(rdev)) {
6211 if (ASIC_IS_AVIVO(rdev))
6212 return -EINVAL;
6213 }
6214 /* Must be an ATOMBIOS */
6215 if (!rdev->is_atom_bios) {
6216 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6217 return -EINVAL;
6218 }
6219 r = radeon_atombios_init(rdev);
6220 if (r)
6221 return r;
6222
6223 /* Post card if necessary */
6224 if (!radeon_card_posted(rdev)) {
6225 if (!rdev->bios) {
6226 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6227 return -EINVAL;
6228 }
6229 DRM_INFO("GPU not posted. posting now...\n");
6230 atom_asic_init(rdev->mode_info.atom_context);
6231 }
6232 /* init golden registers */
6233 cik_init_golden_registers(rdev);
6234 /* Initialize scratch registers */
6235 cik_scratch_init(rdev);
6236 /* Initialize surface registers */
6237 radeon_surface_init(rdev);
6238 /* Initialize clocks */
6239 radeon_get_clock_info(rdev->ddev);
6240
6241 /* Fence driver */
6242 r = radeon_fence_driver_init(rdev);
6243 if (r)
6244 return r;
6245
6246 /* initialize memory controller */
6247 r = cik_mc_init(rdev);
6248 if (r)
6249 return r;
6250 /* Memory manager */
6251 r = radeon_bo_init(rdev);
6252 if (r)
6253 return r;
6254
6255 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6256 ring->ring_obj = NULL;
6257 r600_ring_init(rdev, ring, 1024 * 1024);
6258
6259 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6260 ring->ring_obj = NULL;
6261 r600_ring_init(rdev, ring, 1024 * 1024);
6262 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6263 if (r)
6264 return r;
6265
6266 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6267 ring->ring_obj = NULL;
6268 r600_ring_init(rdev, ring, 1024 * 1024);
6269 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6270 if (r)
6271 return r;
6272
6273 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6274 ring->ring_obj = NULL;
6275 r600_ring_init(rdev, ring, 256 * 1024);
6276
6277 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6278 ring->ring_obj = NULL;
6279 r600_ring_init(rdev, ring, 256 * 1024);
6280
6281 r = radeon_uvd_init(rdev);
6282 if (!r) {
6283 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6284 ring->ring_obj = NULL;
6285 r600_ring_init(rdev, ring, 4096);
6286 }
6287
6288 rdev->ih.ring_obj = NULL;
6289 r600_ih_ring_init(rdev, 64 * 1024);
6290
6291 r = r600_pcie_gart_init(rdev);
6292 if (r)
6293 return r;
6294
6295 rdev->accel_working = true;
6296 r = cik_startup(rdev);
6297 if (r) {
6298 dev_err(rdev->dev, "disabling GPU acceleration\n");
6299 cik_cp_fini(rdev);
6300 cik_sdma_fini(rdev);
6301 cik_irq_fini(rdev);
6302 si_rlc_fini(rdev);
6303 cik_mec_fini(rdev);
6304 radeon_wb_fini(rdev);
6305 radeon_ib_pool_fini(rdev);
6306 radeon_vm_manager_fini(rdev);
6307 radeon_irq_kms_fini(rdev);
6308 cik_pcie_gart_fini(rdev);
6309 rdev->accel_working = false;
6310 }
6311
6312 /* Don't start up if the MC ucode is missing.
6313 * The default clocks and voltages before the MC ucode
6314 * is loaded are not sufficient for advanced operations.
6315 */
6316 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6317 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6318 return -EINVAL;
6319 }
6320
6321 return 0;
6322}
6323
6324/**
6325 * cik_fini - asic specific driver and hw fini
6326 *
6327 * @rdev: radeon_device pointer
6328 *
6329 * Tear down the asic specific driver variables and program the hw
6330 * to an idle state (CIK).
6331 * Called at driver unload.
6332 */
6333void cik_fini(struct radeon_device *rdev)
6334{
6335 cik_cp_fini(rdev);
6336 cik_sdma_fini(rdev);
6337 cik_irq_fini(rdev);
6338 si_rlc_fini(rdev);
6339 cik_mec_fini(rdev);
6340 radeon_wb_fini(rdev);
6341 radeon_vm_manager_fini(rdev);
6342 radeon_ib_pool_fini(rdev);
6343 radeon_irq_kms_fini(rdev);
6344 radeon_uvd_fini(rdev);
6345 cik_pcie_gart_fini(rdev);
6346 r600_vram_scratch_fini(rdev);
6347 radeon_gem_fini(rdev);
6348 radeon_fence_driver_fini(rdev);
6349 radeon_bo_fini(rdev);
6350 radeon_atombios_fini(rdev);
6351 kfree(rdev->bios);
6352 rdev->bios = NULL;
6353}
6354
6355/* display watermark setup */
6356/**
6357 * dce8_line_buffer_adjust - Set up the line buffer
6358 *
6359 * @rdev: radeon_device pointer
6360 * @radeon_crtc: the selected display controller
6361 * @mode: the current display mode on the selected display
6362 * controller
6363 *
6364 * Set up the line buffer allocation for
6365 * the selected display controller (CIK).
6366 * Returns the line buffer size in pixels.
6367 */
6368static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6369 struct radeon_crtc *radeon_crtc,
6370 struct drm_display_mode *mode)
6371{
6372 u32 tmp;
6373
6374 /*
6375 * Line Buffer Setup
6376 * There are 6 line buffers, one for each display controller.
6377 * There are 3 partitions per LB. Select the number of partitions
6378 * to enable based on the display width. For display widths larger
6379 * than 4096, you need to use 2 display controllers and combine
6380 * them using the stereo blender.
6381 */
6382 if (radeon_crtc->base.enabled && mode) {
6383 if (mode->crtc_hdisplay < 1920)
6384 tmp = 1;
6385 else if (mode->crtc_hdisplay < 2560)
6386 tmp = 2;
6387 else if (mode->crtc_hdisplay < 4096)
6388 tmp = 0;
6389 else {
6390 DRM_DEBUG_KMS("Mode too big for LB!\n");
6391 tmp = 0;
6392 }
6393 } else
6394 tmp = 1;
6395
6396 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6397 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6398
6399 if (radeon_crtc->base.enabled && mode) {
6400 switch (tmp) {
6401 case 0:
6402 default:
6403 return 4096 * 2;
6404 case 1:
6405 return 1920 * 2;
6406 case 2:
6407 return 2560 * 2;
6408 }
6409 }
6410
6411 /* controller not enabled, so no lb used */
6412 return 0;
6413}
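/* Worked example of the mapping above (numbers for illustration only):
 * a 1366-pixel-wide mode hits the crtc_hdisplay < 1920 bucket, so tmp = 1
 * and the CRTC is granted 1920 * 2 pixels of line buffer; a 3840-pixel-wide
 * mode falls into the < 4096 bucket, so tmp = 0 and it gets 4096 * 2 pixels.
 */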
6414
6415/**
6416 * cik_get_number_of_dram_channels - get the number of dram channels
6417 *
6418 * @rdev: radeon_device pointer
6419 *
6420 * Look up the number of video ram channels (CIK).
6421 * Used for display watermark bandwidth calculations
6422 * Returns the number of dram channels
6423 */
6424static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6425{
6426 u32 tmp = RREG32(MC_SHARED_CHMAP);
6427
6428 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6429 case 0:
6430 default:
6431 return 1;
6432 case 1:
6433 return 2;
6434 case 2:
6435 return 4;
6436 case 3:
6437 return 8;
6438 case 4:
6439 return 3;
6440 case 5:
6441 return 6;
6442 case 6:
6443 return 10;
6444 case 7:
6445 return 12;
6446 case 8:
6447 return 16;
6448 }
6449}
6450
6451struct dce8_wm_params {
6452 u32 dram_channels; /* number of dram channels */
6453 u32 yclk; /* bandwidth per dram data pin in kHz */
6454 u32 sclk; /* engine clock in kHz */
6455 u32 disp_clk; /* display clock in kHz */
6456 u32 src_width; /* viewport width */
6457 u32 active_time; /* active display time in ns */
6458 u32 blank_time; /* blank time in ns */
6459 bool interlaced; /* mode is interlaced */
6460 fixed20_12 vsc; /* vertical scale ratio */
6461 u32 num_heads; /* number of active crtcs */
6462 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6463 u32 lb_size; /* line buffer allocated to pipe */
6464 u32 vtaps; /* vertical scaler taps */
6465};
6466
6467/**
6468 * dce8_dram_bandwidth - get the dram bandwidth
6469 *
6470 * @wm: watermark calculation data
6471 *
6472 * Calculate the raw dram bandwidth (CIK).
6473 * Used for display watermark bandwidth calculations
6474 * Returns the dram bandwidth in MBytes/s
6475 */
6476static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6477{
6478 /* Calculate raw DRAM Bandwidth */
6479 fixed20_12 dram_efficiency; /* 0.7 */
6480 fixed20_12 yclk, dram_channels, bandwidth;
6481 fixed20_12 a;
6482
6483 a.full = dfixed_const(1000);
6484 yclk.full = dfixed_const(wm->yclk);
6485 yclk.full = dfixed_div(yclk, a);
6486 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6487 a.full = dfixed_const(10);
6488 dram_efficiency.full = dfixed_const(7);
6489 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6490 bandwidth.full = dfixed_mul(dram_channels, yclk);
6491 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6492
6493 return dfixed_trunc(bandwidth);
6494}
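/* The 20.12 fixed-point math above amounts to (values purely illustrative):
 *
 *   bandwidth [MB/s] = dram_channels * 4 bytes * (yclk / 1000) * 0.7
 *
 * e.g. assuming 2 DRAM channels and yclk = 800000 kHz:
 *   2 * 4 * 800 * 0.7 = 4480 MB/s of raw DRAM bandwidth.
 */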
6495
6496/**
6497 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6498 *
6499 * @wm: watermark calculation data
6500 *
6501 * Calculate the dram bandwidth used for display (CIK).
6502 * Used for display watermark bandwidth calculations
6503 * Returns the dram bandwidth for display in MBytes/s
6504 */
6505static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6506{
6507 /* Calculate DRAM Bandwidth and the part allocated to display. */
6508 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6509 fixed20_12 yclk, dram_channels, bandwidth;
6510 fixed20_12 a;
6511
6512 a.full = dfixed_const(1000);
6513 yclk.full = dfixed_const(wm->yclk);
6514 yclk.full = dfixed_div(yclk, a);
6515 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516 a.full = dfixed_const(10);
6517 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6518 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6519 bandwidth.full = dfixed_mul(dram_channels, yclk);
6520 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6521
6522 return dfixed_trunc(bandwidth);
6523}
6524
6525/**
6526 * dce8_data_return_bandwidth - get the data return bandwidth
6527 *
6528 * @wm: watermark calculation data
6529 *
6530 * Calculate the data return bandwidth used for display (CIK).
6531 * Used for display watermark bandwidth calculations
6532 * Returns the data return bandwidth in MBytes/s
6533 */
6534static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6535{
6536 /* Calculate the display Data return Bandwidth */
6537 fixed20_12 return_efficiency; /* 0.8 */
6538 fixed20_12 sclk, bandwidth;
6539 fixed20_12 a;
6540
6541 a.full = dfixed_const(1000);
6542 sclk.full = dfixed_const(wm->sclk);
6543 sclk.full = dfixed_div(sclk, a);
6544 a.full = dfixed_const(10);
6545 return_efficiency.full = dfixed_const(8);
6546 return_efficiency.full = dfixed_div(return_efficiency, a);
6547 a.full = dfixed_const(32);
6548 bandwidth.full = dfixed_mul(a, sclk);
6549 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6550
6551 return dfixed_trunc(bandwidth);
6552}
6553
6554/**
6555 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6556 *
6557 * @wm: watermark calculation data
6558 *
6559 * Calculate the dmif bandwidth used for display (CIK).
6560 * Used for display watermark bandwidth calculations
6561 * Returns the dmif bandwidth in MBytes/s
6562 */
6563static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6564{
6565 /* Calculate the DMIF Request Bandwidth */
6566 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6567 fixed20_12 disp_clk, bandwidth;
6568 fixed20_12 a, b;
6569
6570 a.full = dfixed_const(1000);
6571 disp_clk.full = dfixed_const(wm->disp_clk);
6572 disp_clk.full = dfixed_div(disp_clk, a);
6573 a.full = dfixed_const(32);
6574 b.full = dfixed_mul(a, disp_clk);
6575
6576 a.full = dfixed_const(10);
6577 disp_clk_request_efficiency.full = dfixed_const(8);
6578 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6579
6580 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6581
6582 return dfixed_trunc(bandwidth);
6583}
6584
6585/**
6586 * dce8_available_bandwidth - get the min available bandwidth
6587 *
6588 * @wm: watermark calculation data
6589 *
6590 * Calculate the min available bandwidth used for display (CIK).
6591 * Used for display watermark bandwidth calculations
6592 * Returns the min available bandwidth in MBytes/s
6593 */
6594static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6595{
6596 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6597 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6598 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6599 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6600
6601 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6602}
6603
6604/**
6605 * dce8_average_bandwidth - get the average available bandwidth
6606 *
6607 * @wm: watermark calculation data
6608 *
6609 * Calculate the average available bandwidth used for display (CIK).
6610 * Used for display watermark bandwidth calculations
6611 * Returns the average available bandwidth in MBytes/s
6612 */
6613static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6614{
6615 /* Calculate the display mode Average Bandwidth
6616 * DisplayMode should contain the source and destination dimensions,
6617 * timing, etc.
6618 */
6619 fixed20_12 bpp;
6620 fixed20_12 line_time;
6621 fixed20_12 src_width;
6622 fixed20_12 bandwidth;
6623 fixed20_12 a;
6624
6625 a.full = dfixed_const(1000);
6626 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6627 line_time.full = dfixed_div(line_time, a);
6628 bpp.full = dfixed_const(wm->bytes_per_pixel);
6629 src_width.full = dfixed_const(wm->src_width);
6630 bandwidth.full = dfixed_mul(src_width, bpp);
6631 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6632 bandwidth.full = dfixed_div(bandwidth, line_time);
6633
6634 return dfixed_trunc(bandwidth);
6635}
6636
6637/**
6638 * dce8_latency_watermark - get the latency watermark
6639 *
6640 * @wm: watermark calculation data
6641 *
6642 * Calculate the latency watermark (CIK).
6643 * Used for display watermark bandwidth calculations
6644 * Returns the latency watermark in ns
6645 */
6646static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6647{
6648 /* First calculate the latency in ns */
6649 u32 mc_latency = 2000; /* 2000 ns. */
6650 u32 available_bandwidth = dce8_available_bandwidth(wm);
6651 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6652 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6653 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6654 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6655 (wm->num_heads * cursor_line_pair_return_time);
6656 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6657 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6658 u32 tmp, dmif_size = 12288;
6659 fixed20_12 a, b, c;
6660
6661 if (wm->num_heads == 0)
6662 return 0;
6663
6664 a.full = dfixed_const(2);
6665 b.full = dfixed_const(1);
6666 if ((wm->vsc.full > a.full) ||
6667 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6668 (wm->vtaps >= 5) ||
6669 ((wm->vsc.full >= a.full) && wm->interlaced))
6670 max_src_lines_per_dst_line = 4;
6671 else
6672 max_src_lines_per_dst_line = 2;
6673
6674 a.full = dfixed_const(available_bandwidth);
6675 b.full = dfixed_const(wm->num_heads);
6676 a.full = dfixed_div(a, b);
6677
6678 b.full = dfixed_const(mc_latency + 512);
6679 c.full = dfixed_const(wm->disp_clk);
6680 b.full = dfixed_div(b, c);
6681
6682 c.full = dfixed_const(dmif_size);
6683 b.full = dfixed_div(c, b);
6684
6685 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6686
6687 b.full = dfixed_const(1000);
6688 c.full = dfixed_const(wm->disp_clk);
6689 b.full = dfixed_div(c, b);
6690 c.full = dfixed_const(wm->bytes_per_pixel);
6691 b.full = dfixed_mul(b, c);
6692
6693 lb_fill_bw = min(tmp, dfixed_trunc(b));
6694
6695 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6696 b.full = dfixed_const(1000);
6697 c.full = dfixed_const(lb_fill_bw);
6698 b.full = dfixed_div(c, b);
6699 a.full = dfixed_div(a, b);
6700 line_fill_time = dfixed_trunc(a);
6701
6702 if (line_fill_time < wm->active_time)
6703 return latency;
6704 else
6705 return latency + (line_fill_time - wm->active_time);
6706
6707}
6708
6709/**
6710 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6711 * average and available dram bandwidth
6712 *
6713 * @wm: watermark calculation data
6714 *
6715 * Check if the display average bandwidth fits in the display
6716 * dram bandwidth (CIK).
6717 * Used for display watermark bandwidth calculations
6718 * Returns true if the display fits, false if not.
6719 */
6720static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6721{
6722 if (dce8_average_bandwidth(wm) <=
6723 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6724 return true;
6725 else
6726 return false;
6727}
6728
6729/**
6730 * dce8_average_bandwidth_vs_available_bandwidth - check
6731 * average and available bandwidth
6732 *
6733 * @wm: watermark calculation data
6734 *
6735 * Check if the display average bandwidth fits in the display
6736 * available bandwidth (CIK).
6737 * Used for display watermark bandwidth calculations
6738 * Returns true if the display fits, false if not.
6739 */
6740static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6741{
6742 if (dce8_average_bandwidth(wm) <=
6743 (dce8_available_bandwidth(wm) / wm->num_heads))
6744 return true;
6745 else
6746 return false;
6747}
6748
6749/**
6750 * dce8_check_latency_hiding - check latency hiding
6751 *
6752 * @wm: watermark calculation data
6753 *
6754 * Check latency hiding (CIK).
6755 * Used for display watermark bandwidth calculations
6756 * Returns true if the display fits, false if not.
6757 */
6758static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6759{
6760 u32 lb_partitions = wm->lb_size / wm->src_width;
6761 u32 line_time = wm->active_time + wm->blank_time;
6762 u32 latency_tolerant_lines;
6763 u32 latency_hiding;
6764 fixed20_12 a;
6765
6766 a.full = dfixed_const(1);
6767 if (wm->vsc.full > a.full)
6768 latency_tolerant_lines = 1;
6769 else {
6770 if (lb_partitions <= (wm->vtaps + 1))
6771 latency_tolerant_lines = 1;
6772 else
6773 latency_tolerant_lines = 2;
6774 }
6775
6776 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6777
6778 if (dce8_latency_watermark(wm) <= latency_hiding)
6779 return true;
6780 else
6781 return false;
6782}
6783
6784/**
6785 * dce8_program_watermarks - program display watermarks
6786 *
6787 * @rdev: radeon_device pointer
6788 * @radeon_crtc: the selected display controller
6789 * @lb_size: line buffer size
6790 * @num_heads: number of display controllers in use
6791 *
6792 * Calculate and program the display watermarks for the
6793 * selected display controller (CIK).
6794 */
6795static void dce8_program_watermarks(struct radeon_device *rdev,
6796 struct radeon_crtc *radeon_crtc,
6797 u32 lb_size, u32 num_heads)
6798{
6799 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6800 struct dce8_wm_params wm;
6801 u32 pixel_period;
6802 u32 line_time = 0;
6803 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6804 u32 tmp, wm_mask;
6805
6806 if (radeon_crtc->base.enabled && num_heads && mode) {
6807 pixel_period = 1000000 / (u32)mode->clock;
6808 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6809
6810 wm.yclk = rdev->pm.current_mclk * 10;
6811 wm.sclk = rdev->pm.current_sclk * 10;
6812 wm.disp_clk = mode->clock;
6813 wm.src_width = mode->crtc_hdisplay;
6814 wm.active_time = mode->crtc_hdisplay * pixel_period;
6815 wm.blank_time = line_time - wm.active_time;
6816 wm.interlaced = false;
6817 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6818 wm.interlaced = true;
6819 wm.vsc = radeon_crtc->vsc;
6820 wm.vtaps = 1;
6821 if (radeon_crtc->rmx_type != RMX_OFF)
6822 wm.vtaps = 2;
6823 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6824 wm.lb_size = lb_size;
6825 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6826 wm.num_heads = num_heads;
6827
6828 /* set for high clocks */
6829 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6830 /* set for low clocks */
6831 /* wm.yclk = low clk; wm.sclk = low clk */
6832 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6833
6834 /* possibly force display priority to high */
6835 /* should really do this at mode validation time... */
6836 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6837 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6838 !dce8_check_latency_hiding(&wm) ||
6839 (rdev->disp_priority == 2)) {
6840 DRM_DEBUG_KMS("force priority to high\n");
6841 }
6842 }
6843
6844 /* select wm A */
6845 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6846 tmp = wm_mask;
6847 tmp &= ~LATENCY_WATERMARK_MASK(3);
6848 tmp |= LATENCY_WATERMARK_MASK(1);
6849 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6850 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6851 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6852 LATENCY_HIGH_WATERMARK(line_time)));
6853 /* select wm B */
6854 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6855 tmp &= ~LATENCY_WATERMARK_MASK(3);
6856 tmp |= LATENCY_WATERMARK_MASK(2);
6857 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6858 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6859 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6860 LATENCY_HIGH_WATERMARK(line_time)));
6861 /* restore original selection */
6862 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6863}
6864
6865/**
6866 * dce8_bandwidth_update - program display watermarks
6867 *
6868 * @rdev: radeon_device pointer
6869 *
6870 * Calculate and program the display watermarks and line
6871 * buffer allocation (CIK).
6872 */
6873void dce8_bandwidth_update(struct radeon_device *rdev)
6874{
6875 struct drm_display_mode *mode = NULL;
6876 u32 num_heads = 0, lb_size;
6877 int i;
6878
6879 radeon_update_display_priority(rdev);
6880
6881 for (i = 0; i < rdev->num_crtc; i++) {
6882 if (rdev->mode_info.crtcs[i]->base.enabled)
6883 num_heads++;
6884 }
6885 for (i = 0; i < rdev->num_crtc; i++) {
6886 mode = &rdev->mode_info.crtcs[i]->base.mode;
6887 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6888 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6889 }
6890}
6891
6892/**
6893 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6894 *
6895 * @rdev: radeon_device pointer
6896 *
6897 * Fetches a GPU clock counter snapshot (CIK).
6898 * Returns the 64 bit clock counter snapshot.
6899 */
6900uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6901{
6902 uint64_t clock;
6903
6904 mutex_lock(&rdev->gpu_clock_mutex);
6905 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6906 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6907 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6908 mutex_unlock(&rdev->gpu_clock_mutex);
6909 return clock;
6910}
6911
6912static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6913 u32 cntl_reg, u32 status_reg)
6914{
6915 int r, i;
6916 struct atom_clock_dividers dividers;
6917 uint32_t tmp;
6918
6919 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6920 clock, false, &dividers);
6921 if (r)
6922 return r;
6923
6924 tmp = RREG32_SMC(cntl_reg);
6925 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6926 tmp |= dividers.post_divider;
6927 WREG32_SMC(cntl_reg, tmp);
6928
6929 for (i = 0; i < 100; i++) {
6930 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6931 break;
6932 mdelay(10);
6933 }
6934 if (i == 100)
6935 return -ETIMEDOUT;
6936
6937 return 0;
6938}
6939
6940int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6941{
6942 int r = 0;
6943
6944 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6945 if (r)
6946 return r;
6947
6948 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6949 return r;
6950}
6951
6952int cik_uvd_resume(struct radeon_device *rdev)
6953{
6954 uint64_t addr;
6955 uint32_t size;
6956 int r;
6957
6958 r = radeon_uvd_resume(rdev);
6959 if (r)
6960 return r;
6961
6962 /* program the VCPU memory controller bits 0-27 */
6963 addr = rdev->uvd.gpu_addr >> 3;
6964 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6965 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6966 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6967
6968 addr += size;
6969 size = RADEON_UVD_STACK_SIZE >> 3;
6970 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6971 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6972
6973 addr += size;
6974 size = RADEON_UVD_HEAP_SIZE >> 3;
6975 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6976 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6977
6978 /* bits 28-31 */
6979 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6980 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6981
6982 /* bits 32-39 */
6983 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6984 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6985
6986 return 0;
6987}
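/* Summary of the VCPU cache layout programmed above (offsets and sizes are
 * in 8-byte units, i.e. the byte values shifted right by 3):
 *
 *   OFFSET0/SIZE0 - UVD firmware image (firmware size + 4, page aligned)
 *   OFFSET1/SIZE1 - stack (RADEON_UVD_STACK_SIZE)
 *   OFFSET2/SIZE2 - heap  (RADEON_UVD_HEAP_SIZE)
 *
 * Bits 28-31 of the base address go into UVD_LMI_ADDR_EXT and bits 32-39
 * into UVD_LMI_EXT40_ADDR.
 */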