/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/*
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

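/*
 * Illustrative usage sketch only (the VRAM offset used here is hypothetical,
 * not taken from the driver): reading a single dword back from VRAM through
 * the helper above.
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0x1000, &val, sizeof(val), false);
 */
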
/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper function
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}
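
/*
 * Illustrative sketch only (register offset and mask are hypothetical):
 * a read-modify-write through the MM register helpers above. Driver code
 * normally reaches these via the RREG32()/WREG32() wrappers.
 *
 *	uint32_t v = amdgpu_mm_rreg(adev, reg, 0);
 *
 *	amdgpu_mm_wreg(adev, reg, v | mask, 0);
 */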

/*
 * amdgpu_mm_wreg_mmio_rlc - write a register either with MMIO or via the RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

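/*
 * Illustrative sketch (register name and values are hypothetical): golden
 * settings are laid out as {offset, AND mask, OR mask} triplets and handed
 * to amdgpu_device_program_register_sequence().
 *
 *	static const u32 golden_settings[] = {
 *		mmSOME_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings,
 *						ARRAY_SIZE(golden_settings));
 */
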
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the
	 * SDMA paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page, so with the paging queue enabled
	 * num_doorbells needs one extra page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

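/*
 * Minimal usage sketch (error handling trimmed, not part of the driver):
 * allocate a writeback slot, derive its CPU and GPU addresses the way the
 * ring code does, then release it again.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile uint32_t *cpu_addr = &adev->wb.wb[wb];
 *		uint64_t gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *		... use the slot ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */
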
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs.
		 * smc fw versions above 22.15 don't have this flaw, so we force
		 * vpost to be executed for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

d38ceaf9
AD
973/* if we get transitioned to only one device, take VGA back */
974/**
06ec9070 975 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
976 *
977 * @cookie: amdgpu_device pointer
978 * @state: enable/disable vga decode
979 *
980 * Enable/disable vga decode (all asics).
981 * Returns VGA resource flags.
982 */
06ec9070 983static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
984{
985 struct amdgpu_device *adev = cookie;
986 amdgpu_asic_set_vga_state(adev, state);
987 if (state)
988 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
989 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
990 else
991 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
992}
993
e3ecdffa
AD
994/**
995 * amdgpu_device_check_block_size - validate the vm block size
996 *
997 * @adev: amdgpu_device pointer
998 *
999 * Validates the vm block size specified via module parameter.
1000 * The vm block size defines number of bits in page table versus page directory,
1001 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1002 * page table and the remaining bits are in the page directory.
1003 */
06ec9070 1004static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1005{
1006 /* defines number of bits in page table versus page directory,
1007 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1008 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1009 if (amdgpu_vm_block_size == -1)
1010 return;
a1adf8be 1011
bab4fee7 1012 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1013 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1014 amdgpu_vm_block_size);
97489129 1015 amdgpu_vm_block_size = -1;
a1adf8be 1016 }
a1adf8be
CZ
1017}
1018
e3ecdffa
AD
1019/**
1020 * amdgpu_device_check_vm_size - validate the vm size
1021 *
1022 * @adev: amdgpu_device pointer
1023 *
1024 * Validates the vm size in GB specified via module parameter.
1025 * The VM size is the size of the GPU virtual memory space in GB.
1026 */
06ec9070 1027static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1028{
64dab074
AD
1029 /* no need to check the default value */
1030 if (amdgpu_vm_size == -1)
1031 return;
1032
83ca145d
ZJ
1033 if (amdgpu_vm_size < 1) {
1034 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1035 amdgpu_vm_size);
f3368128 1036 amdgpu_vm_size = -1;
83ca145d 1037 }
83ca145d
ZJ
1038}
1039
7951e376
RZ
1040static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1041{
1042 struct sysinfo si;
a9d4fe2f 1043 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1044 uint64_t total_memory;
1045 uint64_t dram_size_seven_GB = 0x1B8000000;
1046 uint64_t dram_size_three_GB = 0xB8000000;
1047
1048 if (amdgpu_smu_memory_pool_size == 0)
1049 return;
1050
1051 if (!is_os_64) {
1052 DRM_WARN("Not 64-bit OS, feature not supported\n");
1053 goto def_value;
1054 }
1055 si_meminfo(&si);
1056 total_memory = (uint64_t)si.totalram * si.mem_unit;
1057
1058 if ((amdgpu_smu_memory_pool_size == 1) ||
1059 (amdgpu_smu_memory_pool_size == 2)) {
1060 if (total_memory < dram_size_three_GB)
1061 goto def_value1;
1062 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1063 (amdgpu_smu_memory_pool_size == 8)) {
1064 if (total_memory < dram_size_seven_GB)
1065 goto def_value1;
1066 } else {
1067 DRM_WARN("Smu memory pool size not supported\n");
1068 goto def_value;
1069 }
1070 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1071
1072 return;
1073
1074def_value1:
	DRM_WARN("Not enough system memory\n");
1076def_value:
1077 adev->pm.smu_prv_buffer_size = 0;
1078}
1079
d38ceaf9 1080/**
06ec9070 1081 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1082 *
1083 * @adev: amdgpu_device pointer
1084 *
1085 * Validates certain module parameters and updates
1086 * the associated values used by the driver (all asics).
1087 */
912dfc84 1088static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1089{
5b011235
CZ
1090 if (amdgpu_sched_jobs < 4) {
1091 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1092 amdgpu_sched_jobs);
1093 amdgpu_sched_jobs = 4;
76117507 1094 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1095 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1096 amdgpu_sched_jobs);
1097 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1098 }
d38ceaf9 1099
83e74db6 1100 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1101 /* gart size must be greater or equal to 32M */
1102 dev_warn(adev->dev, "gart size (%d) too small\n",
1103 amdgpu_gart_size);
83e74db6 1104 amdgpu_gart_size = -1;
d38ceaf9
AD
1105 }
1106
36d38372 1107 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1108 /* gtt size must be greater or equal to 32M */
36d38372
CK
1109 dev_warn(adev->dev, "gtt size (%d) too small\n",
1110 amdgpu_gtt_size);
1111 amdgpu_gtt_size = -1;
d38ceaf9
AD
1112 }
1113
d07f14be
RH
1114 /* valid range is between 4 and 9 inclusive */
1115 if (amdgpu_vm_fragment_size != -1 &&
1116 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1117 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1118 amdgpu_vm_fragment_size = -1;
1119 }
1120
7951e376
RZ
1121 amdgpu_device_check_smu_prv_buffer_size(adev);
1122
06ec9070 1123 amdgpu_device_check_vm_size(adev);
d38ceaf9 1124
06ec9070 1125 amdgpu_device_check_block_size(adev);
6a7f76e7 1126
19aede77 1127 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1128
e3c00faa 1129 return 0;
d38ceaf9
AD
1130}
1131
1132/**
1133 * amdgpu_switcheroo_set_state - set switcheroo state
1134 *
1135 * @pdev: pci dev pointer
1694467b 1136 * @state: vga_switcheroo state
d38ceaf9
AD
1137 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
1140 */
1141static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1142{
1143 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1144 int r;
d38ceaf9 1145
31af062a 1146 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1147 return;
1148
1149 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1150 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1151 /* don't suspend or resume card normally */
1152 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1153
de185019
AD
1154 pci_set_power_state(dev->pdev, PCI_D0);
1155 pci_restore_state(dev->pdev);
1156 r = pci_enable_device(dev->pdev);
1157 if (r)
1158 DRM_WARN("pci_enable_device failed (%d)\n", r);
1159 amdgpu_device_resume(dev, true);
d38ceaf9 1160
d38ceaf9
AD
1161 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1162 drm_kms_helper_poll_enable(dev);
1163 } else {
7ca85295 1164 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1165 drm_kms_helper_poll_disable(dev);
1166 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1167 amdgpu_device_suspend(dev, true);
1168 pci_save_state(dev->pdev);
1169 /* Shut down the device */
1170 pci_disable_device(dev->pdev);
1171 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1172 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1173 }
1174}
1175
1176/**
1177 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1178 *
1179 * @pdev: pci dev pointer
1180 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
1183 * Returns true if the state can be changed, false if not.
1184 */
1185static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1186{
1187 struct drm_device *dev = pci_get_drvdata(pdev);
1188
1189 /*
1190 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1191 * locking inversion with the driver load path. And the access here is
1192 * completely racy anyway. So don't bother with locking for now.
1193 */
7e13ad89 1194 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1195}
1196
1197static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1198 .set_gpu_state = amdgpu_switcheroo_set_state,
1199 .reprobe = NULL,
1200 .can_switch = amdgpu_switcheroo_can_switch,
1201};
1202
e3ecdffa
AD
1203/**
1204 * amdgpu_device_ip_set_clockgating_state - set the CG state
1205 *
87e3f136 1206 * @dev: amdgpu_device pointer
e3ecdffa
AD
1207 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1208 * @state: clockgating state (gate or ungate)
1209 *
1210 * Sets the requested clockgating state for all instances of
1211 * the hardware IP specified.
1212 * Returns the error code from the last instance.
1213 */
43fa561f 1214int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1215 enum amd_ip_block_type block_type,
1216 enum amd_clockgating_state state)
d38ceaf9 1217{
43fa561f 1218 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1219 int i, r = 0;
1220
1221 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1222 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1223 continue;
c722865a
RZ
1224 if (adev->ip_blocks[i].version->type != block_type)
1225 continue;
1226 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1227 continue;
1228 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1229 (void *)adev, state);
1230 if (r)
1231 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1232 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1233 }
1234 return r;
1235}
1236
e3ecdffa
AD
1237/**
1238 * amdgpu_device_ip_set_powergating_state - set the PG state
1239 *
87e3f136 1240 * @dev: amdgpu_device pointer
e3ecdffa
AD
1241 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1242 * @state: powergating state (gate or ungate)
1243 *
1244 * Sets the requested powergating state for all instances of
1245 * the hardware IP specified.
1246 * Returns the error code from the last instance.
1247 */
43fa561f 1248int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1249 enum amd_ip_block_type block_type,
1250 enum amd_powergating_state state)
d38ceaf9 1251{
43fa561f 1252 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1253 int i, r = 0;
1254
1255 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1256 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1257 continue;
c722865a
RZ
1258 if (adev->ip_blocks[i].version->type != block_type)
1259 continue;
1260 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1261 continue;
1262 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1263 (void *)adev, state);
1264 if (r)
1265 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1266 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1267 }
1268 return r;
1269}
1270
e3ecdffa
AD
1271/**
1272 * amdgpu_device_ip_get_clockgating_state - get the CG state
1273 *
1274 * @adev: amdgpu_device pointer
1275 * @flags: clockgating feature flags
1276 *
1277 * Walks the list of IPs on the device and updates the clockgating
1278 * flags for each IP.
1279 * Updates @flags with the feature flags for each hardware IP where
1280 * clockgating is enabled.
1281 */
2990a1fc
AD
1282void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1283 u32 *flags)
6cb2d4e4
HR
1284{
1285 int i;
1286
1287 for (i = 0; i < adev->num_ip_blocks; i++) {
1288 if (!adev->ip_blocks[i].status.valid)
1289 continue;
1290 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1291 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1292 }
1293}
1294
e3ecdffa
AD
1295/**
1296 * amdgpu_device_ip_wait_for_idle - wait for idle
1297 *
1298 * @adev: amdgpu_device pointer
1299 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1300 *
 * Waits for the requested hardware IP to be idle.
1302 * Returns 0 for success or a negative error code on failure.
1303 */
2990a1fc
AD
1304int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1305 enum amd_ip_block_type block_type)
5dbbb60b
AD
1306{
1307 int i, r;
1308
1309 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1310 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1311 continue;
a1255107
AD
1312 if (adev->ip_blocks[i].version->type == block_type) {
1313 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1314 if (r)
1315 return r;
1316 break;
1317 }
1318 }
1319 return 0;
1320
1321}
1322
e3ecdffa
AD
1323/**
1324 * amdgpu_device_ip_is_idle - is the hardware IP idle
1325 *
1326 * @adev: amdgpu_device pointer
1327 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1328 *
1329 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1331 */
2990a1fc
AD
1332bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1333 enum amd_ip_block_type block_type)
5dbbb60b
AD
1334{
1335 int i;
1336
1337 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1338 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1339 continue;
a1255107
AD
1340 if (adev->ip_blocks[i].version->type == block_type)
1341 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1342 }
1343 return true;
1344
1345}
1346
/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

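/*
 * Illustrative sketch: gating a code path on the GFX IP block being at
 * least version 8.1 (the version numbers are arbitrary examples).
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 1)) {
 *		... take the >= 8.1 path ...
 *	}
 */
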
a1255107 1394/**
2990a1fc 1395 * amdgpu_device_ip_block_add
a1255107
AD
1396 *
1397 * @adev: amdgpu_device pointer
1398 * @ip_block_version: pointer to the IP to add
1399 *
1400 * Adds the IP block driver information to the collection of IPs
1401 * on the asic.
1402 */
2990a1fc
AD
1403int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1404 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1405{
1406 if (!ip_block_version)
1407 return -EINVAL;
1408
e966a725 1409 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1410 ip_block_version->funcs->name);
1411
a1255107
AD
1412 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1413
1414 return 0;
1415}
1416
e3ecdffa
AD
1417/**
1418 * amdgpu_device_enable_virtual_display - enable virtual display feature
1419 *
1420 * @adev: amdgpu_device pointer
1421 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
1428 */
483ef985 1429static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1430{
1431 adev->enable_virtual_display = false;
1432
1433 if (amdgpu_virtual_display) {
1434 struct drm_device *ddev = adev->ddev;
1435 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1436 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1437
1438 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1439 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1440 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1441 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1442 if (!strcmp("all", pciaddname)
1443 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1444 long num_crtc;
1445 int res = -1;
1446
9accf2fd 1447 adev->enable_virtual_display = true;
0f66356d
ED
1448
1449 if (pciaddname_tmp)
1450 res = kstrtol(pciaddname_tmp, 10,
1451 &num_crtc);
1452
1453 if (!res) {
1454 if (num_crtc < 1)
1455 num_crtc = 1;
1456 if (num_crtc > 6)
1457 num_crtc = 6;
1458 adev->mode_info.num_crtc = num_crtc;
1459 } else {
1460 adev->mode_info.num_crtc = 1;
1461 }
9accf2fd
ED
1462 break;
1463 }
1464 }
1465
0f66356d
ED
1466 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1467 amdgpu_virtual_display, pci_address_name,
1468 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1469
1470 kfree(pciaddstr);
1471 }
1472}
1473
e3ecdffa
AD
1474/**
1475 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1476 *
1477 * @adev: amdgpu_device pointer
1478 *
1479 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1481 * the asic.
1482 * Returns 0 on success, -EINVAL on failure.
1483 */
e2a75f88
AD
1484static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1485{
e2a75f88
AD
1486 const char *chip_name;
1487 char fw_name[30];
1488 int err;
1489 const struct gpu_info_firmware_header_v1_0 *hdr;
1490
ab4fe3e1
HR
1491 adev->firmware.gpu_info_fw = NULL;
1492
e2a75f88
AD
1493 switch (adev->asic_type) {
1494 case CHIP_TOPAZ:
1495 case CHIP_TONGA:
1496 case CHIP_FIJI:
e2a75f88 1497 case CHIP_POLARIS10:
cc07f18d 1498 case CHIP_POLARIS11:
e2a75f88 1499 case CHIP_POLARIS12:
cc07f18d 1500 case CHIP_VEGAM:
e2a75f88
AD
1501 case CHIP_CARRIZO:
1502 case CHIP_STONEY:
1503#ifdef CONFIG_DRM_AMDGPU_SI
1504 case CHIP_VERDE:
1505 case CHIP_TAHITI:
1506 case CHIP_PITCAIRN:
1507 case CHIP_OLAND:
1508 case CHIP_HAINAN:
1509#endif
1510#ifdef CONFIG_DRM_AMDGPU_CIK
1511 case CHIP_BONAIRE:
1512 case CHIP_HAWAII:
1513 case CHIP_KAVERI:
1514 case CHIP_KABINI:
1515 case CHIP_MULLINS:
1516#endif
27c0bc71 1517 case CHIP_VEGA20:
e2a75f88
AD
1518 default:
1519 return 0;
1520 case CHIP_VEGA10:
1521 chip_name = "vega10";
1522 break;
3f76dced
AD
1523 case CHIP_VEGA12:
1524 chip_name = "vega12";
1525 break;
2d2e5e7e 1526 case CHIP_RAVEN:
54c4d17e
FX
1527 if (adev->rev_id >= 8)
1528 chip_name = "raven2";
741deade
AD
1529 else if (adev->pdev->device == 0x15d8)
1530 chip_name = "picasso";
54c4d17e
FX
1531 else
1532 chip_name = "raven";
2d2e5e7e 1533 break;
65e60f6e
LM
1534 case CHIP_ARCTURUS:
1535 chip_name = "arcturus";
1536 break;
b51a26a0
HR
1537 case CHIP_RENOIR:
1538 chip_name = "renoir";
1539 break;
23c6268e
HR
1540 case CHIP_NAVI10:
1541 chip_name = "navi10";
1542 break;
ed42cfe1
XY
1543 case CHIP_NAVI14:
1544 chip_name = "navi14";
1545 break;
42b325e5
XY
1546 case CHIP_NAVI12:
1547 chip_name = "navi12";
1548 break;
e2a75f88
AD
1549 }
1550
1551 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1552 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1553 if (err) {
1554 dev_err(adev->dev,
1555 "Failed to load gpu_info firmware \"%s\"\n",
1556 fw_name);
1557 goto out;
1558 }
ab4fe3e1 1559 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1560 if (err) {
1561 dev_err(adev->dev,
1562 "Failed to validate gpu_info firmware \"%s\"\n",
1563 fw_name);
1564 goto out;
1565 }
1566
ab4fe3e1 1567 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1568 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1569
1570 switch (hdr->version_major) {
1571 case 1:
1572 {
1573 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1574 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1575 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1576
ec51d3fa
XY
1577 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1578 goto parse_soc_bounding_box;
1579
b5ab16bf
AD
1580 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1581 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1582 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1583 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1584 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1585 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1586 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1587 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1588 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1589 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1590 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1591 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1592 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1593 adev->gfx.cu_info.max_waves_per_simd =
1594 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1595 adev->gfx.cu_info.max_scratch_slots_per_cu =
1596 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1597 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1598 if (hdr->version_minor >= 1) {
35c2e910
HZ
1599 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1600 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1601 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1602 adev->gfx.config.num_sc_per_sh =
1603 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1604 adev->gfx.config.num_packer_per_sc =
1605 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1606 }
ec51d3fa
XY
1607
1608parse_soc_bounding_box:
ec51d3fa
XY
1609 /*
		 * soc bounding box info is not integrated in the discovery table,
1611 * we always need to parse it from gpu info firmware.
1612 */
48321c3d
HW
1613 if (hdr->version_minor == 2) {
1614 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1615 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1616 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1617 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1618 }
e2a75f88
AD
1619 break;
1620 }
1621 default:
1622 dev_err(adev->dev,
1623 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1624 err = -EINVAL;
1625 goto out;
1626 }
1627out:
e2a75f88
AD
1628 return err;
1629}
1630
e3ecdffa
AD
1631/**
1632 * amdgpu_device_ip_early_init - run early init for hardware IPs
1633 *
1634 * @adev: amdgpu_device pointer
1635 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
1638 * is the first stage in initializing the asic.
1639 * Returns 0 on success, negative error code on failure.
1640 */
06ec9070 1641static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1642{
aaa36a97 1643 int i, r;
d38ceaf9 1644
483ef985 1645 amdgpu_device_enable_virtual_display(adev);
a6be7570 1646
d38ceaf9 1647 switch (adev->asic_type) {
aaa36a97
AD
1648 case CHIP_TOPAZ:
1649 case CHIP_TONGA:
48299f95 1650 case CHIP_FIJI:
2cc0c0b5 1651 case CHIP_POLARIS10:
32cc7e53 1652 case CHIP_POLARIS11:
c4642a47 1653 case CHIP_POLARIS12:
32cc7e53 1654 case CHIP_VEGAM:
aaa36a97 1655 case CHIP_CARRIZO:
39bb0c92
SL
1656 case CHIP_STONEY:
1657 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1658 adev->family = AMDGPU_FAMILY_CZ;
1659 else
1660 adev->family = AMDGPU_FAMILY_VI;
1661
1662 r = vi_set_ip_blocks(adev);
1663 if (r)
1664 return r;
1665 break;
33f34802
KW
1666#ifdef CONFIG_DRM_AMDGPU_SI
1667 case CHIP_VERDE:
1668 case CHIP_TAHITI:
1669 case CHIP_PITCAIRN:
1670 case CHIP_OLAND:
1671 case CHIP_HAINAN:
295d0daf 1672 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1673 r = si_set_ip_blocks(adev);
1674 if (r)
1675 return r;
1676 break;
1677#endif
a2e73f56
AD
1678#ifdef CONFIG_DRM_AMDGPU_CIK
1679 case CHIP_BONAIRE:
1680 case CHIP_HAWAII:
1681 case CHIP_KAVERI:
1682 case CHIP_KABINI:
1683 case CHIP_MULLINS:
1684 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1685 adev->family = AMDGPU_FAMILY_CI;
1686 else
1687 adev->family = AMDGPU_FAMILY_KV;
1688
1689 r = cik_set_ip_blocks(adev);
1690 if (r)
1691 return r;
1692 break;
1693#endif
e48a3cd9
AD
1694 case CHIP_VEGA10:
1695 case CHIP_VEGA12:
e4bd8170 1696 case CHIP_VEGA20:
e48a3cd9 1697 case CHIP_RAVEN:
61cf44c1 1698 case CHIP_ARCTURUS:
b51a26a0
HR
1699 case CHIP_RENOIR:
1700 if (adev->asic_type == CHIP_RAVEN ||
1701 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1702 adev->family = AMDGPU_FAMILY_RV;
1703 else
1704 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1705
1706 r = soc15_set_ip_blocks(adev);
1707 if (r)
1708 return r;
1709 break;
0a5b8c7b 1710 case CHIP_NAVI10:
7ecb5cd4 1711 case CHIP_NAVI14:
4808cf9c 1712 case CHIP_NAVI12:
0a5b8c7b
HR
1713 adev->family = AMDGPU_FAMILY_NV;
1714
1715 r = nv_set_ip_blocks(adev);
1716 if (r)
1717 return r;
1718 break;
d38ceaf9
AD
1719 default:
1720 /* FIXME: not supported yet */
1721 return -EINVAL;
1722 }
1723
e2a75f88
AD
1724 r = amdgpu_device_parse_gpu_info_fw(adev);
1725 if (r)
1726 return r;
1727
ec51d3fa
XY
1728 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1729 amdgpu_discovery_get_gfx_info(adev);
1730
1884734a 1731 amdgpu_amdkfd_device_probe(adev);
1732
3149d9da
XY
1733 if (amdgpu_sriov_vf(adev)) {
1734 r = amdgpu_virt_request_full_gpu(adev, true);
1735 if (r)
5ffa61c1 1736 return -EAGAIN;
3149d9da
XY
1737 }
1738
3b94fb10 1739 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1740 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1741 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1742
d38ceaf9
AD
1743 for (i = 0; i < adev->num_ip_blocks; i++) {
1744 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1745 DRM_ERROR("disabled ip block: %d <%s>\n",
1746 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1747 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1748 } else {
a1255107
AD
1749 if (adev->ip_blocks[i].version->funcs->early_init) {
1750 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1751 if (r == -ENOENT) {
a1255107 1752 adev->ip_blocks[i].status.valid = false;
2c1a2784 1753 } else if (r) {
a1255107
AD
1754 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1755 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1756 return r;
2c1a2784 1757 } else {
a1255107 1758 adev->ip_blocks[i].status.valid = true;
2c1a2784 1759 }
974e6b64 1760 } else {
a1255107 1761 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1762 }
d38ceaf9 1763 }
21a249ca
AD
1764 /* get the vbios after the asic_funcs are set up */
1765 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1766 /* Read BIOS */
1767 if (!amdgpu_get_bios(adev))
1768 return -EINVAL;
1769
1770 r = amdgpu_atombios_init(adev);
1771 if (r) {
1772 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1773 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1774 return r;
1775 }
1776 }
d38ceaf9
AD
1777 }
1778
395d1fb9
NH
1779 adev->cg_flags &= amdgpu_cg_mask;
1780 adev->pg_flags &= amdgpu_pg_mask;
1781
d38ceaf9
AD
1782 return 0;
1783}
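/*
 * Illustrative sketch of the ip_block_mask handling above (hypothetical
 * values, not from the original source): amdgpu_ip_block_mask is treated
 * as a per-index bitmask, so clearing bit i marks adev->ip_blocks[i]
 * invalid and skips its early_init. For example, booting with
 *
 *   amdgpu.ip_block_mask=0xfffffffd
 *
 * would clear bit 1 and disable whichever IP block was registered at
 * index 1 by the *_set_ip_blocks() call selected above; the index-to-IP
 * mapping depends on the asic family.
 */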
1784
0a4f2520
RZ
1785static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1786{
1787 int i, r;
1788
1789 for (i = 0; i < adev->num_ip_blocks; i++) {
1790 if (!adev->ip_blocks[i].status.sw)
1791 continue;
1792 if (adev->ip_blocks[i].status.hw)
1793 continue;
1794 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1795 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1796 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1797 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1798 if (r) {
1799 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1800 adev->ip_blocks[i].version->funcs->name, r);
1801 return r;
1802 }
1803 adev->ip_blocks[i].status.hw = true;
1804 }
1805 }
1806
1807 return 0;
1808}
1809
1810static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1811{
1812 int i, r;
1813
1814 for (i = 0; i < adev->num_ip_blocks; i++) {
1815 if (!adev->ip_blocks[i].status.sw)
1816 continue;
1817 if (adev->ip_blocks[i].status.hw)
1818 continue;
1819 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1820 if (r) {
1821 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1822 adev->ip_blocks[i].version->funcs->name, r);
1823 return r;
1824 }
1825 adev->ip_blocks[i].status.hw = true;
1826 }
1827
1828 return 0;
1829}
1830
7a3e0bb2
RZ
1831static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1832{
1833 int r = 0;
1834 int i;
80f41f84 1835 uint32_t smu_version;
7a3e0bb2
RZ
1836
1837 if (adev->asic_type >= CHIP_VEGA10) {
1838 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1839 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1840 continue;
1841
1842 /* no need to do the fw loading again if already done */
1843 if (adev->ip_blocks[i].status.hw == true)
1844 break;
1845
1846 if (adev->in_gpu_reset || adev->in_suspend) {
1847 r = adev->ip_blocks[i].version->funcs->resume(adev);
1848 if (r) {
1849 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1850 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1851 return r;
1852 }
1853 } else {
1854 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1855 if (r) {
1856 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1857 adev->ip_blocks[i].version->funcs->name, r);
1858 return r;
7a3e0bb2 1859 }
7a3e0bb2 1860 }
482f0e53
ML
1861
1862 adev->ip_blocks[i].status.hw = true;
1863 break;
7a3e0bb2
RZ
1864 }
1865 }
482f0e53 1866
8973d9ec
ED
1867 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1868 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1869
80f41f84 1870 return r;
7a3e0bb2
RZ
1871}
1872
e3ecdffa
AD
1873/**
1874 * amdgpu_device_ip_init - run init for hardware IPs
1875 *
1876 * @adev: amdgpu_device pointer
1877 *
1878 * Main initialization pass for hardware IPs. The list of all the hardware
1879 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1880 * are run. sw_init initializes the software state associated with each IP
1881 * and hw_init initializes the hardware associated with each IP.
1882 * Returns 0 on success, negative error code on failure.
1883 */
06ec9070 1884static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1885{
1886 int i, r;
1887
c030f2e4 1888 r = amdgpu_ras_init(adev);
1889 if (r)
1890 return r;
1891
d38ceaf9 1892 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1893 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1894 continue;
a1255107 1895 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1896 if (r) {
a1255107
AD
1897 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1898 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1899 goto init_failed;
2c1a2784 1900 }
a1255107 1901 adev->ip_blocks[i].status.sw = true;
bfca0289 1902
d38ceaf9 1903 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1904 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1905 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1906 if (r) {
1907 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1908 goto init_failed;
2c1a2784 1909 }
a1255107 1910 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1911 if (r) {
1912 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1913 goto init_failed;
2c1a2784 1914 }
06ec9070 1915 r = amdgpu_device_wb_init(adev);
2c1a2784 1916 if (r) {
06ec9070 1917 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1918 goto init_failed;
2c1a2784 1919 }
a1255107 1920 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1921
1922 /* right after GMC hw init, we create CSA */
f92d5c61 1923 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1924 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1925 AMDGPU_GEM_DOMAIN_VRAM,
1926 AMDGPU_CSA_SIZE);
2493664f
ML
1927 if (r) {
1928 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1929 goto init_failed;
2493664f
ML
1930 }
1931 }
d38ceaf9
AD
1932 }
1933 }
1934
c9ffa427
YT
1935 if (amdgpu_sriov_vf(adev))
1936 amdgpu_virt_init_data_exchange(adev);
1937
533aed27
AG
1938 r = amdgpu_ib_pool_init(adev);
1939 if (r) {
1940 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1941 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1942 goto init_failed;
1943 }
1944
c8963ea4
RZ
1945 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1946 if (r)
72d3f592 1947 goto init_failed;
0a4f2520
RZ
1948
1949 r = amdgpu_device_ip_hw_init_phase1(adev);
1950 if (r)
72d3f592 1951 goto init_failed;
0a4f2520 1952
7a3e0bb2
RZ
1953 r = amdgpu_device_fw_loading(adev);
1954 if (r)
72d3f592 1955 goto init_failed;
7a3e0bb2 1956
0a4f2520
RZ
1957 r = amdgpu_device_ip_hw_init_phase2(adev);
1958 if (r)
72d3f592 1959 goto init_failed;
d38ceaf9 1960
121a2bc6
AG
1961 /*
1962 * retired pages will be loaded from eeprom and reserved here,
1963 * it should be called after amdgpu_device_ip_hw_init_phase2 since
1964 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
1965 * for I2C communication, which is only true at this point.
1966 * recovery_init may fail, but it can free all resources allocated by
1967 * itself and its failure should not stop amdgpu init process.
1968 *
1969 * Note: theoretically, this should be called before all vram allocations
1970 * to protect retired pages from being reused by other allocations.
1971 */
1972 amdgpu_ras_recovery_init(adev);
1973
3e2e2ab5
HZ
1974 if (adev->gmc.xgmi.num_physical_nodes > 1)
1975 amdgpu_xgmi_add_device(adev);
1884734a 1976 amdgpu_amdkfd_device_init(adev);
c6332b97 1977
72d3f592 1978init_failed:
c9ffa427 1979 if (amdgpu_sriov_vf(adev))
c6332b97 1980 amdgpu_virt_release_full_gpu(adev, true);
1981
72d3f592 1982 return r;
d38ceaf9
AD
1983}
1984
e3ecdffa
AD
1985/**
1986 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1987 *
1988 * @adev: amdgpu_device pointer
1989 *
1990 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1991 * this function before a GPU reset. If the value is retained after a
1992 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1993 */
06ec9070 1994static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1995{
1996 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1997}
1998
e3ecdffa
AD
1999/**
2000 * amdgpu_device_check_vram_lost - check if vram is valid
2001 *
2002 * @adev: amdgpu_device pointer
2003 *
2004 * Checks the reset magic value written to the gart pointer in VRAM.
2005 * The driver calls this after a GPU reset to see if the contents of
2006 * VRAM are lost or not.
2007 * Returns true if vram is lost, false if not.
2008 */
06ec9070 2009static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2010{
028cfb24
EQ
2011 if (memcmp(adev->gart.ptr, adev->reset_magic,
2012 AMDGPU_RESET_MAGIC_NUM))
2013 return true;
2014
2015 if (!adev->in_gpu_reset)
2016 return false;
2017
2018 /*
2019 * For all ASICs with baco/mode1 reset, the VRAM is
2020 * always assumed to be lost.
2021 */
2022 switch (amdgpu_asic_reset_method(adev)) {
2023 case AMD_RESET_METHOD_BACO:
2024 case AMD_RESET_METHOD_MODE1:
2025 return true;
2026 default:
2027 return false;
2028 }
0c49e0b8
CZ
2029}
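/*
 * Rough sketch of the reset-magic handshake implemented by the two helpers
 * above, assuming adev->gart.ptr points at a CPU-visible GART page backed
 * by VRAM:
 *
 *   amdgpu_device_fill_reset_magic(adev);       <-- snapshot before reset
 *   ... ASIC reset happens here ...
 *   if (amdgpu_device_check_vram_lost(adev))
 *           ... reload VRAM contents (GART table, firmware BOs, ...) ...
 *
 * If the memcmp() still matches and the reset method is not BACO/mode1,
 * the VRAM contents are assumed to have survived the reset.
 */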
2030
e3ecdffa 2031/**
1112a46b 2032 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2033 *
2034 * @adev: amdgpu_device pointer
b8b72130 2035 * @state: clockgating state (gate or ungate)
e3ecdffa 2036 *
e3ecdffa 2037 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2038 * set_clockgating_state callbacks are run. During late init this pass
2039 * enables clockgating for the hardware IPs; during fini or suspend it
2040 * disables clockgating.
e3ecdffa
AD
2041 * Returns 0 on success, negative error code on failure.
2042 */
fdd34271 2043
1112a46b
RZ
2044static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2045 enum amd_clockgating_state state)
d38ceaf9 2046{
1112a46b 2047 int i, j, r;
d38ceaf9 2048
4a2ba394
SL
2049 if (amdgpu_emu_mode == 1)
2050 return 0;
2051
1112a46b
RZ
2052 for (j = 0; j < adev->num_ip_blocks; j++) {
2053 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2054 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2055 continue;
4a446d55 2056 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2057 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2058 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2059 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2060 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2061 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2062 /* enable clockgating to save power */
a1255107 2063 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2064 state);
4a446d55
AD
2065 if (r) {
2066 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2067 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2068 return r;
2069 }
b0b00ff1 2070 }
d38ceaf9 2071 }
06b18f61 2072
c9f96fd5
RZ
2073 return 0;
2074}
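/*
 * Note on ordering: the index expression
 * "i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1"
 * walks the IP list front-to-back when gating and back-to-front when
 * ungating, mirroring init/fini order. With three blocks A, B, C the
 * gate pass visits A, B, C and the ungate pass visits C, B, A.
 */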
2075
1112a46b 2076static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2077{
1112a46b 2078 int i, j, r;
06b18f61 2079
c9f96fd5
RZ
2080 if (amdgpu_emu_mode == 1)
2081 return 0;
2082
1112a46b
RZ
2083 for (j = 0; j < adev->num_ip_blocks; j++) {
2084 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2085 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2086 continue;
2087 /* skip PG for VCE/UVD, it's handled specially */
2088 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2089 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2090 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2091 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2092 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2093 /* enable powergating to save power */
2094 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2095 state);
c9f96fd5
RZ
2096 if (r) {
2097 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2098 adev->ip_blocks[i].version->funcs->name, r);
2099 return r;
2100 }
2101 }
2102 }
2dc80b00
S
2103 return 0;
2104}
2105
beff74bc
AD
2106static int amdgpu_device_enable_mgpu_fan_boost(void)
2107{
2108 struct amdgpu_gpu_instance *gpu_ins;
2109 struct amdgpu_device *adev;
2110 int i, ret = 0;
2111
2112 mutex_lock(&mgpu_info.mutex);
2113
2114 /*
2115 * MGPU fan boost feature should be enabled
2116 * only when there are two or more dGPUs in
2117 * the system
2118 */
2119 if (mgpu_info.num_dgpu < 2)
2120 goto out;
2121
2122 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2123 gpu_ins = &(mgpu_info.gpu_ins[i]);
2124 adev = gpu_ins->adev;
2125 if (!(adev->flags & AMD_IS_APU) &&
2126 !gpu_ins->mgpu_fan_enabled &&
2127 adev->powerplay.pp_funcs &&
2128 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2129 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2130 if (ret)
2131 break;
2132
2133 gpu_ins->mgpu_fan_enabled = 1;
2134 }
2135 }
2136
2137out:
2138 mutex_unlock(&mgpu_info.mutex);
2139
2140 return ret;
2141}
2142
e3ecdffa
AD
2143/**
2144 * amdgpu_device_ip_late_init - run late init for hardware IPs
2145 *
2146 * @adev: amdgpu_device pointer
2147 *
2148 * Late initialization pass for hardware IPs. The list of all the hardware
2149 * IPs that make up the asic is walked and the late_init callbacks are run.
2150 * late_init covers any special initialization that an IP requires
2152 * after all of the IPs have been initialized or something that needs to happen
2152 * late in the init process.
2153 * Returns 0 on success, negative error code on failure.
2154 */
06ec9070 2155static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2156{
60599a03 2157 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2158 int i = 0, r;
2159
2160 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2161 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2162 continue;
2163 if (adev->ip_blocks[i].version->funcs->late_init) {
2164 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2165 if (r) {
2166 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2167 adev->ip_blocks[i].version->funcs->name, r);
2168 return r;
2169 }
2dc80b00 2170 }
73f847db 2171 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2172 }
2173
1112a46b
RZ
2174 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2175 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2176
06ec9070 2177 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2178
beff74bc
AD
2179 r = amdgpu_device_enable_mgpu_fan_boost();
2180 if (r)
2181 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2182
60599a03
EQ
2183
2184 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2185 mutex_lock(&mgpu_info.mutex);
2186
2187 /*
2188 * Reset the device p-state to low, as it was booted with the high p-state.
2189 *
2190 * This should be performed only after all devices from the same
2191 * hive get initialized.
2192 *
2193 * However, the number of devices in a hive is not known in advance;
2194 * it is counted one by one as each device initializes.
2195 *
2196 * So we wait until all XGMI interlinked devices have initialized.
2197 * This may add some delay, since those devices may come from
2198 * different hives, but that should be OK.
2199 */
2200 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2201 for (i = 0; i < mgpu_info.num_gpu; i++) {
2202 gpu_instance = &(mgpu_info.gpu_ins[i]);
2203 if (gpu_instance->adev->flags & AMD_IS_APU)
2204 continue;
2205
2206 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2207 if (r) {
2208 DRM_ERROR("pstate setting failed (%d).\n", r);
2209 break;
2210 }
2211 }
2212 }
2213
2214 mutex_unlock(&mgpu_info.mutex);
2215 }
2216
d38ceaf9
AD
2217 return 0;
2218}
2219
e3ecdffa
AD
2220/**
2221 * amdgpu_device_ip_fini - run fini for hardware IPs
2222 *
2223 * @adev: amdgpu_device pointer
2224 *
2225 * Main teardown pass for hardware IPs. The list of all the hardware
2226 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2227 * are run. hw_fini tears down the hardware associated with each IP
2228 * and sw_fini tears down any software state associated with each IP.
2229 * Returns 0 on success, negative error code on failure.
2230 */
06ec9070 2231static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2232{
2233 int i, r;
2234
c030f2e4 2235 amdgpu_ras_pre_fini(adev);
2236
a82400b5
AG
2237 if (adev->gmc.xgmi.num_physical_nodes > 1)
2238 amdgpu_xgmi_remove_device(adev);
2239
1884734a 2240 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2241
2242 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2243 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2244
3e96dbfd
AD
2245 /* need to disable SMC first */
2246 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2247 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2248 continue;
fdd34271 2249 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2250 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2251 /* XXX handle errors */
2252 if (r) {
2253 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2254 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2255 }
a1255107 2256 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2257 break;
2258 }
2259 }
2260
d38ceaf9 2261 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2262 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2263 continue;
8201a67a 2264
a1255107 2265 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2266 /* XXX handle errors */
2c1a2784 2267 if (r) {
a1255107
AD
2268 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2269 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2270 }
8201a67a 2271
a1255107 2272 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2273 }
2274
9950cda2 2275
d38ceaf9 2276 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2277 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2278 continue;
c12aba3a
ML
2279
2280 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2281 amdgpu_ucode_free_bo(adev);
1e256e27 2282 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2283 amdgpu_device_wb_fini(adev);
2284 amdgpu_device_vram_scratch_fini(adev);
533aed27 2285 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2286 }
2287
a1255107 2288 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2289 /* XXX handle errors */
2c1a2784 2290 if (r) {
a1255107
AD
2291 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2292 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2293 }
a1255107
AD
2294 adev->ip_blocks[i].status.sw = false;
2295 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2296 }
2297
a6dcfd9c 2298 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2299 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2300 continue;
a1255107
AD
2301 if (adev->ip_blocks[i].version->funcs->late_fini)
2302 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2303 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2304 }
2305
c030f2e4 2306 amdgpu_ras_fini(adev);
2307
030308fc 2308 if (amdgpu_sriov_vf(adev))
24136135
ML
2309 if (amdgpu_virt_release_full_gpu(adev, false))
2310 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2311
d38ceaf9
AD
2312 return 0;
2313}
2314
e3ecdffa 2315/**
beff74bc 2316 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2317 *
1112a46b 2318 * @work: work_struct.
e3ecdffa 2319 */
beff74bc 2320static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2321{
2322 struct amdgpu_device *adev =
beff74bc 2323 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2324 int r;
2325
2326 r = amdgpu_ib_ring_tests(adev);
2327 if (r)
2328 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2329}
2330
1e317b99
RZ
2331static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2332{
2333 struct amdgpu_device *adev =
2334 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2335
2336 mutex_lock(&adev->gfx.gfx_off_mutex);
2337 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2338 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2339 adev->gfx.gfx_off_state = true;
2340 }
2341 mutex_unlock(&adev->gfx.gfx_off_mutex);
2342}
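/*
 * Sketch of how this delayed work is expected to be used, based on the
 * gfx_off_req_count handling elsewhere in the driver (usage illustrative,
 * not taken from this file):
 *
 *   amdgpu_gfx_off_ctrl(adev, false);   <-- ++req_count, GFXOFF stays off
 *   ... submit and wait for work ...
 *   amdgpu_gfx_off_ctrl(adev, true);    <-- --req_count, schedules this work
 *
 * GFXOFF is only requested from the SMU once the request count drops to
 * zero and the delay expires, so short bursts of work do not toggle the
 * GFX power state on every submission.
 */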
2343
e3ecdffa 2344/**
e7854a03 2345 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2346 *
2347 * @adev: amdgpu_device pointer
2348 *
2349 * First suspend pass for hardware IPs. Clockgating and powergating are
2350 * disabled and the suspend callbacks are run for the display (DCE) IPs
2351 * only; all other IPs are handled in phase 2. suspend puts the hardware
2352 * and software state of each IP into a state suitable for suspend.
2353 * Returns 0 on success, negative error code on failure.
2354 */
e7854a03
AD
2355static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2356{
2357 int i, r;
2358
b2a7e973
PL
2359 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2360 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2361
e7854a03
AD
2362 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2363 if (!adev->ip_blocks[i].status.valid)
2364 continue;
2365 /* displays are handled separately */
2366 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2367 /* XXX handle errors */
2368 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2369 /* XXX handle errors */
2370 if (r) {
2371 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2372 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2373 return r;
e7854a03 2374 }
482f0e53 2375 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2376 }
2377 }
2378
e7854a03
AD
2379 return 0;
2380}
2381
2382/**
2383 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2384 *
2385 * @adev: amdgpu_device pointer
2386 *
2387 * Second suspend pass for hardware IPs. The list of all the hardware
2388 * IPs that make up the asic is walked and the suspend callbacks are run
2389 * for every IP except the displays, which were handled in phase 1. suspend
2390 * puts the hardware and software state of each IP into a state suitable for suspend.
2391 * Returns 0 on success, negative error code on failure.
2392 */
2393static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2394{
2395 int i, r;
2396
2397 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2398 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2399 continue;
e7854a03
AD
2400 /* displays are handled in phase1 */
2401 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2402 continue;
bff77e86
LM
2403 /* PSP lost connection when err_event_athub occurs */
2404 if (amdgpu_ras_intr_triggered() &&
2405 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2406 adev->ip_blocks[i].status.hw = false;
2407 continue;
2408 }
d38ceaf9 2409 /* XXX handle errors */
a1255107 2410 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2411 /* XXX handle errors */
2c1a2784 2412 if (r) {
a1255107
AD
2413 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2414 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2415 }
876923fb 2416 adev->ip_blocks[i].status.hw = false;
a3a09142 2417 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2418 if (!amdgpu_sriov_vf(adev)) {
2419 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2420 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2421 if (r) {
2422 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2423 adev->mp1_state, r);
2424 return r;
2425 }
a3a09142
AD
2426 }
2427 }
b5507c7e 2428 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2429 }
2430
2431 return 0;
2432}
2433
e7854a03
AD
2434/**
2435 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2436 *
2437 * @adev: amdgpu_device pointer
2438 *
2439 * Main suspend function for hardware IPs. The list of all the hardware
2440 * IPs that make up the asic is walked, clockgating is disabled and the
2441 * suspend callbacks are run. suspend puts the hardware and software state
2442 * in each IP into a state suitable for suspend.
2443 * Returns 0 on success, negative error code on failure.
2444 */
2445int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2446{
2447 int r;
2448
e7819644
YT
2449 if (amdgpu_sriov_vf(adev))
2450 amdgpu_virt_request_full_gpu(adev, false);
2451
e7854a03
AD
2452 r = amdgpu_device_ip_suspend_phase1(adev);
2453 if (r)
2454 return r;
2455 r = amdgpu_device_ip_suspend_phase2(adev);
2456
e7819644
YT
2457 if (amdgpu_sriov_vf(adev))
2458 amdgpu_virt_release_full_gpu(adev, false);
2459
e7854a03
AD
2460 return r;
2461}
2462
06ec9070 2463static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2464{
2465 int i, r;
2466
2cb681b6
ML
2467 static enum amd_ip_block_type ip_order[] = {
2468 AMD_IP_BLOCK_TYPE_GMC,
2469 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2470 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2471 AMD_IP_BLOCK_TYPE_IH,
2472 };
a90ad3c2 2473
2cb681b6
ML
2474 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2475 int j;
2476 struct amdgpu_ip_block *block;
a90ad3c2 2477
2cb681b6
ML
2478 for (j = 0; j < adev->num_ip_blocks; j++) {
2479 block = &adev->ip_blocks[j];
2480
482f0e53 2481 block->status.hw = false;
2cb681b6
ML
2482 if (block->version->type != ip_order[i] ||
2483 !block->status.valid)
2484 continue;
2485
2486 r = block->version->funcs->hw_init(adev);
0aaeefcc 2487 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2488 if (r)
2489 return r;
482f0e53 2490 block->status.hw = true;
a90ad3c2
ML
2491 }
2492 }
2493
2494 return 0;
2495}
2496
06ec9070 2497static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2498{
2499 int i, r;
2500
2cb681b6
ML
2501 static enum amd_ip_block_type ip_order[] = {
2502 AMD_IP_BLOCK_TYPE_SMC,
2503 AMD_IP_BLOCK_TYPE_DCE,
2504 AMD_IP_BLOCK_TYPE_GFX,
2505 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2506 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2507 AMD_IP_BLOCK_TYPE_VCE,
2508 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2509 };
a90ad3c2 2510
2cb681b6
ML
2511 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2512 int j;
2513 struct amdgpu_ip_block *block;
a90ad3c2 2514
2cb681b6
ML
2515 for (j = 0; j < adev->num_ip_blocks; j++) {
2516 block = &adev->ip_blocks[j];
2517
2518 if (block->version->type != ip_order[i] ||
482f0e53
ML
2519 !block->status.valid ||
2520 block->status.hw)
2cb681b6
ML
2521 continue;
2522
895bd048
JZ
2523 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2524 r = block->version->funcs->resume(adev);
2525 else
2526 r = block->version->funcs->hw_init(adev);
2527
0aaeefcc 2528 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2529 if (r)
2530 return r;
482f0e53 2531 block->status.hw = true;
a90ad3c2
ML
2532 }
2533 }
2534
2535 return 0;
2536}
2537
e3ecdffa
AD
2538/**
2539 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2540 *
2541 * @adev: amdgpu_device pointer
2542 *
2543 * First resume function for hardware IPs. The list of all the hardware
2544 * IPs that make up the asic is walked and the resume callbacks are run for
2545 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2546 * after a suspend and updates the software state as necessary. This
2547 * function is also used for restoring the GPU after a GPU reset.
2548 * Returns 0 on success, negative error code on failure.
2549 */
06ec9070 2550static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2551{
2552 int i, r;
2553
a90ad3c2 2554 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2555 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2556 continue;
a90ad3c2 2557 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2558 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2559 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2560
fcf0649f
CZ
2561 r = adev->ip_blocks[i].version->funcs->resume(adev);
2562 if (r) {
2563 DRM_ERROR("resume of IP block <%s> failed %d\n",
2564 adev->ip_blocks[i].version->funcs->name, r);
2565 return r;
2566 }
482f0e53 2567 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2568 }
2569 }
2570
2571 return 0;
2572}
2573
e3ecdffa
AD
2574/**
2575 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2576 *
2577 * @adev: amdgpu_device pointer
2578 *
2579 * Second resume function for hardware IPs. The list of all the hardware
2580 * IPs that make up the asic is walked and the resume callbacks are run for
2581 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2582 * functional state after a suspend and updates the software state as
2583 * necessary. This function is also used for restoring the GPU after a GPU
2584 * reset.
2585 * Returns 0 on success, negative error code on failure.
2586 */
06ec9070 2587static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2588{
2589 int i, r;
2590
2591 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2592 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2593 continue;
fcf0649f 2594 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2595 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2596 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2597 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2598 continue;
a1255107 2599 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2600 if (r) {
a1255107
AD
2601 DRM_ERROR("resume of IP block <%s> failed %d\n",
2602 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2603 return r;
2c1a2784 2604 }
482f0e53 2605 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2606 }
2607
2608 return 0;
2609}
2610
e3ecdffa
AD
2611/**
2612 * amdgpu_device_ip_resume - run resume for hardware IPs
2613 *
2614 * @adev: amdgpu_device pointer
2615 *
2616 * Main resume function for hardware IPs. The hardware IPs
2617 * are split into two resume functions because they are
2618 * also used in recovering from a GPU reset and some additional
2619 * steps need to be taken between them. In this case (S3/S4) they are
2620 * run sequentially.
2621 * Returns 0 on success, negative error code on failure.
2622 */
06ec9070 2623static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2624{
2625 int r;
2626
06ec9070 2627 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2628 if (r)
2629 return r;
7a3e0bb2
RZ
2630
2631 r = amdgpu_device_fw_loading(adev);
2632 if (r)
2633 return r;
2634
06ec9070 2635 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2636
2637 return r;
2638}
2639
e3ecdffa
AD
2640/**
2641 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2642 *
2643 * @adev: amdgpu_device pointer
2644 *
2645 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2646 */
4e99a44e 2647static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2648{
6867e1b5
ML
2649 if (amdgpu_sriov_vf(adev)) {
2650 if (adev->is_atom_fw) {
2651 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2652 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2653 } else {
2654 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2655 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2656 }
2657
2658 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2659 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2660 }
048765ad
AR
2661}
2662
e3ecdffa
AD
2663/**
2664 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2665 *
2666 * @asic_type: AMD asic type
2667 *
2668 * Check if there is DC (new modesetting infrastructure) support for an asic.
2669 * Returns true if DC has support, false if not.
2670 */
4562236b
HW
2671bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2672{
2673 switch (asic_type) {
2674#if defined(CONFIG_DRM_AMD_DC)
2675 case CHIP_BONAIRE:
0d6fbccb 2676 case CHIP_KAVERI:
367e6687
AD
2677 case CHIP_KABINI:
2678 case CHIP_MULLINS:
d9fda248
HW
2679 /*
2680 * We have systems in the wild with these ASICs that require
2681 * LVDS and VGA support which is not supported with DC.
2682 *
2683 * Fallback to the non-DC driver here by default so as not to
2684 * cause regressions.
2685 */
2686 return amdgpu_dc > 0;
2687 case CHIP_HAWAII:
4562236b
HW
2688 case CHIP_CARRIZO:
2689 case CHIP_STONEY:
4562236b 2690 case CHIP_POLARIS10:
675fd32b 2691 case CHIP_POLARIS11:
2c8ad2d5 2692 case CHIP_POLARIS12:
675fd32b 2693 case CHIP_VEGAM:
4562236b
HW
2694 case CHIP_TONGA:
2695 case CHIP_FIJI:
42f8ffa1 2696 case CHIP_VEGA10:
dca7b401 2697 case CHIP_VEGA12:
c6034aa2 2698 case CHIP_VEGA20:
b86a1aa3 2699#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2700 case CHIP_RAVEN:
b4f199c7 2701 case CHIP_NAVI10:
8fceceb6 2702 case CHIP_NAVI14:
078655d9 2703 case CHIP_NAVI12:
e1c14c43 2704 case CHIP_RENOIR:
42f8ffa1 2705#endif
fd187853 2706 return amdgpu_dc != 0;
4562236b
HW
2707#endif
2708 default:
93b09a9a
SS
2709 if (amdgpu_dc > 0)
2710 DRM_INFO("Display Core has been requested via kernel parameter "
2711 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2712 return false;
2713 }
2714}
2715
2716/**
2717 * amdgpu_device_has_dc_support - check if dc is supported
2718 *
2719 * @adev: amdgpu_device pointer
2720 *
2721 * Returns true for supported, false for not supported
2722 */
2723bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2724{
2555039d
XY
2725 if (amdgpu_sriov_vf(adev))
2726 return false;
2727
4562236b
HW
2728 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2729}
2730
d4535e2c
AG
2731
2732static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2733{
2734 struct amdgpu_device *adev =
2735 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2736 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2737
c6a6e2db
AG
2738 /* It's a bug to not have a hive within this function */
2739 if (WARN_ON(!hive))
2740 return;
2741
2742 /*
2743 * Use task barrier to synchronize all xgmi reset works across the
2744 * hive. task_barrier_enter and task_barrier_exit will block
2745 * until all the threads running the xgmi reset works reach
2746 * those points. task_barrier_full will do both blocks.
2747 */
2748 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2749
2750 task_barrier_enter(&hive->tb);
2751 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2752
2753 if (adev->asic_reset_res)
2754 goto fail;
2755
2756 task_barrier_exit(&hive->tb);
2757 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2758
2759 if (adev->asic_reset_res)
2760 goto fail;
43c4d576
JC
2761
2762 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2763 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2764 } else {
2765
2766 task_barrier_full(&hive->tb);
2767 adev->asic_reset_res = amdgpu_asic_reset(adev);
2768 }
ce316fa5 2769
c6a6e2db 2770fail:
d4535e2c 2771 if (adev->asic_reset_res)
fed184e9 2772 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2773 adev->asic_reset_res, adev->ddev->unique);
2774}
2775
71f98027
AD
2776static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2777{
2778 char *input = amdgpu_lockup_timeout;
2779 char *timeout_setting = NULL;
2780 int index = 0;
2781 long timeout;
2782 int ret = 0;
2783
2784 /*
2785 * By default the timeout for non-compute jobs is 10000 ms
2786 * and there is no timeout enforced on compute jobs.
2787 * In SR-IOV or passthrough mode, the compute job timeout
2788 * also defaults to 10000 ms.
2789 */
2790 adev->gfx_timeout = msecs_to_jiffies(10000);
2791 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2792 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2793 adev->compute_timeout = adev->gfx_timeout;
2794 else
2795 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2796
f440ff44 2797 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2798 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2799 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2800 ret = kstrtol(timeout_setting, 0, &timeout);
2801 if (ret)
2802 return ret;
2803
2804 if (timeout == 0) {
2805 index++;
2806 continue;
2807 } else if (timeout < 0) {
2808 timeout = MAX_SCHEDULE_TIMEOUT;
2809 } else {
2810 timeout = msecs_to_jiffies(timeout);
2811 }
2812
2813 switch (index++) {
2814 case 0:
2815 adev->gfx_timeout = timeout;
2816 break;
2817 case 1:
2818 adev->compute_timeout = timeout;
2819 break;
2820 case 2:
2821 adev->sdma_timeout = timeout;
2822 break;
2823 case 3:
2824 adev->video_timeout = timeout;
2825 break;
2826 default:
2827 break;
2828 }
2829 }
2830 /*
2831 * There is only one value specified and
2832 * it should apply to all non-compute jobs.
2833 */
bcccee89 2834 if (index == 1) {
71f98027 2835 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2836 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2837 adev->compute_timeout = adev->gfx_timeout;
2838 }
71f98027
AD
2839 }
2840
2841 return ret;
2842}
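/*
 * Worked example for the lockup_timeout parsing above (values hypothetical):
 * booting with
 *
 *   amdgpu.lockup_timeout=10000,50000,10000,60000
 *
 * yields gfx_timeout=10000 ms, compute_timeout=50000 ms, sdma_timeout=10000 ms
 * and video_timeout=60000 ms. A single value applies to all non-compute jobs
 * (and to compute as well under SR-IOV or passthrough), 0 keeps the default
 * for that slot, and a negative value selects MAX_SCHEDULE_TIMEOUT.
 */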
d4535e2c 2843
d38ceaf9
AD
2844/**
2845 * amdgpu_device_init - initialize the driver
2846 *
2847 * @adev: amdgpu_device pointer
87e3f136 2848 * @ddev: drm dev pointer
d38ceaf9
AD
2849 * @pdev: pci dev pointer
2850 * @flags: driver flags
2851 *
2852 * Initializes the driver info and hw (all asics).
2853 * Returns 0 for success or an error on failure.
2854 * Called at driver startup.
2855 */
2856int amdgpu_device_init(struct amdgpu_device *adev,
2857 struct drm_device *ddev,
2858 struct pci_dev *pdev,
2859 uint32_t flags)
2860{
2861 int r, i;
3840c5bc 2862 bool boco = false;
95844d20 2863 u32 max_MBps;
d38ceaf9
AD
2864
2865 adev->shutdown = false;
2866 adev->dev = &pdev->dev;
2867 adev->ddev = ddev;
2868 adev->pdev = pdev;
2869 adev->flags = flags;
4e66d7d2
YZ
2870
2871 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2872 adev->asic_type = amdgpu_force_asic_type;
2873 else
2874 adev->asic_type = flags & AMD_ASIC_MASK;
2875
d38ceaf9 2876 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2877 if (amdgpu_emu_mode == 1)
8bdab6bb 2878 adev->usec_timeout *= 10;
770d13b1 2879 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2880 adev->accel_working = false;
2881 adev->num_rings = 0;
2882 adev->mman.buffer_funcs = NULL;
2883 adev->mman.buffer_funcs_ring = NULL;
2884 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2885 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2886 adev->gmc.gmc_funcs = NULL;
f54d1867 2887 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2888 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2889
2890 adev->smc_rreg = &amdgpu_invalid_rreg;
2891 adev->smc_wreg = &amdgpu_invalid_wreg;
2892 adev->pcie_rreg = &amdgpu_invalid_rreg;
2893 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2894 adev->pciep_rreg = &amdgpu_invalid_rreg;
2895 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2896 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2897 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2898 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2899 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2900 adev->didt_rreg = &amdgpu_invalid_rreg;
2901 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2902 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2903 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2904 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2905 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2906
3e39ab90
AD
2907 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2908 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2909 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2910
2911 /* mutex initializations are all done here so these
2912 * functions can be called later without locking issues */
d38ceaf9 2913 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2914 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2915 mutex_init(&adev->pm.mutex);
2916 mutex_init(&adev->gfx.gpu_clock_mutex);
2917 mutex_init(&adev->srbm_mutex);
b8866c26 2918 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2919 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2920 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2921 mutex_init(&adev->mn_lock);
e23b74aa 2922 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2923 hash_init(adev->mn_hash);
13a752e3 2924 mutex_init(&adev->lock_reset);
32eaeae0 2925 mutex_init(&adev->psp.mutex);
bd052211 2926 mutex_init(&adev->notifier_lock);
d38ceaf9 2927
912dfc84
EQ
2928 r = amdgpu_device_check_arguments(adev);
2929 if (r)
2930 return r;
d38ceaf9 2931
d38ceaf9
AD
2932 spin_lock_init(&adev->mmio_idx_lock);
2933 spin_lock_init(&adev->smc_idx_lock);
2934 spin_lock_init(&adev->pcie_idx_lock);
2935 spin_lock_init(&adev->uvd_ctx_idx_lock);
2936 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2937 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2938 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2939 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2940 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2941
0c4e7fa5
CZ
2942 INIT_LIST_HEAD(&adev->shadow_list);
2943 mutex_init(&adev->shadow_list_lock);
2944
795f2813
AR
2945 INIT_LIST_HEAD(&adev->ring_lru_list);
2946 spin_lock_init(&adev->ring_lru_list_lock);
2947
beff74bc
AD
2948 INIT_DELAYED_WORK(&adev->delayed_init_work,
2949 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2950 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2951 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2952
d4535e2c
AG
2953 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2954
d23ee13f 2955 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2956 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2957
0fa49558
AX
2958 /* Registers mapping */
2959 /* TODO: block userspace mapping of io register */
da69c161
KW
2960 if (adev->asic_type >= CHIP_BONAIRE) {
2961 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2962 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2963 } else {
2964 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2965 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2966 }
d38ceaf9 2967
d38ceaf9
AD
2968 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2969 if (adev->rmmio == NULL) {
2970 return -ENOMEM;
2971 }
2972 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2973 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2974
d38ceaf9
AD
2975 /* io port mapping */
2976 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2977 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2978 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2979 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2980 break;
2981 }
2982 }
2983 if (adev->rio_mem == NULL)
b64a18c5 2984 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2985
b2109d8e
JX
2986 /* enable PCIE atomic ops */
2987 r = pci_enable_atomic_ops_to_root(adev->pdev,
2988 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2989 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2990 if (r) {
2991 adev->have_atomics_support = false;
2992 DRM_INFO("PCIE atomic ops are not supported\n");
2993 } else {
2994 adev->have_atomics_support = true;
2995 }
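/* PCIe atomic completer support is probed here because some consumers
 * (for example the amdkfd compute stack on certain dGPUs) rely on 32/64-bit
 * PCIe atomics. When the root port lacks them, have_atomics_support stays
 * false and those consumers are expected to degrade or skip initialization
 * rather than fail device init outright; exactly which consumers need
 * atomics depends on the asic.
 */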
2996
5494d864
AD
2997 amdgpu_device_get_pcie_info(adev);
2998
b239c017
JX
2999 if (amdgpu_mcbp)
3000 DRM_INFO("MCBP is enabled\n");
3001
5f84cc63
JX
3002 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3003 adev->enable_mes = true;
3004
f54eeab4 3005 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
3006 r = amdgpu_discovery_init(adev);
3007 if (r) {
3008 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
3009 return r;
3010 }
3011 }
3012
d38ceaf9 3013 /* early init functions */
06ec9070 3014 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3015 if (r)
3016 return r;
3017
df99ac0f
JZ
3018 r = amdgpu_device_get_job_timeout_settings(adev);
3019 if (r) {
3020 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3021 return r;
3022 }
3023
6585661d
OZ
3024 /* doorbell bar mapping and doorbell index init */
3025 amdgpu_device_doorbell_init(adev);
3026
d38ceaf9
AD
3027 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3028 /* this will fail for cards that aren't VGA class devices, just
3029 * ignore it */
06ec9070 3030 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3031
31af062a 3032 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3033 boco = true;
3034 if (amdgpu_has_atpx() &&
3035 (amdgpu_is_atpx_hybrid() ||
3036 amdgpu_has_atpx_dgpu_power_cntl()) &&
3037 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3038 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3039 &amdgpu_switcheroo_ops, boco);
3040 if (boco)
d38ceaf9
AD
3041 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3042
9475a943
SL
3043 if (amdgpu_emu_mode == 1) {
3044 /* post the asic on emulation mode */
3045 emu_soc_asic_init(adev);
bfca0289 3046 goto fence_driver_init;
9475a943 3047 }
bfca0289 3048
4e99a44e
ML
3049 /* detect if we are with an SRIOV vbios */
3050 amdgpu_device_detect_sriov_bios(adev);
048765ad 3051
95e8e59e
AD
3052 /* check if we need to reset the asic
3053 * E.g., driver was not cleanly unloaded previously, etc.
3054 */
f14899fd 3055 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3056 r = amdgpu_asic_reset(adev);
3057 if (r) {
3058 dev_err(adev->dev, "asic reset on init failed\n");
3059 goto failed;
3060 }
3061 }
3062
d38ceaf9 3063 /* Post card if necessary */
39c640c0 3064 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3065 if (!adev->bios) {
bec86378 3066 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3067 r = -EINVAL;
3068 goto failed;
d38ceaf9 3069 }
bec86378 3070 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3071 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3072 if (r) {
3073 dev_err(adev->dev, "gpu post error!\n");
3074 goto failed;
3075 }
d38ceaf9
AD
3076 }
3077
88b64e95
AD
3078 if (adev->is_atom_fw) {
3079 /* Initialize clocks */
3080 r = amdgpu_atomfirmware_get_clock_info(adev);
3081 if (r) {
3082 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3083 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3084 goto failed;
3085 }
3086 } else {
a5bde2f9
AD
3087 /* Initialize clocks */
3088 r = amdgpu_atombios_get_clock_info(adev);
3089 if (r) {
3090 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3091 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3092 goto failed;
a5bde2f9
AD
3093 }
3094 /* init i2c buses */
4562236b
HW
3095 if (!amdgpu_device_has_dc_support(adev))
3096 amdgpu_atombios_i2c_init(adev);
2c1a2784 3097 }
d38ceaf9 3098
bfca0289 3099fence_driver_init:
d38ceaf9
AD
3100 /* Fence driver */
3101 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3102 if (r) {
3103 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3104 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3105 goto failed;
2c1a2784 3106 }
d38ceaf9
AD
3107
3108 /* init the mode config */
3109 drm_mode_config_init(adev->ddev);
3110
06ec9070 3111 r = amdgpu_device_ip_init(adev);
d38ceaf9 3112 if (r) {
8840a387 3113 /* failed in exclusive mode due to timeout */
3114 if (amdgpu_sriov_vf(adev) &&
3115 !amdgpu_sriov_runtime(adev) &&
3116 amdgpu_virt_mmio_blocked(adev) &&
3117 !amdgpu_virt_wait_reset(adev)) {
3118 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3119 /* Don't send request since VF is inactive. */
3120 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3121 adev->virt.ops = NULL;
8840a387 3122 r = -EAGAIN;
3123 goto failed;
3124 }
06ec9070 3125 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3126 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3127 goto failed;
d38ceaf9
AD
3128 }
3129
d7f72fe4
YZ
3130 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3131 adev->gfx.config.max_shader_engines,
3132 adev->gfx.config.max_sh_per_se,
3133 adev->gfx.config.max_cu_per_sh,
3134 adev->gfx.cu_info.number);
3135
f880799d
ND
3136 amdgpu_ctx_init_sched(adev);
3137
d38ceaf9
AD
3138 adev->accel_working = true;
3139
e59c0205
AX
3140 amdgpu_vm_check_compute_bug(adev);
3141
95844d20
MO
3142 /* Initialize the buffer migration limit. */
3143 if (amdgpu_moverate >= 0)
3144 max_MBps = amdgpu_moverate;
3145 else
3146 max_MBps = 8; /* Allow 8 MB/s. */
3147 /* Get a log2 for easy divisions. */
3148 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3149
9bc92b9c
ML
3150 amdgpu_fbdev_init(adev);
3151
d2f52ac8 3152 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3153 if (r) {
3154 adev->pm_sysfs_en = false;
d2f52ac8 3155 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3156 } else
3157 adev->pm_sysfs_en = true;
d2f52ac8 3158
5bb23532 3159 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3160 if (r) {
3161 adev->ucode_sysfs_en = false;
5bb23532 3162 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3163 } else
3164 adev->ucode_sysfs_en = true;
5bb23532 3165
d38ceaf9
AD
3166 if ((amdgpu_testing & 1)) {
3167 if (adev->accel_working)
3168 amdgpu_test_moves(adev);
3169 else
3170 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3171 }
d38ceaf9
AD
3172 if (amdgpu_benchmarking) {
3173 if (adev->accel_working)
3174 amdgpu_benchmark(adev, amdgpu_benchmarking);
3175 else
3176 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3177 }
3178
b0adca4d
EQ
3179 /*
3180 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3181 * Otherwise the mgpu fan boost feature will be skipped because the
3182 * gpu instance count would still be too low.
3183 */
3184 amdgpu_register_gpu_instance(adev);
3185
d38ceaf9
AD
3186 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3187 * explicit gating rather than handling it automatically.
3188 */
06ec9070 3189 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3190 if (r) {
06ec9070 3191 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3192 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3193 goto failed;
2c1a2784 3194 }
d38ceaf9 3195
108c6a63 3196 /* must succeed. */
511fdbc3 3197 amdgpu_ras_resume(adev);
108c6a63 3198
beff74bc
AD
3199 queue_delayed_work(system_wq, &adev->delayed_init_work,
3200 msecs_to_jiffies(AMDGPU_RESUME_MS));
3201
dcea6e65
KR
3202 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3203 if (r) {
3204 dev_err(adev->dev, "Could not create pcie_replay_count");
3205 return r;
3206 }
108c6a63 3207
d155bef0
AB
3208 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3209 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3210 if (r)
3211 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3212
d38ceaf9 3213 return 0;
83ba126a
AD
3214
3215failed:
89041940 3216 amdgpu_vf_error_trans_all(adev);
3840c5bc 3217 if (boco)
83ba126a 3218 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3219
83ba126a 3220 return r;
d38ceaf9
AD
3221}
3222
d38ceaf9
AD
3223/**
3224 * amdgpu_device_fini - tear down the driver
3225 *
3226 * @adev: amdgpu_device pointer
3227 *
3228 * Tear down the driver info (all asics).
3229 * Called at driver shutdown.
3230 */
3231void amdgpu_device_fini(struct amdgpu_device *adev)
3232{
3233 int r;
3234
3235 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3236 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3237 adev->shutdown = true;
9f875167 3238
752c683d
ML
3239 /* make sure the IB test has finished before entering exclusive mode
3240 * to avoid preemption during the IB test
3241 */
3242 if (amdgpu_sriov_vf(adev))
3243 amdgpu_virt_request_full_gpu(adev, false);
3244
e5b03032
ML
3245 /* disable all interrupts */
3246 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3247 if (adev->mode_info.mode_config_initialized){
3248 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3249 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3250 else
3251 drm_atomic_helper_shutdown(adev->ddev);
3252 }
d38ceaf9 3253 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3254 if (adev->pm_sysfs_en)
3255 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3256 amdgpu_fbdev_fini(adev);
06ec9070 3257 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3258 if (adev->firmware.gpu_info_fw) {
3259 release_firmware(adev->firmware.gpu_info_fw);
3260 adev->firmware.gpu_info_fw = NULL;
3261 }
d38ceaf9
AD
3262 adev->accel_working = false;
3263 /* free i2c buses */
4562236b
HW
3264 if (!amdgpu_device_has_dc_support(adev))
3265 amdgpu_i2c_fini(adev);
bfca0289
SL
3266
3267 if (amdgpu_emu_mode != 1)
3268 amdgpu_atombios_fini(adev);
3269
d38ceaf9
AD
3270 kfree(adev->bios);
3271 adev->bios = NULL;
3840c5bc
AD
3272 if (amdgpu_has_atpx() &&
3273 (amdgpu_is_atpx_hybrid() ||
3274 amdgpu_has_atpx_dgpu_power_cntl()) &&
3275 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3276 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3277 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3278 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3279 vga_client_register(adev->pdev, NULL, NULL, NULL);
3280 if (adev->rio_mem)
3281 pci_iounmap(adev->pdev, adev->rio_mem);
3282 adev->rio_mem = NULL;
3283 iounmap(adev->rmmio);
3284 adev->rmmio = NULL;
06ec9070 3285 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3286
dcea6e65 3287 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3288 if (adev->ucode_sysfs_en)
3289 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3290 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3291 amdgpu_pmu_fini(adev);
f54eeab4 3292 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3293 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3294}
3295
3296
3297/*
3298 * Suspend & resume.
3299 */
3300/**
810ddc3a 3301 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3302 *
87e3f136
DP
3303 * @dev: drm dev pointer
3304 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3306 *
3307 * Puts the hw in the suspend state (all asics).
3308 * Returns 0 for success or an error on failure.
3309 * Called at driver suspend.
3310 */
de185019 3311int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3312{
3313 struct amdgpu_device *adev;
3314 struct drm_crtc *crtc;
3315 struct drm_connector *connector;
f8d2d39e 3316 struct drm_connector_list_iter iter;
5ceb54c6 3317 int r;
d38ceaf9
AD
3318
3319 if (dev == NULL || dev->dev_private == NULL) {
3320 return -ENODEV;
3321 }
3322
3323 adev = dev->dev_private;
3324
3325 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3326 return 0;
3327
44779b43 3328 adev->in_suspend = true;
d38ceaf9
AD
3329 drm_kms_helper_poll_disable(dev);
3330
5f818173
S
3331 if (fbcon)
3332 amdgpu_fbdev_set_suspend(adev, 1);
3333
beff74bc 3334 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3335
4562236b
HW
3336 if (!amdgpu_device_has_dc_support(adev)) {
3337 /* turn off display hw */
3338 drm_modeset_lock_all(dev);
f8d2d39e
LP
3339 drm_connector_list_iter_begin(dev, &iter);
3340 drm_for_each_connector_iter(connector, &iter)
3341 drm_helper_connector_dpms(connector,
3342 DRM_MODE_DPMS_OFF);
3343 drm_connector_list_iter_end(&iter);
4562236b 3344 drm_modeset_unlock_all(dev);
fe1053b7
AD
3345 /* unpin the front buffers and cursors */
3346 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3347 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3348 struct drm_framebuffer *fb = crtc->primary->fb;
3349 struct amdgpu_bo *robj;
3350
91334223 3351 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3352 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3353 r = amdgpu_bo_reserve(aobj, true);
3354 if (r == 0) {
3355 amdgpu_bo_unpin(aobj);
3356 amdgpu_bo_unreserve(aobj);
3357 }
756e6880 3358 }
756e6880 3359
fe1053b7
AD
3360 if (fb == NULL || fb->obj[0] == NULL) {
3361 continue;
3362 }
3363 robj = gem_to_amdgpu_bo(fb->obj[0]);
3364 /* don't unpin kernel fb objects */
3365 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3366 r = amdgpu_bo_reserve(robj, true);
3367 if (r == 0) {
3368 amdgpu_bo_unpin(robj);
3369 amdgpu_bo_unreserve(robj);
3370 }
d38ceaf9
AD
3371 }
3372 }
3373 }
fe1053b7 3374
5e6932fe 3375 amdgpu_ras_suspend(adev);
3376
fe1053b7
AD
3377 r = amdgpu_device_ip_suspend_phase1(adev);
3378
c457a273
EQ
3379 amdgpu_amdkfd_suspend(adev, !fbcon);
3380
d38ceaf9
AD
3381 /* evict vram memory */
3382 amdgpu_bo_evict_vram(adev);
3383
5ceb54c6 3384 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3385
fe1053b7 3386 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3387
a0a71e49
AD
3388 /* evict remaining vram memory
3389 * This second call to evict vram is to evict the gart page table
3390 * using the CPU.
3391 */
d38ceaf9
AD
3392 amdgpu_bo_evict_vram(adev);
3393
d38ceaf9
AD
3394 return 0;
3395}
3396
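/*
 * A minimal sketch (not from this file) of how the system-sleep path is
 * expected to reach amdgpu_device_suspend(); the real callback lives in
 * amdgpu_drv.c and the name below is illustrative only.
 */
static int example_pmops_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon = true: the fbdev console is told to suspend as well */
	return amdgpu_device_suspend(drm_dev, true);
}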
3397/**
810ddc3a 3398 * amdgpu_device_resume - initiate device resume
d38ceaf9 3399 *
87e3f136
DP
3400 * @dev: drm dev pointer
3401 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3403 *
3404 * Bring the hw back to operating state (all asics).
3405 * Returns 0 for success or an error on failure.
3406 * Called at driver resume.
3407 */
de185019 3408int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3409{
3410 struct drm_connector *connector;
f8d2d39e 3411 struct drm_connector_list_iter iter;
d38ceaf9 3412 struct amdgpu_device *adev = dev->dev_private;
756e6880 3413 struct drm_crtc *crtc;
03161a6e 3414 int r = 0;
d38ceaf9
AD
3415
3416 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3417 return 0;
3418
d38ceaf9 3419 /* post card */
39c640c0 3420 if (amdgpu_device_need_post(adev)) {
74b0b157 3421 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3422 if (r)
3423 DRM_ERROR("amdgpu asic init failed\n");
3424 }
d38ceaf9 3425
06ec9070 3426 r = amdgpu_device_ip_resume(adev);
e6707218 3427 if (r) {
06ec9070 3428 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3429 return r;
e6707218 3430 }
5ceb54c6
AD
3431 amdgpu_fence_driver_resume(adev);
3432
d38ceaf9 3433
06ec9070 3434 r = amdgpu_device_ip_late_init(adev);
03161a6e 3435 if (r)
4d3b9ae5 3436 return r;
d38ceaf9 3437
beff74bc
AD
3438 queue_delayed_work(system_wq, &adev->delayed_init_work,
3439 msecs_to_jiffies(AMDGPU_RESUME_MS));
3440
fe1053b7
AD
3441 if (!amdgpu_device_has_dc_support(adev)) {
3442 /* pin cursors */
3443 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3444 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3445
91334223 3446 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3447 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3448 r = amdgpu_bo_reserve(aobj, true);
3449 if (r == 0) {
3450 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3451 if (r != 0)
3452 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3453 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3454 amdgpu_bo_unreserve(aobj);
3455 }
756e6880
AD
3456 }
3457 }
3458 }
9593f4d6 3459 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3460 if (r)
3461 return r;
756e6880 3462
96a5d8d4 3463 /* Make sure IB tests flushed */
beff74bc 3464 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3465
d38ceaf9
AD
3466 /* blat the mode back in */
3467 if (fbcon) {
4562236b
HW
3468 if (!amdgpu_device_has_dc_support(adev)) {
3469 /* pre DCE11 */
3470 drm_helper_resume_force_mode(dev);
3471
3472 /* turn on display hw */
3473 drm_modeset_lock_all(dev);
f8d2d39e
LP
3474
3475 drm_connector_list_iter_begin(dev, &iter);
3476 drm_for_each_connector_iter(connector, &iter)
3477 drm_helper_connector_dpms(connector,
3478 DRM_MODE_DPMS_ON);
3479 drm_connector_list_iter_end(&iter);
3480
4562236b 3481 drm_modeset_unlock_all(dev);
d38ceaf9 3482 }
4d3b9ae5 3483 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3484 }
3485
3486 drm_kms_helper_poll_enable(dev);
23a1a9e5 3487
5e6932fe 3488 amdgpu_ras_resume(adev);
3489
23a1a9e5
L
3490 /*
3491 * Most of the connector probing functions try to acquire runtime pm
3492 * refs to ensure that the GPU is powered on when connector polling is
3493 * performed. Since we're calling this from a runtime PM callback,
3494 * trying to acquire rpm refs will cause us to deadlock.
3495 *
3496 * Since we're guaranteed to be holding the rpm lock, it's safe to
3497 * temporarily disable the rpm helpers so this doesn't deadlock us.
3498 */
3499#ifdef CONFIG_PM
3500 dev->dev->power.disable_depth++;
3501#endif
4562236b
HW
3502 if (!amdgpu_device_has_dc_support(adev))
3503 drm_helper_hpd_irq_event(dev);
3504 else
3505 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3506#ifdef CONFIG_PM
3507 dev->dev->power.disable_depth--;
3508#endif
44779b43
RZ
3509 adev->in_suspend = false;
3510
4d3b9ae5 3511 return 0;
d38ceaf9
AD
3512}
3513
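/*
 * A minimal sketch (not from this file) of the matching resume-side
 * callback; illustrative only, the real one lives in amdgpu_drv.c.
 */
static int example_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon = true so the fbdev console and mode are restored as well */
	return amdgpu_device_resume(drm_dev, true);
}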
e3ecdffa
AD
3514/**
3515 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3516 *
3517 * @adev: amdgpu_device pointer
3518 *
3519 * The list of all the hardware IPs that make up the asic is walked and
3520 * the check_soft_reset callbacks are run. check_soft_reset determines
3521 * if the asic is still hung or not.
3522 * Returns true if any of the IPs are still in a hung state, false if not.
3523 */
06ec9070 3524static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3525{
3526 int i;
3527 bool asic_hang = false;
3528
f993d628
ML
3529 if (amdgpu_sriov_vf(adev))
3530 return true;
3531
8bc04c29
AD
3532 if (amdgpu_asic_need_full_reset(adev))
3533 return true;
3534
63fbf42f 3535 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3536 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3537 continue;
a1255107
AD
3538 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3539 adev->ip_blocks[i].status.hang =
3540 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3541 if (adev->ip_blocks[i].status.hang) {
3542 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3543 asic_hang = true;
3544 }
3545 }
3546 return asic_hang;
3547}
3548
e3ecdffa
AD
3549/**
3550 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3551 *
3552 * @adev: amdgpu_device pointer
3553 *
3554 * The list of all the hardware IPs that make up the asic is walked and the
3555 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3556 * handles any IP specific hardware or software state changes that are
3557 * necessary for a soft reset to succeed.
3558 * Returns 0 on success, negative error code on failure.
3559 */
06ec9070 3560static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3561{
3562 int i, r = 0;
3563
3564 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3565 if (!adev->ip_blocks[i].status.valid)
d31a501e 3566 continue;
a1255107
AD
3567 if (adev->ip_blocks[i].status.hang &&
3568 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3569 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3570 if (r)
3571 return r;
3572 }
3573 }
3574
3575 return 0;
3576}
3577
e3ecdffa
AD
3578/**
3579 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3580 *
3581 * @adev: amdgpu_device pointer
3582 *
3583 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3584 * reset is necessary to recover.
3585 * Returns true if a full asic reset is required, false if not.
3586 */
06ec9070 3587static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3588{
da146d3b
AD
3589 int i;
3590
8bc04c29
AD
3591 if (amdgpu_asic_need_full_reset(adev))
3592 return true;
3593
da146d3b 3594 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3595 if (!adev->ip_blocks[i].status.valid)
da146d3b 3596 continue;
a1255107
AD
3597 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3598 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3599 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3600 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3601 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3602 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
 3603 DRM_INFO("Some blocks need full reset!\n");
3604 return true;
3605 }
3606 }
35d782fe
CZ
3607 }
3608 return false;
3609}
3610
e3ecdffa
AD
3611/**
3612 * amdgpu_device_ip_soft_reset - do a soft reset
3613 *
3614 * @adev: amdgpu_device pointer
3615 *
3616 * The list of all the hardware IPs that make up the asic is walked and the
3617 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3618 * IP specific hardware or software state changes that are necessary to soft
3619 * reset the IP.
3620 * Returns 0 on success, negative error code on failure.
3621 */
06ec9070 3622static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3623{
3624 int i, r = 0;
3625
3626 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3627 if (!adev->ip_blocks[i].status.valid)
35d782fe 3628 continue;
a1255107
AD
3629 if (adev->ip_blocks[i].status.hang &&
3630 adev->ip_blocks[i].version->funcs->soft_reset) {
3631 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3632 if (r)
3633 return r;
3634 }
3635 }
3636
3637 return 0;
3638}
3639
e3ecdffa
AD
3640/**
3641 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3642 *
3643 * @adev: amdgpu_device pointer
3644 *
3645 * The list of all the hardware IPs that make up the asic is walked and the
3646 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3647 * handles any IP specific hardware or software state changes that are
3648 * necessary after the IP has been soft reset.
3649 * Returns 0 on success, negative error code on failure.
3650 */
06ec9070 3651static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3652{
3653 int i, r = 0;
3654
3655 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3656 if (!adev->ip_blocks[i].status.valid)
35d782fe 3657 continue;
a1255107
AD
3658 if (adev->ip_blocks[i].status.hang &&
3659 adev->ip_blocks[i].version->funcs->post_soft_reset)
3660 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3661 if (r)
3662 return r;
3663 }
3664
3665 return 0;
3666}
3667
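/*
 * A minimal sketch (hypothetical IP block, not from this file) of how an
 * IP block plugs into the soft-reset helpers above through its
 * amd_ip_funcs table; real implementations live in the per-IP files,
 * e.g. gfx_v8_0.c.
 */
static bool example_ip_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* a real block would read its status registers through adev here */
	(void)adev;
	return false;	/* false: the block is not hung */
}

static const struct amd_ip_funcs example_ip_funcs = {
	.name = "example",
	.check_soft_reset = example_ip_check_soft_reset,
	/* .pre_soft_reset, .soft_reset and .post_soft_reset as needed */
};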
e3ecdffa 3668/**
c33adbc7 3669 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3670 *
3671 * @adev: amdgpu_device pointer
3672 *
3673 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3674 * restore things like GPUVM page tables after a GPU reset where
3675 * the contents of VRAM might be lost.
403009bf
CK
3676 *
3677 * Returns:
3678 * 0 on success, negative error code on failure.
e3ecdffa 3679 */
c33adbc7 3680static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3681{
c41d1cf6 3682 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3683 struct amdgpu_bo *shadow;
3684 long r = 1, tmo;
c41d1cf6
ML
3685
3686 if (amdgpu_sriov_runtime(adev))
b045d3af 3687 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3688 else
3689 tmo = msecs_to_jiffies(100);
3690
3691 DRM_INFO("recover vram bo from shadow start\n");
3692 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3693 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3694
3695 /* No need to recover an evicted BO */
3696 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3697 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3698 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3699 continue;
3700
3701 r = amdgpu_bo_restore_shadow(shadow, &next);
3702 if (r)
3703 break;
3704
c41d1cf6 3705 if (fence) {
1712fb1a 3706 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3707 dma_fence_put(fence);
3708 fence = next;
1712fb1a 3709 if (tmo == 0) {
3710 r = -ETIMEDOUT;
c41d1cf6 3711 break;
1712fb1a 3712 } else if (tmo < 0) {
3713 r = tmo;
3714 break;
3715 }
403009bf
CK
3716 } else {
3717 fence = next;
c41d1cf6 3718 }
c41d1cf6
ML
3719 }
3720 mutex_unlock(&adev->shadow_list_lock);
3721
403009bf
CK
3722 if (fence)
3723 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3724 dma_fence_put(fence);
3725
1712fb1a 3726 if (r < 0 || tmo <= 0) {
3727 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3728 return -EIO;
3729 }
c41d1cf6 3730
403009bf
CK
3731 DRM_INFO("recover vram bo from shadow done\n");
3732 return 0;
c41d1cf6
ML
3733}
3734
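/*
 * A minimal sketch (illustrative helper, not from this file) of how a
 * newly created shadow BO ends up on adev->shadow_list so that
 * amdgpu_device_recover_vram() above can find it after a reset; the
 * pattern mirrors the shadow_list/shadow_list_lock usage visible above.
 */
static void example_track_shadow_bo(struct amdgpu_device *adev,
				    struct amdgpu_bo *shadow)
{
	mutex_lock(&adev->shadow_list_lock);
	list_add_tail(&shadow->shadow_list, &adev->shadow_list);
	mutex_unlock(&adev->shadow_list_lock);
}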
a90ad3c2 3735
e3ecdffa 3736/**
06ec9070 3737 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3738 *
3739 * @adev: amdgpu device pointer
87e3f136 3740 * @from_hypervisor: request from hypervisor
5740682e
ML
3741 *
 3742 * Do VF FLR and reinitialize the ASIC.
3f48c681 3743 * Returns 0 on success, an error code otherwise.
e3ecdffa
AD
3744 */
3745static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3746 bool from_hypervisor)
5740682e
ML
3747{
3748 int r;
3749
3750 if (from_hypervisor)
3751 r = amdgpu_virt_request_full_gpu(adev, true);
3752 else
3753 r = amdgpu_virt_reset_gpu(adev);
3754 if (r)
3755 return r;
a90ad3c2
ML
3756
3757 /* Resume IP prior to SMC */
06ec9070 3758 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3759 if (r)
3760 goto error;
a90ad3c2 3761
c9ffa427 3762 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3763 /* we need to recover the GART prior to running SMC/CP/SDMA resume */
c1c7ce8f 3764 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3765
7a3e0bb2
RZ
3766 r = amdgpu_device_fw_loading(adev);
3767 if (r)
3768 return r;
3769
a90ad3c2 3770 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3771 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3772 if (r)
3773 goto error;
a90ad3c2
ML
3774
3775 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3776 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3777 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3778
abc34253
ED
3779error:
3780 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3781 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3782 amdgpu_inc_vram_lost(adev);
c33adbc7 3783 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3784 }
3785
3786 return r;
3787}
3788
12938fad
CK
3789/**
3790 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3791 *
3792 * @adev: amdgpu device pointer
3793 *
3794 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3795 * a hung GPU.
3796 */
3797bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3798{
3799 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3800 DRM_INFO("Timeout, but no hardware hang detected.\n");
3801 return false;
3802 }
3803
3ba7b418
AG
3804 if (amdgpu_gpu_recovery == 0)
3805 goto disabled;
3806
3807 if (amdgpu_sriov_vf(adev))
3808 return true;
3809
3810 if (amdgpu_gpu_recovery == -1) {
3811 switch (adev->asic_type) {
fc42d47c
AG
3812 case CHIP_BONAIRE:
3813 case CHIP_HAWAII:
3ba7b418
AG
3814 case CHIP_TOPAZ:
3815 case CHIP_TONGA:
3816 case CHIP_FIJI:
3817 case CHIP_POLARIS10:
3818 case CHIP_POLARIS11:
3819 case CHIP_POLARIS12:
3820 case CHIP_VEGAM:
3821 case CHIP_VEGA20:
3822 case CHIP_VEGA10:
3823 case CHIP_VEGA12:
c43b849f 3824 case CHIP_RAVEN:
e9d4cf91 3825 case CHIP_ARCTURUS:
2cb44fb0 3826 case CHIP_RENOIR:
658c6639
AD
3827 case CHIP_NAVI10:
3828 case CHIP_NAVI14:
3829 case CHIP_NAVI12:
3ba7b418
AG
3830 break;
3831 default:
3832 goto disabled;
3833 }
12938fad
CK
3834 }
3835
3836 return true;
3ba7b418
AG
3837
3838disabled:
3839 DRM_INFO("GPU recovery disabled.\n");
3840 return false;
12938fad
CK
3841}
3842
5c6dd71e 3843
26bc5340
AG
3844static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3845 struct amdgpu_job *job,
3846 bool *need_full_reset_arg)
3847{
3848 int i, r = 0;
3849 bool need_full_reset = *need_full_reset_arg;
71182665 3850
71182665 3851 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3852 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3853 struct amdgpu_ring *ring = adev->rings[i];
3854
51687759 3855 if (!ring || !ring->sched.thread)
0875dc9e 3856 continue;
5740682e 3857
2f9d4084
ML
3858 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3859 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3860 }
d38ceaf9 3861
222b5f04
AG
 3862 if (job)
3863 drm_sched_increase_karma(&job->base);
3864
1d721ed6 3865 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3866 if (!amdgpu_sriov_vf(adev)) {
3867
3868 if (!need_full_reset)
3869 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3870
3871 if (!need_full_reset) {
3872 amdgpu_device_ip_pre_soft_reset(adev);
3873 r = amdgpu_device_ip_soft_reset(adev);
3874 amdgpu_device_ip_post_soft_reset(adev);
3875 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3876 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3877 need_full_reset = true;
3878 }
3879 }
3880
3881 if (need_full_reset)
3882 r = amdgpu_device_ip_suspend(adev);
3883
3884 *need_full_reset_arg = need_full_reset;
3885 }
3886
3887 return r;
3888}
3889
041a62bc 3890static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3891 struct list_head *device_list_handle,
3892 bool *need_full_reset_arg)
3893{
3894 struct amdgpu_device *tmp_adev = NULL;
3895 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3896 int r = 0;
3897
3898 /*
 3899 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3900 * to allow proper link negotiation in FW (within 1 sec)
3901 */
3902 if (need_full_reset) {
3903 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3904 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3905 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3906 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3907 r = -EALREADY;
3908 } else
3909 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3910
041a62bc
AG
3911 if (r) {
3912 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3913 r, tmp_adev->ddev->unique);
3914 break;
ce316fa5
LM
3915 }
3916 }
3917
041a62bc
AG
3918 /* For XGMI wait for all resets to complete before proceed */
3919 if (!r) {
ce316fa5
LM
3920 list_for_each_entry(tmp_adev, device_list_handle,
3921 gmc.xgmi.head) {
3922 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3923 flush_work(&tmp_adev->xgmi_reset_work);
3924 r = tmp_adev->asic_reset_res;
3925 if (r)
3926 break;
ce316fa5
LM
3927 }
3928 }
3929 }
ce316fa5 3930 }
26bc5340 3931
43c4d576
JC
3932 if (!r && amdgpu_ras_intr_triggered()) {
3933 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3934 if (tmp_adev->mmhub.funcs &&
3935 tmp_adev->mmhub.funcs->reset_ras_error_count)
3936 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
3937 }
3938
00eaa571 3939 amdgpu_ras_intr_cleared();
43c4d576 3940 }
00eaa571 3941
26bc5340
AG
3942 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3943 if (need_full_reset) {
3944 /* post card */
3945 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3946 DRM_WARN("asic atom init failed!");
3947
3948 if (!r) {
3949 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3950 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3951 if (r)
3952 goto out;
3953
3954 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3955 if (vram_lost) {
77e7f829 3956 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3957 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3958 }
3959
3960 r = amdgpu_gtt_mgr_recover(
3961 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3962 if (r)
3963 goto out;
3964
3965 r = amdgpu_device_fw_loading(tmp_adev);
3966 if (r)
3967 return r;
3968
3969 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3970 if (r)
3971 goto out;
3972
3973 if (vram_lost)
3974 amdgpu_device_fill_reset_magic(tmp_adev);
3975
fdafb359
EQ
3976 /*
 3977 * Add this ASIC back as tracked since the reset already
 3978 * completed successfully.
3979 */
3980 amdgpu_register_gpu_instance(tmp_adev);
3981
7c04ca50 3982 r = amdgpu_device_ip_late_init(tmp_adev);
3983 if (r)
3984 goto out;
3985
565d1941
EQ
3986 amdgpu_fbdev_set_suspend(tmp_adev, 0);
3987
e79a04d5 3988 /* must succeed. */
511fdbc3 3989 amdgpu_ras_resume(tmp_adev);
e79a04d5 3990
26bc5340
AG
3991 /* Update PSP FW topology after reset */
3992 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3993 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3994 }
3995 }
3996
3997
3998out:
3999 if (!r) {
4000 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4001 r = amdgpu_ib_ring_tests(tmp_adev);
4002 if (r) {
4003 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4004 r = amdgpu_device_ip_suspend(tmp_adev);
4005 need_full_reset = true;
4006 r = -EAGAIN;
4007 goto end;
4008 }
4009 }
4010
4011 if (!r)
4012 r = amdgpu_device_recover_vram(tmp_adev);
4013 else
4014 tmp_adev->asic_reset_res = r;
4015 }
4016
4017end:
4018 *need_full_reset_arg = need_full_reset;
4019 return r;
4020}
4021
1d721ed6 4022static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4023{
1d721ed6
AG
4024 if (trylock) {
4025 if (!mutex_trylock(&adev->lock_reset))
4026 return false;
4027 } else
4028 mutex_lock(&adev->lock_reset);
5740682e 4029
26bc5340 4030 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4031 adev->in_gpu_reset = true;
a3a09142
AD
4032 switch (amdgpu_asic_reset_method(adev)) {
4033 case AMD_RESET_METHOD_MODE1:
4034 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4035 break;
4036 case AMD_RESET_METHOD_MODE2:
4037 adev->mp1_state = PP_MP1_STATE_RESET;
4038 break;
4039 default:
4040 adev->mp1_state = PP_MP1_STATE_NONE;
4041 break;
4042 }
1d721ed6
AG
4043
4044 return true;
26bc5340 4045}
d38ceaf9 4046
26bc5340
AG
4047static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4048{
89041940 4049 amdgpu_vf_error_trans_all(adev);
a3a09142 4050 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4051 adev->in_gpu_reset = false;
13a752e3 4052 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4053}
4054
26bc5340
AG
4055/**
4056 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4057 *
4058 * @adev: amdgpu device pointer
 4059 * @job: which job triggered the hang
4060 *
4061 * Attempt to reset the GPU if it has hung (all asics).
 4062 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
4063 * Returns 0 for success or an error on failure.
4064 */
4065
4066int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4067 struct amdgpu_job *job)
4068{
1d721ed6
AG
4069 struct list_head device_list, *device_list_handle = NULL;
4070 bool need_full_reset, job_signaled;
26bc5340 4071 struct amdgpu_hive_info *hive = NULL;
26bc5340 4072 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4073 int i, r = 0;
7c6e68c7 4074 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4075 bool use_baco =
4076 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4077 true : false;
26bc5340 4078
d5ea093e
AG
4079 /*
4080 * Flush RAM to disk so that after reboot
 4081 * the user can read the log and see why the system rebooted.
4082 */
b823821f 4083 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4084
4085 DRM_WARN("Emergency reboot.");
4086
4087 ksys_sync_helper();
4088 emergency_restart();
4089 }
4090
1d721ed6 4091 need_full_reset = job_signaled = false;
26bc5340
AG
4092 INIT_LIST_HEAD(&device_list);
4093
b823821f
LM
4094 dev_info(adev->dev, "GPU %s begin!\n",
4095 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4096
beff74bc 4097 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4098
1d721ed6
AG
4099 hive = amdgpu_get_xgmi_hive(adev, false);
4100
26bc5340 4101 /*
1d721ed6
AG
 4102 * Here we trylock to avoid a chain of resets executing from
 4103 * either a trigger by jobs on different adevs in the XGMI hive or jobs on
 4104 * different schedulers for the same device while this TO handler is running.
 4105 * We always reset all schedulers for a device and all devices in an XGMI
 4106 * hive, so that should take care of them too.
26bc5340 4107 */
1d721ed6
AG
4108
4109 if (hive && !mutex_trylock(&hive->reset_lock)) {
4110 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4111 job ? job->base.id : -1, hive->hive_id);
26bc5340 4112 return 0;
1d721ed6 4113 }
26bc5340
AG
4114
4115 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4116 if (!amdgpu_device_lock_adev(adev, !hive)) {
4117 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4118 job ? job->base.id : -1);
1d721ed6 4119 return 0;
26bc5340
AG
4120 }
4121
7c6e68c7
AG
4122 /* Block kfd: SRIOV would do it separately */
4123 if (!amdgpu_sriov_vf(adev))
4124 amdgpu_amdkfd_pre_reset(adev);
4125
26bc5340 4126 /* Build list of devices to reset */
1d721ed6 4127 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4128 if (!hive) {
7c6e68c7
AG
4129 /*unlock kfd: SRIOV would do it separately */
4130 if (!amdgpu_sriov_vf(adev))
4131 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4132 amdgpu_device_unlock_adev(adev);
4133 return -ENODEV;
4134 }
4135
4136 /*
4137 * In case we are in XGMI hive mode device reset is done for all the
4138 * nodes in the hive to retrain all XGMI links and hence the reset
4139 * sequence is executed in loop on all nodes.
4140 */
4141 device_list_handle = &hive->device_list;
4142 } else {
4143 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4144 device_list_handle = &device_list;
4145 }
4146
1d721ed6
AG
4147 /* block all schedulers and reset given job's ring */
4148 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4149 if (tmp_adev != adev) {
12ffa55d 4150 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4151 if (!amdgpu_sriov_vf(tmp_adev))
4152 amdgpu_amdkfd_pre_reset(tmp_adev);
4153 }
4154
12ffa55d
AG
4155 /*
 4156 * Mark these ASICs to be reset as untracked first
 4157 * and add them back after reset completes
4158 */
4159 amdgpu_unregister_gpu_instance(tmp_adev);
4160
565d1941
EQ
4161 amdgpu_fbdev_set_suspend(adev, 1);
4162
f1c1314b 4163 /* disable ras on ALL IPs */
b823821f
LM
4164 if (!(in_ras_intr && !use_baco) &&
4165 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4166 amdgpu_ras_suspend(tmp_adev);
4167
1d721ed6
AG
4168 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4169 struct amdgpu_ring *ring = tmp_adev->rings[i];
4170
4171 if (!ring || !ring->sched.thread)
4172 continue;
4173
0b2d2c2e 4174 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4175
b823821f 4176 if (in_ras_intr && !use_baco)
7c6e68c7 4177 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4178 }
4179 }
4180
4181
b823821f 4182 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4183 goto skip_sched_resume;
4184
1d721ed6
AG
4185 /*
4186 * Must check guilty signal here since after this point all old
4187 * HW fences are force signaled.
4188 *
4189 * job->base holds a reference to parent fence
4190 */
4191 if (job && job->base.s_fence->parent &&
4192 dma_fence_is_signaled(job->base.s_fence->parent))
4193 job_signaled = true;
4194
1d721ed6
AG
4195 if (job_signaled) {
4196 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4197 goto skip_hw_reset;
4198 }
4199
4200
4201 /* Guilty job will be freed after this*/
0b2d2c2e 4202 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4203 if (r) {
4204 /*TODO Should we stop ?*/
4205 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4206 r, adev->ddev->unique);
4207 adev->asic_reset_res = r;
4208 }
4209
26bc5340
AG
4210retry: /* Rest of adevs pre asic reset from XGMI hive. */
4211 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4212
4213 if (tmp_adev == adev)
4214 continue;
4215
26bc5340
AG
4216 r = amdgpu_device_pre_asic_reset(tmp_adev,
4217 NULL,
4218 &need_full_reset);
4219 /*TODO Should we stop ?*/
4220 if (r) {
4221 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4222 r, tmp_adev->ddev->unique);
4223 tmp_adev->asic_reset_res = r;
4224 }
4225 }
4226
4227 /* Actual ASIC resets if needed.*/
4228 /* TODO Implement XGMI hive reset logic for SRIOV */
4229 if (amdgpu_sriov_vf(adev)) {
4230 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4231 if (r)
4232 adev->asic_reset_res = r;
4233 } else {
041a62bc 4234 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4235 if (r && r == -EAGAIN)
4236 goto retry;
4237 }
4238
1d721ed6
AG
4239skip_hw_reset:
4240
26bc5340
AG
4241 /* Post ASIC reset for all devs .*/
4242 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4243
1d721ed6
AG
4244 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4245 struct amdgpu_ring *ring = tmp_adev->rings[i];
4246
4247 if (!ring || !ring->sched.thread)
4248 continue;
4249
 4250 /* No point in resubmitting jobs if we didn't HW reset */
4251 if (!tmp_adev->asic_reset_res && !job_signaled)
4252 drm_sched_resubmit_jobs(&ring->sched);
4253
4254 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4255 }
4256
4257 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4258 drm_helper_resume_force_mode(tmp_adev->ddev);
4259 }
4260
4261 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4262
4263 if (r) {
4264 /* bad news, how to tell it to userspace ? */
12ffa55d 4265 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4266 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4267 } else {
12ffa55d 4268 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4269 }
7c6e68c7 4270 }
26bc5340 4271
7c6e68c7
AG
4272skip_sched_resume:
4273 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4274 /*unlock kfd: SRIOV would do it separately */
b823821f 4275 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4276 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4277 amdgpu_device_unlock_adev(tmp_adev);
4278 }
4279
1d721ed6 4280 if (hive)
22d6575b 4281 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4282
4283 if (r)
4284 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4285 return r;
4286}
4287
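/*
 * A minimal sketch (illustrative, not from this file) of how the scheduler
 * timeout path is expected to use the two helpers above; the real handler
 * is amdgpu_job_timedout() in amdgpu_job.c, with soft recovery and error
 * reporting omitted here.
 */
static void example_ring_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
	struct amdgpu_job *job = to_amdgpu_job(s_job);

	/* only escalate to a full adapter reset when recovery is allowed */
	if (amdgpu_device_should_recover_gpu(ring->adev))
		amdgpu_device_gpu_recover(ring->adev, job);
}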
e3ecdffa
AD
4288/**
 4289 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
4290 *
4291 * @adev: amdgpu_device pointer
4292 *
 4293 * Fetches and stores in the driver the PCIE capabilities (gen speed
4294 * and lanes) of the slot the device is in. Handles APUs and
4295 * virtualized environments where PCIE config space may not be available.
4296 */
5494d864 4297static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4298{
5d9a6330 4299 struct pci_dev *pdev;
c5313457
HK
4300 enum pci_bus_speed speed_cap, platform_speed_cap;
4301 enum pcie_link_width platform_link_width;
d0dd7f0c 4302
cd474ba0
AD
4303 if (amdgpu_pcie_gen_cap)
4304 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4305
cd474ba0
AD
4306 if (amdgpu_pcie_lane_cap)
4307 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4308
cd474ba0
AD
4309 /* covers APUs as well */
4310 if (pci_is_root_bus(adev->pdev->bus)) {
4311 if (adev->pm.pcie_gen_mask == 0)
4312 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4313 if (adev->pm.pcie_mlw_mask == 0)
4314 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4315 return;
cd474ba0 4316 }
d0dd7f0c 4317
c5313457
HK
4318 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4319 return;
4320
dbaa922b
AD
4321 pcie_bandwidth_available(adev->pdev, NULL,
4322 &platform_speed_cap, &platform_link_width);
c5313457 4323
cd474ba0 4324 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4325 /* asic caps */
4326 pdev = adev->pdev;
4327 speed_cap = pcie_get_speed_cap(pdev);
4328 if (speed_cap == PCI_SPEED_UNKNOWN) {
4329 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4330 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4331 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4332 } else {
5d9a6330
AD
4333 if (speed_cap == PCIE_SPEED_16_0GT)
4334 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4335 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4336 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4337 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4338 else if (speed_cap == PCIE_SPEED_8_0GT)
4339 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4340 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4341 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4342 else if (speed_cap == PCIE_SPEED_5_0GT)
4343 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4344 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4345 else
4346 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4347 }
4348 /* platform caps */
c5313457 4349 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4350 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4351 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4352 } else {
c5313457 4353 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4354 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4355 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4356 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4357 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4358 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4359 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4360 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4361 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4362 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4363 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4364 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4365 else
4366 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4367
cd474ba0
AD
4368 }
4369 }
4370 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4371 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4372 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4373 } else {
c5313457 4374 switch (platform_link_width) {
5d9a6330 4375 case PCIE_LNK_X32:
cd474ba0
AD
4376 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4377 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4378 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4379 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4380 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4381 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4382 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4383 break;
5d9a6330 4384 case PCIE_LNK_X16:
cd474ba0
AD
4385 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4386 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4387 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4388 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4389 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4390 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4391 break;
5d9a6330 4392 case PCIE_LNK_X12:
cd474ba0
AD
4393 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4394 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4395 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4396 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4397 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4398 break;
5d9a6330 4399 case PCIE_LNK_X8:
cd474ba0
AD
4400 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4401 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4402 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4403 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4404 break;
5d9a6330 4405 case PCIE_LNK_X4:
cd474ba0
AD
4406 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4407 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4408 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4409 break;
5d9a6330 4410 case PCIE_LNK_X2:
cd474ba0
AD
4411 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4412 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4413 break;
5d9a6330 4414 case PCIE_LNK_X1:
cd474ba0
AD
4415 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4416 break;
4417 default:
4418 break;
4419 }
d0dd7f0c
AD
4420 }
4421 }
4422}
d38ceaf9 4423
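/*
 * A minimal sketch (illustrative helper, not from this file) of how a
 * caller might reduce the mask filled in above to the highest PCIE
 * generation that both the asic and the platform support.
 */
static unsigned int example_max_pcie_gen(struct amdgpu_device *adev)
{
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}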
361dbd01
AD
4424int amdgpu_device_baco_enter(struct drm_device *dev)
4425{
4426 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4427 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4428
4429 if (!amdgpu_device_supports_baco(adev->ddev))
4430 return -ENOTSUPP;
4431
7a22677b
LM
4432 if (ras && ras->supported)
4433 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4434
9530273e 4435 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4436}
4437
4438int amdgpu_device_baco_exit(struct drm_device *dev)
4439{
4440 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4441 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4442 int ret = 0;
361dbd01
AD
4443
4444 if (!amdgpu_device_supports_baco(adev->ddev))
4445 return -ENOTSUPP;
4446
9530273e
EQ
4447 ret = amdgpu_dpm_baco_exit(adev);
4448 if (ret)
4449 return ret;
7a22677b
LM
4450
4451 if (ras && ras->supported)
4452 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4453
4454 return 0;
361dbd01 4455}
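/*
 * A minimal sketch (illustrative, not from this file) of the expected
 * pairing of the two BACO helpers above in a runtime-PM style path; the
 * real callers are the runtime suspend/resume callbacks in amdgpu_drv.c.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);	/* GPU powers down into BACO */
	if (r)
		return r;

	/* ... device stays in BACO until it is needed again ... */

	return amdgpu_device_baco_exit(dev);	/* bring the GPU back up */
}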