/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
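		/* registers beyond the directly mapped window go through the
		 * indirect MM_INDEX/MM_DATA pair; the spinlock keeps the
		 * two-step index/data sequence atomic
		 */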
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/**
 * amdgpu_mm_rreg8 - read an 8 bit memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte offset from the start of the MMIO region
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return readb(adev->rmmio + offset);
	BUG();
}

/**
 * amdgpu_mm_wreg8 - write an 8 bit memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte offset from the start of the MMIO region
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

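	/* the array is packed as (reg, and_mask, or_mask) triplets */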
	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK).
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells *= 2;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
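	/* convert the dw offset handed out by amdgpu_device_wb_get() back
	 * to a slot index
	 */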
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
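	/* pci_resize_resource() expects the BAR size encoded as log2 of
	 * the size in MB
	 */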
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check at driver startup if the asic has been initialized (all asics),
 * or if post is needed after a hw reset.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: in the whole-GPU pass-through virtualization case,
		 * after VM reboot some old SMC firmware still needs the driver
		 * to do a vPost, otherwise the GPU hangs. SMC firmware above
		 * version 22.15 doesn't have this flaw, so force vPost for SMC
		 * versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

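			/* the SMC version is read from dword offset 69 of
			 * the firmware image
			 */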
			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
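	/* the module parameter is in units of 256 MB (1 << 28 bytes) */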
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
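	/* chips without a dedicated gpu_info firmware fall through to the
	 * default case and return 0; their config comes from elsewhere
	 */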
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
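				/* -ENOENT means the IP is not present on this
				 * asic; mark the block invalid rather than
				 * failing the whole init
				 */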
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
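		/* phase 1 brings up only the COMMON and IH blocks, which
		 * firmware loading and the remaining IPs depend on
		 */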
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run. During the late init pass this
 * enables clockgating for the hardware IPs; during fini or suspend it
 * disables clockgating.
 * Returns 0 on success, negative error code on failure.
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
1753 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1754 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1755 continue;
4a446d55 1756 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1757 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1758 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1759 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1760 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1761 /* enable clockgating to save power */
a1255107 1762 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1763 state);
4a446d55
AD
1764 if (r) {
1765 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1766 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1767 return r;
1768 }
b0b00ff1 1769 }
d38ceaf9 1770 }
06b18f61 1771
c9f96fd5
RZ
1772 return 0;
1773}
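
/*
 * A note on the index arithmetic above (illustrative, assuming a
 * three-block IP list): when gating, blocks are visited front to back
 * (i == j); when ungating, back to front (i == num_ip_blocks - j - 1),
 * so clockgating is torn down in the reverse of the order it was applied:
 *
 *	gate:   j = 0, 1, 2  ->  i = 0, 1, 2
 *	ungate: j = 0, 1, 2  ->  i = 2, 1, 0
 */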
1774
1112a46b 1775static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1776{
1112a46b 1777 int i, j, r;
06b18f61 1778
c9f96fd5
RZ
1779 if (amdgpu_emu_mode == 1)
1780 return 0;
1781
1112a46b
RZ
1782 for (j = 0; j < adev->num_ip_blocks; j++) {
1783 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1784 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1785 continue;
1786 /* skip PG for VCE/UVD, it's handled specially */
1787 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1788 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1789 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1790 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1791 /* enable powergating to save power */
1792 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1793 state);
c9f96fd5
RZ
1794 if (r) {
1795 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1796 adev->ip_blocks[i].version->funcs->name, r);
1797 return r;
1798 }
1799 }
1800 }
2dc80b00
S
1801 return 0;
1802}
1803
e3ecdffa
AD
1804/**
1805 * amdgpu_device_ip_late_init - run late init for hardware IPs
1806 *
1807 * @adev: amdgpu_device pointer
1808 *
1809 * Late initialization pass for hardware IPs. The list of all the hardware
1810 * IPs that make up the asic is walked and the late_init callbacks are run.
1811 * late_init covers any special initialization that an IP requires
1812 * after all of them have been initialized or something that needs to happen
1813 * late in the init process.
1814 * Returns 0 on success, negative error code on failure.
1815 */
06ec9070 1816static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1817{
1818 int i = 0, r;
1819
1820 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1821 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1822 continue;
1823 if (adev->ip_blocks[i].version->funcs->late_init) {
1824 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1825 if (r) {
1826 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1827 adev->ip_blocks[i].version->funcs->name, r);
1828 return r;
1829 }
2dc80b00 1830 }
73f847db 1831 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1832 }
1833
1112a46b
RZ
1834 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1835 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1836
2c773de2
S
1837 queue_delayed_work(system_wq, &adev->late_init_work,
1838 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1839
06ec9070 1840 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1841
1842 return 0;
1843}
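
/*
 * Deferred-work sketch for the function above (the field and constant
 * names mirror the real ones; the snippet itself is only illustrative):
 * the IB tests are not run inline, they are queued on the system
 * workqueue with a ~2 second delay so resume stays fast, and callers
 * that must observe the results flush the work first, as
 * amdgpu_device_resume() does:
 *
 *	queue_delayed_work(system_wq, &adev->late_init_work,
 *			   msecs_to_jiffies(AMDGPU_RESUME_MS));
 *	...
 *	flush_delayed_work(&adev->late_init_work);	// wait for IB tests
 */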
1844
e3ecdffa
AD
1845/**
1846 * amdgpu_device_ip_fini - run fini for hardware IPs
1847 *
1848 * @adev: amdgpu_device pointer
1849 *
1850 * Main teardown pass for hardware IPs. The list of all the hardware
1851 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1852 * are run. hw_fini tears down the hardware associated with each IP
1853 * and sw_fini tears down any software state associated with each IP.
1854 * Returns 0 on success, negative error code on failure.
1855 */
06ec9070 1856static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1857{
1858 int i, r;
1859
1884734a 1860 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1861
1862 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1863 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1864
3e96dbfd
AD
1865 /* need to disable SMC first */
1866 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1867 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1868 continue;
fdd34271 1869 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1870 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1871 /* XXX handle errors */
1872 if (r) {
1873 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1874 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1875 }
a1255107 1876 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1877 break;
1878 }
1879 }
1880
d38ceaf9 1881 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1882 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1883 continue;
8201a67a 1884
a1255107 1885 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1886 /* XXX handle errors */
2c1a2784 1887 if (r) {
a1255107
AD
1888 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1889 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1890 }
8201a67a 1891
a1255107 1892 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1893 }
1894
9950cda2 1895
d38ceaf9 1896 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1897 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1898 continue;
c12aba3a
ML
1899
1900 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1901 amdgpu_ucode_free_bo(adev);
1e256e27 1902 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1903 amdgpu_device_wb_fini(adev);
1904 amdgpu_device_vram_scratch_fini(adev);
1905 }
1906
a1255107 1907 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1908 /* XXX handle errors */
2c1a2784 1909 if (r) {
a1255107
AD
1910 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1911 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1912 }
a1255107
AD
1913 adev->ip_blocks[i].status.sw = false;
1914 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1915 }
1916
a6dcfd9c 1917 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1918 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1919 continue;
a1255107
AD
1920 if (adev->ip_blocks[i].version->funcs->late_fini)
1921 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1922 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1923 }
1924
030308fc 1925 if (amdgpu_sriov_vf(adev))
24136135
ML
1926 if (amdgpu_virt_release_full_gpu(adev, false))
1927 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1928
d38ceaf9
AD
1929 return 0;
1930}
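
/*
 * Teardown-order sketch: the fini passes above walk ip_blocks[] in
 * reverse so dependencies unwind opposite to init order, with two
 * special cases handled explicitly: the SMC block is hw_fini'd before
 * everything else, and the GMC-owned allocations (ucode bo, CSA,
 * writeback, vram scratch) are released during the GMC block's sw_fini:
 *
 *	for (i = adev->num_ip_blocks - 1; i >= 0; i--)
 *		adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
 */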
1931
b55c9e7a
EQ
1932static int amdgpu_device_enable_mgpu_fan_boost(void)
1933{
1934 struct amdgpu_gpu_instance *gpu_ins;
1935 struct amdgpu_device *adev;
1936 int i, ret = 0;
1937
1938 mutex_lock(&mgpu_info.mutex);
1939
1940 /*
1941 * MGPU fan boost feature should be enabled
1942 * only when there are two or more dGPUs in
1943 * the system
1944 */
1945 if (mgpu_info.num_dgpu < 2)
1946 goto out;
1947
1948 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1949 gpu_ins = &(mgpu_info.gpu_ins[i]);
1950 adev = gpu_ins->adev;
1951 if (!(adev->flags & AMD_IS_APU) &&
1952 !gpu_ins->mgpu_fan_enabled &&
1953 adev->powerplay.pp_funcs &&
1954 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1955 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1956 if (ret)
1957 break;
1958
1959 gpu_ins->mgpu_fan_enabled = 1;
1960 }
1961 }
1962
1963out:
1964 mutex_unlock(&mgpu_info.mutex);
1965
1966 return ret;
1967}
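
/*
 * Convention note (a sketch, not new driver code): the powerplay
 * callback table is optional per ASIC, so callers null-check both the
 * table and the specific hook before dispatching, exactly as above:
 *
 *	if (adev->powerplay.pp_funcs &&
 *	    adev->powerplay.pp_funcs->enable_mgpu_fan_boost)
 *		ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
 */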
1968
e3ecdffa 1969/**
1112a46b 1970 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests and mgpu fan boost
e3ecdffa 1971 *
1112a46b 1972 * @work: work_struct.
e3ecdffa 1973 */
06ec9070 1974static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1975{
1976 struct amdgpu_device *adev =
1977 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1978 int r;
1979
1980 r = amdgpu_ib_ring_tests(adev);
1981 if (r)
1982 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1983
1984 r = amdgpu_device_enable_mgpu_fan_boost();
1985 if (r)
1986 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
1987}
1988
1e317b99
RZ
1989static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
1990{
1991 struct amdgpu_device *adev =
1992 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
1993
1994 mutex_lock(&adev->gfx.gfx_off_mutex);
1995 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
1996 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
1997 adev->gfx.gfx_off_state = true;
1998 }
1999 mutex_unlock(&adev->gfx.gfx_off_mutex);
2000}
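
/*
 * GFXOFF accounting sketch (illustrative; the requester-side helper
 * lives elsewhere in the driver): gfx_off_req_count is a disable-depth
 * counter, initialized to 1 in amdgpu_device_init(). GFXOFF may only be
 * entered once every requester has dropped its reference and this
 * delayed work then observes a zero count under gfx_off_mutex:
 *
 *	mutex_lock(&adev->gfx.gfx_off_mutex);
 *	adev->gfx.gfx_off_req_count--;		// hypothetical release path
 *	if (!adev->gfx.gfx_off_req_count)
 *		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay);
 *	mutex_unlock(&adev->gfx.gfx_off_mutex);
 */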
2001
e3ecdffa 2002/**
e7854a03 2003 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2004 *
2005 * @adev: amdgpu_device pointer
2006 *
2007 * First suspend pass for hardware IPs. Clockgating and powergating are
2008 * disabled, then the list of all the hardware IPs that make up the asic
2009 * is walked and the suspend callbacks are run for the display (DCE)
2010 * blocks only; the remaining IPs are suspended in phase 2.
2011 * Returns 0 on success, negative error code on failure.
2012 */
e7854a03
AD
2013static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2014{
2015 int i, r;
2016
05df1f01 2017 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2018 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2019
e7854a03
AD
2020 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2021 if (!adev->ip_blocks[i].status.valid)
2022 continue;
2023 /* displays are handled separately */
2024 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2025 /* XXX handle errors */
2026 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2027 /* XXX handle errors */
2028 if (r) {
2029 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2030 adev->ip_blocks[i].version->funcs->name, r);
2031 }
2032 }
2033 }
2034
e7854a03
AD
2035 return 0;
2036}
2037
2038/**
2039 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2040 *
2041 * @adev: amdgpu_device pointer
2042 *
2043 * Second suspend pass for hardware IPs. The list of all the hardware
2044 * IPs that make up the asic is walked and the suspend callbacks are run
2045 * for every block except the displays, which were handled in phase 1.
2046 * suspend puts each IP's hardware and software state into a state suitable for suspend.
2047 * Returns 0 on success, negative error code on failure.
2048 */
2049static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2050{
2051 int i, r;
2052
2053 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2054 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2055 continue;
e7854a03
AD
2056 /* displays are handled in phase1 */
2057 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2058 continue;
d38ceaf9 2059 /* XXX handle errors */
a1255107 2060 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2061 /* XXX handle errors */
2c1a2784 2062 if (r) {
a1255107
AD
2063 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2064 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2065 }
d38ceaf9
AD
2066 }
2067
2068 return 0;
2069}
2070
e7854a03
AD
2071/**
2072 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2073 *
2074 * @adev: amdgpu_device pointer
2075 *
2076 * Main suspend function for hardware IPs. The list of all the hardware
2077 * IPs that make up the asic is walked, clockgating is disabled and the
2078 * suspend callbacks are run. suspend puts the hardware and software state
2079 * in each IP into a state suitable for suspend.
2080 * Returns 0 on success, negative error code on failure.
2081 */
2082int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2083{
2084 int r;
2085
e7819644
YT
2086 if (amdgpu_sriov_vf(adev))
2087 amdgpu_virt_request_full_gpu(adev, false);
2088
e7854a03
AD
2089 r = amdgpu_device_ip_suspend_phase1(adev);
2090 if (r)
2091 return r;
2092 r = amdgpu_device_ip_suspend_phase2(adev);
2093
e7819644
YT
2094 if (amdgpu_sriov_vf(adev))
2095 amdgpu_virt_release_full_gpu(adev, false);
2096
e7854a03
AD
2097 return r;
2098}
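
/*
 * The request/release bracket above is a recurring SR-IOV pattern in
 * this file (compare amdgpu_device_ip_init() and the fini path): a VF
 * must hold exclusive "full GPU" access from the host while touching
 * shared hardware state, roughly:
 *
 *	if (amdgpu_sriov_vf(adev))
 *		amdgpu_virt_request_full_gpu(adev, false);
 *	... privileged programming ...
 *	if (amdgpu_sriov_vf(adev))
 *		amdgpu_virt_release_full_gpu(adev, false);
 */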
2099
06ec9070 2100static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2101{
2102 int i, r;
2103
2cb681b6
ML
2104 static enum amd_ip_block_type ip_order[] = {
2105 AMD_IP_BLOCK_TYPE_GMC,
2106 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2107 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2108 AMD_IP_BLOCK_TYPE_IH,
2109 };
a90ad3c2 2110
2cb681b6
ML
2111 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2112 int j;
2113 struct amdgpu_ip_block *block;
a90ad3c2 2114
2cb681b6
ML
2115 for (j = 0; j < adev->num_ip_blocks; j++) {
2116 block = &adev->ip_blocks[j];
2117
2118 if (block->version->type != ip_order[i] ||
2119 !block->status.valid)
2120 continue;
2121
2122 r = block->version->funcs->hw_init(adev);
3f48c681 2123 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2124 if (r)
2125 return r;
a90ad3c2
ML
2126 }
2127 }
2128
2129 return 0;
2130}
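
/*
 * Ordering sketch: unlike normal init, which walks adev->ip_blocks[] in
 * discovery order, the SR-IOV re-init paths impose an explicit order via
 * a small table and an O(orders x blocks) scan. Supporting another block
 * type in the early phase would simply mean growing the table
 * (hypothetical entry shown):
 *
 *	static enum amd_ip_block_type ip_order[] = {
 *		AMD_IP_BLOCK_TYPE_GMC,
 *		AMD_IP_BLOCK_TYPE_COMMON,
 *		AMD_IP_BLOCK_TYPE_PSP,
 *		AMD_IP_BLOCK_TYPE_IH,
 *		// a new type would slot in at its init position
 *	};
 */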
2131
06ec9070 2132static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2133{
2134 int i, r;
2135
2cb681b6
ML
2136 static enum amd_ip_block_type ip_order[] = {
2137 AMD_IP_BLOCK_TYPE_SMC,
2138 AMD_IP_BLOCK_TYPE_DCE,
2139 AMD_IP_BLOCK_TYPE_GFX,
2140 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2141 AMD_IP_BLOCK_TYPE_UVD,
2142 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2143 };
a90ad3c2 2144
2cb681b6
ML
2145 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2146 int j;
2147 struct amdgpu_ip_block *block;
a90ad3c2 2148
2cb681b6
ML
2149 for (j = 0; j < adev->num_ip_blocks; j++) {
2150 block = &adev->ip_blocks[j];
2151
2152 if (block->version->type != ip_order[i] ||
2153 !block->status.valid)
2154 continue;
2155
2156 r = block->version->funcs->hw_init(adev);
3f48c681 2157 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2158 if (r)
2159 return r;
a90ad3c2
ML
2160 }
2161 }
2162
2163 return 0;
2164}
2165
e3ecdffa
AD
2166/**
2167 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2168 *
2169 * @adev: amdgpu_device pointer
2170 *
2171 * First resume function for hardware IPs. The list of all the hardware
2172 * IPs that make up the asic is walked and the resume callbacks are run for
2173 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2174 * after a suspend and updates the software state as necessary. This
2175 * function is also used for restoring the GPU after a GPU reset.
2176 * Returns 0 on success, negative error code on failure.
2177 */
06ec9070 2178static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2179{
2180 int i, r;
2181
a90ad3c2
ML
2182 for (i = 0; i < adev->num_ip_blocks; i++) {
2183 if (!adev->ip_blocks[i].status.valid)
2184 continue;
a90ad3c2 2185 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2186 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2187 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2188 r = adev->ip_blocks[i].version->funcs->resume(adev);
2189 if (r) {
2190 DRM_ERROR("resume of IP block <%s> failed %d\n",
2191 adev->ip_blocks[i].version->funcs->name, r);
2192 return r;
2193 }
a90ad3c2
ML
2194 }
2195 }
2196
2197 return 0;
2198}
2199
e3ecdffa
AD
2200/**
2201 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2202 *
2203 * @adev: amdgpu_device pointer
2204 *
2205 * Second resume function for hardware IPs. The list of all the hardware
2206 * IPs that make up the asic is walked and the resume callbacks are run for
2207 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
2208 * functional state after a suspend and updates the software state as
2209 * necessary. This function is also used for restoring the GPU after a GPU
2210 * reset.
2211 * Returns 0 on success, negative error code on failure.
2212 */
06ec9070 2213static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2214{
2215 int i, r;
2216
2217 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2218 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2219 continue;
fcf0649f 2220 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2221 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2222 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2223 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2224 continue;
a1255107 2225 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2226 if (r) {
a1255107
AD
2227 DRM_ERROR("resume of IP block <%s> failed %d\n",
2228 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2229 return r;
2c1a2784 2230 }
d38ceaf9
AD
2231 }
2232
2233 return 0;
2234}
2235
e3ecdffa
AD
2236/**
2237 * amdgpu_device_ip_resume - run resume for hardware IPs
2238 *
2239 * @adev: amdgpu_device pointer
2240 *
2241 * Main resume function for hardware IPs. The hardware IPs
2242 * are split into two resume functions because they are
2243 * also used in recovering from a GPU reset and some additional
2244 * steps need to be taken between them. In this case (S3/S4) they are
2245 * run sequentially.
2246 * Returns 0 on success, negative error code on failure.
2247 */
06ec9070 2248static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2249{
2250 int r;
2251
06ec9070 2252 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2253 if (r)
2254 return r;
7a3e0bb2
RZ
2255
2256 r = amdgpu_device_fw_loading(adev);
2257 if (r)
2258 return r;
2259
06ec9070 2260 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2261
2262 return r;
2263}
2264
e3ecdffa
AD
2265/**
2266 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2267 *
2268 * @adev: amdgpu_device pointer
2269 *
2270 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2271 */
4e99a44e 2272static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2273{
6867e1b5
ML
2274 if (amdgpu_sriov_vf(adev)) {
2275 if (adev->is_atom_fw) {
2276 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2277 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2278 } else {
2279 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2280 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2281 }
2282
2283 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2284 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2285 }
048765ad
AR
2286}
2287
e3ecdffa
AD
2288/**
2289 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2290 *
2291 * @asic_type: AMD asic type
2292 *
2293 * Check if there is DC (new modesetting infrastructure) support for an asic.
2294 * Returns true if DC has support, false if not.
2295 */
4562236b
HW
2296bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2297{
2298 switch (asic_type) {
2299#if defined(CONFIG_DRM_AMD_DC)
2300 case CHIP_BONAIRE:
0d6fbccb 2301 case CHIP_KAVERI:
367e6687
AD
2302 case CHIP_KABINI:
2303 case CHIP_MULLINS:
d9fda248
HW
2304 /*
2305 * We have systems in the wild with these ASICs that require
2306 * LVDS and VGA support which is not supported with DC.
2307 *
2308 * Fallback to the non-DC driver here by default so as not to
2309 * cause regressions.
2310 */
2311 return amdgpu_dc > 0;
2312 case CHIP_HAWAII:
4562236b
HW
2313 case CHIP_CARRIZO:
2314 case CHIP_STONEY:
4562236b 2315 case CHIP_POLARIS10:
675fd32b 2316 case CHIP_POLARIS11:
2c8ad2d5 2317 case CHIP_POLARIS12:
675fd32b 2318 case CHIP_VEGAM:
4562236b
HW
2319 case CHIP_TONGA:
2320 case CHIP_FIJI:
42f8ffa1 2321 case CHIP_VEGA10:
dca7b401 2322 case CHIP_VEGA12:
c6034aa2 2323 case CHIP_VEGA20:
dc37a9a0 2324#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2325 case CHIP_RAVEN:
42f8ffa1 2326#endif
fd187853 2327 return amdgpu_dc != 0;
4562236b
HW
2328#endif
2329 default:
2330 return false;
2331 }
2332}
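
/*
 * Reading the two return expressions above (clarifying note, not new
 * behavior): amdgpu_dc is the tri-state "dc" module parameter, -1 auto,
 * 0 off, 1 on. "amdgpu_dc > 0" makes DC opt-in for the LVDS/VGA-era
 * ASICs (auto resolves to off), while "amdgpu_dc != 0" makes DC opt-out
 * for newer ASICs (auto resolves to on):
 *
 *	modprobe amdgpu dc=1	# force DC, e.g. on KAVERI
 *	modprobe amdgpu dc=0	# force the legacy display path
 */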
2333
2334/**
2335 * amdgpu_device_has_dc_support - check if dc is supported
2336 *
2337 * @adev: amdgpu_device pointer
2338 *
2339 * Returns true for supported, false for not supported
2340 */
2341bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2342{
2555039d
XY
2343 if (amdgpu_sriov_vf(adev))
2344 return false;
2345
4562236b
HW
2346 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2347}
2348
d38ceaf9
AD
2349/**
2350 * amdgpu_device_init - initialize the driver
2351 *
2352 * @adev: amdgpu_device pointer
87e3f136 2353 * @ddev: drm dev pointer
d38ceaf9
AD
2354 * @pdev: pci dev pointer
2355 * @flags: driver flags
2356 *
2357 * Initializes the driver info and hw (all asics).
2358 * Returns 0 for success or an error on failure.
2359 * Called at driver startup.
2360 */
2361int amdgpu_device_init(struct amdgpu_device *adev,
2362 struct drm_device *ddev,
2363 struct pci_dev *pdev,
2364 uint32_t flags)
2365{
2366 int r, i;
2367 bool runtime = false;
95844d20 2368 u32 max_MBps;
d38ceaf9
AD
2369
2370 adev->shutdown = false;
2371 adev->dev = &pdev->dev;
2372 adev->ddev = ddev;
2373 adev->pdev = pdev;
2374 adev->flags = flags;
2f7d10b3 2375 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2376 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2377 if (amdgpu_emu_mode == 1)
2378 adev->usec_timeout *= 2;
770d13b1 2379 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2380 adev->accel_working = false;
2381 adev->num_rings = 0;
2382 adev->mman.buffer_funcs = NULL;
2383 adev->mman.buffer_funcs_ring = NULL;
2384 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2385 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2386 adev->gmc.gmc_funcs = NULL;
f54d1867 2387 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2388 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2389
2390 adev->smc_rreg = &amdgpu_invalid_rreg;
2391 adev->smc_wreg = &amdgpu_invalid_wreg;
2392 adev->pcie_rreg = &amdgpu_invalid_rreg;
2393 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2394 adev->pciep_rreg = &amdgpu_invalid_rreg;
2395 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2396 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2397 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2398 adev->didt_rreg = &amdgpu_invalid_rreg;
2399 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2400 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2401 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2402 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2403 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2404
3e39ab90
AD
2405 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2406 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2407 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2408
2409 /* mutex initializations are all done here so we
2410 * can call these functions again without locking issues */
d38ceaf9 2411 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2412 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2413 mutex_init(&adev->pm.mutex);
2414 mutex_init(&adev->gfx.gpu_clock_mutex);
2415 mutex_init(&adev->srbm_mutex);
b8866c26 2416 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2417 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2418 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2419 mutex_init(&adev->mn_lock);
e23b74aa 2420 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2421 hash_init(adev->mn_hash);
13a752e3 2422 mutex_init(&adev->lock_reset);
d38ceaf9 2423
06ec9070 2424 amdgpu_device_check_arguments(adev);
d38ceaf9 2425
d38ceaf9
AD
2426 spin_lock_init(&adev->mmio_idx_lock);
2427 spin_lock_init(&adev->smc_idx_lock);
2428 spin_lock_init(&adev->pcie_idx_lock);
2429 spin_lock_init(&adev->uvd_ctx_idx_lock);
2430 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2431 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2432 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2433 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2434 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2435
0c4e7fa5
CZ
2436 INIT_LIST_HEAD(&adev->shadow_list);
2437 mutex_init(&adev->shadow_list_lock);
2438
795f2813
AR
2439 INIT_LIST_HEAD(&adev->ring_lru_list);
2440 spin_lock_init(&adev->ring_lru_list_lock);
2441
06ec9070
AD
2442 INIT_DELAYED_WORK(&adev->late_init_work,
2443 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2444 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2445 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2446
d23ee13f 2447 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2448 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2449
0fa49558
AX
2450 /* Registers mapping */
2451 /* TODO: block userspace mapping of io register */
da69c161
KW
2452 if (adev->asic_type >= CHIP_BONAIRE) {
2453 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2454 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2455 } else {
2456 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2457 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2458 }
d38ceaf9 2459
d38ceaf9
AD
2460 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2461 if (adev->rmmio == NULL) {
2462 return -ENOMEM;
2463 }
2464 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2465 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2466
705e519e 2467 /* doorbell bar mapping */
06ec9070 2468 amdgpu_device_doorbell_init(adev);
d38ceaf9
AD
2469
2470 /* io port mapping */
2471 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2472 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2473 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2474 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2475 break;
2476 }
2477 }
2478 if (adev->rio_mem == NULL)
b64a18c5 2479 DRM_INFO("PCI I/O BAR not found.\n");
d38ceaf9 2480
5494d864
AD
2481 amdgpu_device_get_pcie_info(adev);
2482
d38ceaf9 2483 /* early init functions */
06ec9070 2484 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2485 if (r)
2486 return r;
2487
2488 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2489 /* this will fail for cards that aren't VGA class devices, just
2490 * ignore it */
06ec9070 2491 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2492
e9bef455 2493 if (amdgpu_device_is_px(ddev))
d38ceaf9 2494 runtime = true;
84c8b22e
LW
2495 if (!pci_is_thunderbolt_attached(adev->pdev))
2496 vga_switcheroo_register_client(adev->pdev,
2497 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2498 if (runtime)
2499 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2500
9475a943
SL
2501 if (amdgpu_emu_mode == 1) {
2502 /* post the asic on emulation mode */
2503 emu_soc_asic_init(adev);
bfca0289 2504 goto fence_driver_init;
9475a943 2505 }
bfca0289 2506
d38ceaf9 2507 /* Read BIOS */
83ba126a
AD
2508 if (!amdgpu_get_bios(adev)) {
2509 r = -EINVAL;
2510 goto failed;
2511 }
f7e9e9fe 2512
d38ceaf9 2513 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2514 if (r) {
2515 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2516 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2517 goto failed;
2c1a2784 2518 }
d38ceaf9 2519
4e99a44e
ML
2520 /* detect if we are with an SRIOV vbios */
2521 amdgpu_device_detect_sriov_bios(adev);
048765ad 2522
d38ceaf9 2523 /* Post card if necessary */
39c640c0 2524 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2525 if (!adev->bios) {
bec86378 2526 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2527 r = -EINVAL;
2528 goto failed;
d38ceaf9 2529 }
bec86378 2530 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2531 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2532 if (r) {
2533 dev_err(adev->dev, "gpu post error!\n");
2534 goto failed;
2535 }
d38ceaf9
AD
2536 }
2537
88b64e95
AD
2538 if (adev->is_atom_fw) {
2539 /* Initialize clocks */
2540 r = amdgpu_atomfirmware_get_clock_info(adev);
2541 if (r) {
2542 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2543 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2544 goto failed;
2545 }
2546 } else {
a5bde2f9
AD
2547 /* Initialize clocks */
2548 r = amdgpu_atombios_get_clock_info(adev);
2549 if (r) {
2550 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2551 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2552 goto failed;
a5bde2f9
AD
2553 }
2554 /* init i2c buses */
4562236b
HW
2555 if (!amdgpu_device_has_dc_support(adev))
2556 amdgpu_atombios_i2c_init(adev);
2c1a2784 2557 }
d38ceaf9 2558
bfca0289 2559fence_driver_init:
d38ceaf9
AD
2560 /* Fence driver */
2561 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2562 if (r) {
2563 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2564 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2565 goto failed;
2c1a2784 2566 }
d38ceaf9
AD
2567
2568 /* init the mode config */
2569 drm_mode_config_init(adev->ddev);
2570
06ec9070 2571 r = amdgpu_device_ip_init(adev);
d38ceaf9 2572 if (r) {
8840a387 2573 /* failed in exclusive mode due to timeout */
2574 if (amdgpu_sriov_vf(adev) &&
2575 !amdgpu_sriov_runtime(adev) &&
2576 amdgpu_virt_mmio_blocked(adev) &&
2577 !amdgpu_virt_wait_reset(adev)) {
2578 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2579 /* Don't send request since VF is inactive. */
2580 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2581 adev->virt.ops = NULL;
8840a387 2582 r = -EAGAIN;
2583 goto failed;
2584 }
06ec9070 2585 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2586 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2587 goto failed;
d38ceaf9
AD
2588 }
2589
2590 adev->accel_working = true;
2591
e59c0205
AX
2592 amdgpu_vm_check_compute_bug(adev);
2593
95844d20
MO
2594 /* Initialize the buffer migration limit. */
2595 if (amdgpu_moverate >= 0)
2596 max_MBps = amdgpu_moverate;
2597 else
2598 max_MBps = 8; /* Allow 8 MB/s. */
2599 /* Get a log2 for easy divisions. */
2600 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
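	/*
	 * Illustrative note: storing log2 of the rate lets the accounting
	 * code turn divisions by max_MBps into shifts. With the default
	 * of 8 MB/s, log2_max_MBps == 3, so bytes / 8 becomes bytes >> 3
	 * and us * 8 becomes us << 3.
	 */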
2601
d38ceaf9
AD
2602 r = amdgpu_ib_pool_init(adev);
2603 if (r) {
2604 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2605 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2606 goto failed;
d38ceaf9
AD
2607 }
2608
2dc8f81e
HC
2609 if (amdgpu_sriov_vf(adev))
2610 amdgpu_virt_init_data_exchange(adev);
2611
9bc92b9c
ML
2612 amdgpu_fbdev_init(adev);
2613
d2f52ac8
RZ
2614 r = amdgpu_pm_sysfs_init(adev);
2615 if (r)
2616 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2617
75758255 2618 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2619 if (r)
d38ceaf9 2620 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2621
2622 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2623 if (r)
d38ceaf9 2624 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2625
50ab2533 2626 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2627 if (r)
50ab2533 2628 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2629
763efb6c 2630 r = amdgpu_debugfs_init(adev);
db95e218 2631 if (r)
763efb6c 2632 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2633
d38ceaf9
AD
2634 if ((amdgpu_testing & 1)) {
2635 if (adev->accel_working)
2636 amdgpu_test_moves(adev);
2637 else
2638 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2639 }
d38ceaf9
AD
2640 if (amdgpu_benchmarking) {
2641 if (adev->accel_working)
2642 amdgpu_benchmark(adev, amdgpu_benchmarking);
2643 else
2644 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2645 }
2646
2647 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2648 * explicit gating rather than handling it automatically.
2649 */
06ec9070 2650 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2651 if (r) {
06ec9070 2652 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2653 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2654 goto failed;
2c1a2784 2655 }
d38ceaf9
AD
2656
2657 return 0;
83ba126a
AD
2658
2659failed:
89041940 2660 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2661 if (runtime)
2662 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2663
83ba126a 2664 return r;
d38ceaf9
AD
2665}
2666
d38ceaf9
AD
2667/**
2668 * amdgpu_device_fini - tear down the driver
2669 *
2670 * @adev: amdgpu_device pointer
2671 *
2672 * Tear down the driver info (all asics).
2673 * Called at driver shutdown.
2674 */
2675void amdgpu_device_fini(struct amdgpu_device *adev)
2676{
2677 int r;
2678
2679 DRM_INFO("amdgpu: finishing device.\n");
2680 adev->shutdown = true;
e5b03032
ML
2681 /* disable all interrupts */
2682 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2683 if (adev->mode_info.mode_config_initialized){
2684 if (!amdgpu_device_has_dc_support(adev))
2685 drm_crtc_force_disable_all(adev->ddev);
2686 else
2687 drm_atomic_helper_shutdown(adev->ddev);
2688 }
d38ceaf9
AD
2689 amdgpu_ib_pool_fini(adev);
2690 amdgpu_fence_driver_fini(adev);
58e955d9 2691 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2692 amdgpu_fbdev_fini(adev);
06ec9070 2693 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2694 if (adev->firmware.gpu_info_fw) {
2695 release_firmware(adev->firmware.gpu_info_fw);
2696 adev->firmware.gpu_info_fw = NULL;
2697 }
d38ceaf9 2698 adev->accel_working = false;
2dc80b00 2699 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2700 /* free i2c buses */
4562236b
HW
2701 if (!amdgpu_device_has_dc_support(adev))
2702 amdgpu_i2c_fini(adev);
bfca0289
SL
2703
2704 if (amdgpu_emu_mode != 1)
2705 amdgpu_atombios_fini(adev);
2706
d38ceaf9
AD
2707 kfree(adev->bios);
2708 adev->bios = NULL;
84c8b22e
LW
2709 if (!pci_is_thunderbolt_attached(adev->pdev))
2710 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2711 if (adev->flags & AMD_IS_PX)
2712 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2713 vga_client_register(adev->pdev, NULL, NULL, NULL);
2714 if (adev->rio_mem)
2715 pci_iounmap(adev->pdev, adev->rio_mem);
2716 adev->rio_mem = NULL;
2717 iounmap(adev->rmmio);
2718 adev->rmmio = NULL;
06ec9070 2719 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2720 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2721}
2722
2723
2724/*
2725 * Suspend & resume.
2726 */
2727/**
810ddc3a 2728 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2729 *
87e3f136
DP
2730 * @dev: drm dev pointer
2731 * @suspend: suspend state
2732 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
2733 *
2734 * Puts the hw in the suspend state (all asics).
2735 * Returns 0 for success or an error on failure.
2736 * Called at driver suspend.
2737 */
810ddc3a 2738int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2739{
2740 struct amdgpu_device *adev;
2741 struct drm_crtc *crtc;
2742 struct drm_connector *connector;
5ceb54c6 2743 int r;
d38ceaf9
AD
2744
2745 if (dev == NULL || dev->dev_private == NULL) {
2746 return -ENODEV;
2747 }
2748
2749 adev = dev->dev_private;
2750
2751 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2752 return 0;
2753
44779b43 2754 adev->in_suspend = true;
d38ceaf9
AD
2755 drm_kms_helper_poll_disable(dev);
2756
5f818173
S
2757 if (fbcon)
2758 amdgpu_fbdev_set_suspend(adev, 1);
2759
a5459475
RZ
2760 cancel_delayed_work_sync(&adev->late_init_work);
2761
4562236b
HW
2762 if (!amdgpu_device_has_dc_support(adev)) {
2763 /* turn off display hw */
2764 drm_modeset_lock_all(dev);
2765 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2766 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2767 }
2768 drm_modeset_unlock_all(dev);
fe1053b7
AD
2769 /* unpin the front buffers and cursors */
2770 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2771 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2772 struct drm_framebuffer *fb = crtc->primary->fb;
2773 struct amdgpu_bo *robj;
2774
2775 if (amdgpu_crtc->cursor_bo) {
2776 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2777 r = amdgpu_bo_reserve(aobj, true);
2778 if (r == 0) {
2779 amdgpu_bo_unpin(aobj);
2780 amdgpu_bo_unreserve(aobj);
2781 }
756e6880 2782 }
756e6880 2783
fe1053b7
AD
2784 if (fb == NULL || fb->obj[0] == NULL) {
2785 continue;
2786 }
2787 robj = gem_to_amdgpu_bo(fb->obj[0]);
2788 /* don't unpin kernel fb objects */
2789 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2790 r = amdgpu_bo_reserve(robj, true);
2791 if (r == 0) {
2792 amdgpu_bo_unpin(robj);
2793 amdgpu_bo_unreserve(robj);
2794 }
d38ceaf9
AD
2795 }
2796 }
2797 }
fe1053b7
AD
2798
2799 amdgpu_amdkfd_suspend(adev);
2800
2801 r = amdgpu_device_ip_suspend_phase1(adev);
2802
d38ceaf9
AD
2803 /* evict vram memory */
2804 amdgpu_bo_evict_vram(adev);
2805
5ceb54c6 2806 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2807
fe1053b7 2808 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2809
a0a71e49
AD
2810 /* evict remaining vram memory
2811 * This second call to evict vram is to evict the gart page table
2812 * using the CPU.
2813 */
d38ceaf9
AD
2814 amdgpu_bo_evict_vram(adev);
2815
2816 pci_save_state(dev->pdev);
2817 if (suspend) {
2818 /* Shut down the device */
2819 pci_disable_device(dev->pdev);
2820 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2821 } else {
2822 r = amdgpu_asic_reset(adev);
2823 if (r)
2824 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2825 }
2826
d38ceaf9
AD
2827 return 0;
2828}
2829
2830/**
810ddc3a 2831 * amdgpu_device_resume - initiate device resume
d38ceaf9 2832 *
87e3f136
DP
2833 * @dev: drm dev pointer
2834 * @resume: resume state
2835 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
2836 *
2837 * Bring the hw back to operating state (all asics).
2838 * Returns 0 for success or an error on failure.
2839 * Called at driver resume.
2840 */
810ddc3a 2841int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2842{
2843 struct drm_connector *connector;
2844 struct amdgpu_device *adev = dev->dev_private;
756e6880 2845 struct drm_crtc *crtc;
03161a6e 2846 int r = 0;
d38ceaf9
AD
2847
2848 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2849 return 0;
2850
d38ceaf9
AD
2851 if (resume) {
2852 pci_set_power_state(dev->pdev, PCI_D0);
2853 pci_restore_state(dev->pdev);
74b0b157 2854 r = pci_enable_device(dev->pdev);
03161a6e 2855 if (r)
4d3b9ae5 2856 return r;
d38ceaf9
AD
2857 }
2858
2859 /* post card */
39c640c0 2860 if (amdgpu_device_need_post(adev)) {
74b0b157 2861 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2862 if (r)
2863 DRM_ERROR("amdgpu asic init failed\n");
2864 }
d38ceaf9 2865
06ec9070 2866 r = amdgpu_device_ip_resume(adev);
e6707218 2867 if (r) {
06ec9070 2868 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2869 return r;
e6707218 2870 }
5ceb54c6
AD
2871 amdgpu_fence_driver_resume(adev);
2872
d38ceaf9 2873
06ec9070 2874 r = amdgpu_device_ip_late_init(adev);
03161a6e 2875 if (r)
4d3b9ae5 2876 return r;
d38ceaf9 2877
fe1053b7
AD
2878 if (!amdgpu_device_has_dc_support(adev)) {
2879 /* pin cursors */
2880 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2881 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2882
2883 if (amdgpu_crtc->cursor_bo) {
2884 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2885 r = amdgpu_bo_reserve(aobj, true);
2886 if (r == 0) {
2887 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2888 if (r != 0)
2889 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2890 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2891 amdgpu_bo_unreserve(aobj);
2892 }
756e6880
AD
2893 }
2894 }
2895 }
ba997709
YZ
2896 r = amdgpu_amdkfd_resume(adev);
2897 if (r)
2898 return r;
756e6880 2899
96a5d8d4
LL
2900 /* Make sure IB tests flushed */
2901 flush_delayed_work(&adev->late_init_work);
2902
d38ceaf9
AD
2903 /* blat the mode back in */
2904 if (fbcon) {
4562236b
HW
2905 if (!amdgpu_device_has_dc_support(adev)) {
2906 /* pre DCE11 */
2907 drm_helper_resume_force_mode(dev);
2908
2909 /* turn on display hw */
2910 drm_modeset_lock_all(dev);
2911 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2912 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2913 }
2914 drm_modeset_unlock_all(dev);
d38ceaf9 2915 }
4d3b9ae5 2916 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2917 }
2918
2919 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2920
2921 /*
2922 * Most of the connector probing functions try to acquire runtime pm
2923 * refs to ensure that the GPU is powered on when connector polling is
2924 * performed. Since we're calling this from a runtime PM callback,
2925 * trying to acquire rpm refs will cause us to deadlock.
2926 *
2927 * Since we're guaranteed to be holding the rpm lock, it's safe to
2928 * temporarily disable the rpm helpers so this doesn't deadlock us.
2929 */
2930#ifdef CONFIG_PM
2931 dev->dev->power.disable_depth++;
2932#endif
4562236b
HW
2933 if (!amdgpu_device_has_dc_support(adev))
2934 drm_helper_hpd_irq_event(dev);
2935 else
2936 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2937#ifdef CONFIG_PM
2938 dev->dev->power.disable_depth--;
2939#endif
44779b43
RZ
2940 adev->in_suspend = false;
2941
4d3b9ae5 2942 return 0;
d38ceaf9
AD
2943}
2944
e3ecdffa
AD
2945/**
2946 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2947 *
2948 * @adev: amdgpu_device pointer
2949 *
2950 * The list of all the hardware IPs that make up the asic is walked and
2951 * the check_soft_reset callbacks are run. check_soft_reset determines
2952 * if the asic is still hung or not.
2953 * Returns true if any of the IPs are still in a hung state, false if not.
2954 */
06ec9070 2955static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2956{
2957 int i;
2958 bool asic_hang = false;
2959
f993d628
ML
2960 if (amdgpu_sriov_vf(adev))
2961 return true;
2962
8bc04c29
AD
2963 if (amdgpu_asic_need_full_reset(adev))
2964 return true;
2965
63fbf42f 2966 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2967 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2968 continue;
a1255107
AD
2969 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2970 adev->ip_blocks[i].status.hang =
2971 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2972 if (adev->ip_blocks[i].status.hang) {
2973 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
2974 asic_hang = true;
2975 }
2976 }
2977 return asic_hang;
2978}
2979
e3ecdffa
AD
2980/**
2981 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2982 *
2983 * @adev: amdgpu_device pointer
2984 *
2985 * The list of all the hardware IPs that make up the asic is walked and the
2986 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
2987 * handles any IP specific hardware or software state changes that are
2988 * necessary for a soft reset to succeed.
2989 * Returns 0 on success, negative error code on failure.
2990 */
06ec9070 2991static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
2992{
2993 int i, r = 0;
2994
2995 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2996 if (!adev->ip_blocks[i].status.valid)
d31a501e 2997 continue;
a1255107
AD
2998 if (adev->ip_blocks[i].status.hang &&
2999 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3000 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3001 if (r)
3002 return r;
3003 }
3004 }
3005
3006 return 0;
3007}
3008
e3ecdffa
AD
3009/**
3010 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3011 *
3012 * @adev: amdgpu_device pointer
3013 *
3014 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3015 * reset is necessary to recover.
3016 * Returns true if a full asic reset is required, false if not.
3017 */
06ec9070 3018static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3019{
da146d3b
AD
3020 int i;
3021
8bc04c29
AD
3022 if (amdgpu_asic_need_full_reset(adev))
3023 return true;
3024
da146d3b 3025 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3026 if (!adev->ip_blocks[i].status.valid)
da146d3b 3027 continue;
a1255107
AD
3028 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3029 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3030 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3031 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3032 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3033 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3034 DRM_INFO("Some block need full reset!\n");
3035 return true;
3036 }
3037 }
35d782fe
CZ
3038 }
3039 return false;
3040}
3041
e3ecdffa
AD
3042/**
3043 * amdgpu_device_ip_soft_reset - do a soft reset
3044 *
3045 * @adev: amdgpu_device pointer
3046 *
3047 * The list of all the hardware IPs that make up the asic is walked and the
3048 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3049 * IP specific hardware or software state changes that are necessary to soft
3050 * reset the IP.
3051 * Returns 0 on success, negative error code on failure.
3052 */
06ec9070 3053static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3054{
3055 int i, r = 0;
3056
3057 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3058 if (!adev->ip_blocks[i].status.valid)
35d782fe 3059 continue;
a1255107
AD
3060 if (adev->ip_blocks[i].status.hang &&
3061 adev->ip_blocks[i].version->funcs->soft_reset) {
3062 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3063 if (r)
3064 return r;
3065 }
3066 }
3067
3068 return 0;
3069}
3070
e3ecdffa
AD
3071/**
3072 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3073 *
3074 * @adev: amdgpu_device pointer
3075 *
3076 * The list of all the hardware IPs that make up the asic is walked and the
3077 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3078 * handles any IP specific hardware or software state changes that are
3079 * necessary after the IP has been soft reset.
3080 * Returns 0 on success, negative error code on failure.
3081 */
06ec9070 3082static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3083{
3084 int i, r = 0;
3085
3086 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3087 if (!adev->ip_blocks[i].status.valid)
35d782fe 3088 continue;
a1255107
AD
3089 if (adev->ip_blocks[i].status.hang &&
3090 adev->ip_blocks[i].version->funcs->post_soft_reset)
3091 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3092 if (r)
3093 return r;
3094 }
3095
3096 return 0;
3097}
3098
e3ecdffa 3099/**
c33adbc7 3100 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3101 *
3102 * @adev: amdgpu_device pointer
3103 *
3104 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3105 * restore things like GPUVM page tables after a GPU reset where
3106 * the contents of VRAM might be lost.
403009bf
CK
3107 *
3108 * Returns:
3109 * 0 on success, negative error code on failure.
e3ecdffa 3110 */
c33adbc7 3111static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3112{
c41d1cf6 3113 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3114 struct amdgpu_bo *shadow;
3115 long r = 1, tmo;
c41d1cf6
ML
3116
3117 if (amdgpu_sriov_runtime(adev))
b045d3af 3118 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3119 else
3120 tmo = msecs_to_jiffies(100);
3121
3122 DRM_INFO("recover vram bo from shadow start\n");
3123 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3124 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3125
3126 /* No need to recover an evicted BO */
3127 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3128 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3129 continue;
3130
3131 r = amdgpu_bo_restore_shadow(shadow, &next);
3132 if (r)
3133 break;
3134
c41d1cf6
ML
3135 if (fence) {
3136 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3137 dma_fence_put(fence);
3138 fence = next;
3139 if (r <= 0)
c41d1cf6 3140 break;
403009bf
CK
3141 } else {
3142 fence = next;
c41d1cf6 3143 }
c41d1cf6
ML
3144 }
3145 mutex_unlock(&adev->shadow_list_lock);
3146
403009bf
CK
3147 if (fence)
3148 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3149 dma_fence_put(fence);
3150
403009bf 3151 if (r <= 0 || tmo <= 0) {
c41d1cf6 3152 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3153 return -EIO;
3154 }
c41d1cf6 3155
403009bf
CK
3156 DRM_INFO("recover vram bo from shadow done\n");
3157 return 0;
c41d1cf6
ML
3158}
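
/*
 * Why the fence handoff above works (explanatory sketch): each loop
 * iteration issues the restore for the current shadow and only then
 * waits on the fence from the previous one, so the copy engine stays
 * busy while the CPU walks the list; the final fence is waited on after
 * the loop. A strictly serial variant, for contrast, would be:
 *
 *	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
 *		r = amdgpu_bo_restore_shadow(shadow, &next);
 *		if (r)
 *			break;
 *		r = dma_fence_wait_timeout(next, false, tmo);	// no overlap
 *		dma_fence_put(next);
 *		if (r <= 0)
 *			break;
 *	}
 */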
3159
e3ecdffa 3160/**
06ec9070 3161 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
a90ad3c2
ML
3162 *
3163 * @adev: amdgpu device pointer
a90ad3c2 3164 *
5740682e 3165 * Attempt a soft reset or, if needed, a full reset and reinitialize the asic.
3f48c681 3166 * Returns 0 on success, negative error code on failure.
e3ecdffa 3167 */
c41d1cf6 3168static int amdgpu_device_reset(struct amdgpu_device *adev)
a90ad3c2 3169{
5740682e
ML
3170 bool need_full_reset, vram_lost = 0;
3171 int r;
a90ad3c2 3172
06ec9070 3173 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
a90ad3c2 3174
5740682e 3175 if (!need_full_reset) {
06ec9070
AD
3176 amdgpu_device_ip_pre_soft_reset(adev);
3177 r = amdgpu_device_ip_soft_reset(adev);
3178 amdgpu_device_ip_post_soft_reset(adev);
3179 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5740682e
ML
3180 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3181 need_full_reset = true;
3182 }
5740682e 3183 }
a90ad3c2 3184
5740682e 3185 if (need_full_reset) {
cdd61df6 3186 r = amdgpu_device_ip_suspend(adev);
a90ad3c2 3187
5740682e 3188retry:
5740682e 3189 r = amdgpu_asic_reset(adev);
5740682e
ML
3190 /* post card */
3191 amdgpu_atom_asic_init(adev->mode_info.atom_context);
65781c78 3192
5740682e
ML
3193 if (!r) {
3194 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
06ec9070 3195 r = amdgpu_device_ip_resume_phase1(adev);
5740682e
ML
3196 if (r)
3197 goto out;
65781c78 3198
06ec9070 3199 vram_lost = amdgpu_device_check_vram_lost(adev);
5740682e
ML
3200 if (vram_lost) {
3201 DRM_ERROR("VRAM is lost!\n");
3202 atomic_inc(&adev->vram_lost_counter);
3203 }
3204
c1c7ce8f
CK
3205 r = amdgpu_gtt_mgr_recover(
3206 &adev->mman.bdev.man[TTM_PL_TT]);
5740682e
ML
3207 if (r)
3208 goto out;
3209
7a3e0bb2
RZ
3210 r = amdgpu_device_fw_loading(adev);
3211 if (r)
3212 return r;
3213
06ec9070 3214 r = amdgpu_device_ip_resume_phase2(adev);
5740682e
ML
3215 if (r)
3216 goto out;
3217
3218 if (vram_lost)
06ec9070 3219 amdgpu_device_fill_reset_magic(adev);
65781c78 3220 }
5740682e 3221 }
65781c78 3222
5740682e
ML
3223out:
3224 if (!r) {
3225 amdgpu_irq_gpu_reset_resume_helper(adev);
3226 r = amdgpu_ib_ring_tests(adev);
3227 if (r) {
3228 dev_err(adev->dev, "ib ring test failed (%d).\n", r);
cdd61df6 3229 r = amdgpu_device_ip_suspend(adev);
5740682e
ML
3230 need_full_reset = true;
3231 goto retry;
3232 }
3233 }
65781c78 3234
c33adbc7
CK
3235 if (!r)
3236 r = amdgpu_device_recover_vram(adev);
a90ad3c2 3237
5740682e
ML
3238 return r;
3239}
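
/*
 * Control-flow summary of the bare-metal reset above (informational):
 *
 *	soft path:  pre_soft_reset -> soft_reset -> post_soft_reset,
 *	            escalating to a full reset if blocks are still hung
 *	full path:  ip_suspend -> asic_reset -> post card ->
 *	            resume_phase1 -> vram-lost check -> GTT recover ->
 *	            fw loading -> resume_phase2
 *	verify:     IB ring tests; on failure, suspend and loop back to
 *	            the "retry" label for another full reset
 */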
a90ad3c2 3240
e3ecdffa 3241/**
06ec9070 3242 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3243 *
3244 * @adev: amdgpu device pointer
87e3f136 3245 * @from_hypervisor: request from hypervisor
5740682e
ML
3246 *
3247 * Do a VF FLR and reinitialize the asic.
3f48c681 3248 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3249 */
3250static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3251 bool from_hypervisor)
5740682e
ML
3252{
3253 int r;
3254
3255 if (from_hypervisor)
3256 r = amdgpu_virt_request_full_gpu(adev, true);
3257 else
3258 r = amdgpu_virt_reset_gpu(adev);
3259 if (r)
3260 return r;
a90ad3c2
ML
3261
3262 /* Resume IP prior to SMC */
06ec9070 3263 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3264 if (r)
3265 goto error;
a90ad3c2
ML
3266
3267 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3268 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3269
7a3e0bb2
RZ
3270 r = amdgpu_device_fw_loading(adev);
3271 if (r)
3272 return r;
3273
a90ad3c2 3274 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3275 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3276 if (r)
3277 goto error;
a90ad3c2
ML
3278
3279 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3280 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3281
abc34253
ED
3282error:
3283 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3284 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3285 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3286 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3287 }
3288
3289 return r;
3290}
3291
12938fad
CK
3292/**
3293 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3294 *
3295 * @adev: amdgpu device pointer
3296 *
3297 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3298 * a hung GPU.
3299 */
3300bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3301{
3302 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3303 DRM_INFO("Timeout, but no hardware hang detected.\n");
3304 return false;
3305 }
3306
3ba7b418
AG
3307 if (amdgpu_gpu_recovery == 0)
3308 goto disabled;
3309
3310 if (amdgpu_sriov_vf(adev))
3311 return true;
3312
3313 if (amdgpu_gpu_recovery == -1) {
3314 switch (adev->asic_type) {
3315 case CHIP_TOPAZ:
3316 case CHIP_TONGA:
3317 case CHIP_FIJI:
3318 case CHIP_POLARIS10:
3319 case CHIP_POLARIS11:
3320 case CHIP_POLARIS12:
3321 case CHIP_VEGAM:
3322 case CHIP_VEGA20:
3323 case CHIP_VEGA10:
3324 case CHIP_VEGA12:
3325 break;
3326 default:
3327 goto disabled;
3328 }
12938fad
CK
3329 }
3330
3331 return true;
3ba7b418
AG
3332
3333disabled:
3334 DRM_INFO("GPU recovery disabled.\n");
3335 return false;
12938fad
CK
3336}
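
/*
 * Usage note (illustrative): amdgpu_gpu_recovery is the tri-state
 * "gpu_recovery" module parameter, -1 auto (the per-ASIC switch above),
 * 0 never recover, 1 always attempt recovery:
 *
 *	modprobe amdgpu gpu_recovery=1
 */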
3337
d38ceaf9 3338/**
5f152b5e 3339 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
d38ceaf9
AD
3340 *
3341 * @adev: amdgpu device pointer
5740682e 3342 * @job: which job triggered the hang
d38ceaf9 3343 *
5740682e 3344 * Attempt to reset the GPU if it has hung (all asics).
d38ceaf9
AD
3345 * Returns 0 for success or an error on failure.
3346 */
5f152b5e 3347int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
12938fad 3348 struct amdgpu_job *job)
d38ceaf9 3349{
5740682e 3350 int i, r, resched;
fb140b29 3351
5740682e
ML
3352 dev_info(adev->dev, "GPU reset begin!\n");
3353
13a752e3 3354 mutex_lock(&adev->lock_reset);
d94aed5a 3355 atomic_inc(&adev->gpu_reset_counter);
13a752e3 3356 adev->in_gpu_reset = 1;
d38ceaf9 3357
5c6dd71e
SL
3358 /* Block kfd */
3359 amdgpu_amdkfd_pre_reset(adev);
3360
a3c47d6b
CZ
3361 /* block TTM */
3362 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
71182665 3363
71182665 3364 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3365 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3366 struct amdgpu_ring *ring = adev->rings[i];
3367
51687759 3368 if (!ring || !ring->sched.thread)
0875dc9e 3369 continue;
5740682e 3370
71182665
ML
3371 kthread_park(ring->sched.thread);
3372
734afd4b 3373 if (job && job->base.sched != &ring->sched)
5740682e
ML
3374 continue;
3375
67ccea60 3376 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3377
2f9d4084
ML
3378 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3379 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3380 }
d38ceaf9 3381
5740682e 3382 if (amdgpu_sriov_vf(adev))
c41d1cf6 3383 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5740682e 3384 else
c41d1cf6 3385 r = amdgpu_device_reset(adev);
5740682e 3386
71182665
ML
3387 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3388 struct amdgpu_ring *ring = adev->rings[i];
53cdccd5 3389
71182665
ML
3390 if (!ring || !ring->sched.thread)
3391 continue;
5740682e 3392
71182665
ML
3393 /* only need to recover the scheduler of the given job's ring,
3394 * or all rings (in the case @job is NULL)
3395 * after the above amdgpu reset has completed
3396 */
3320b8d2 3397 if ((!job || job->base.sched == &ring->sched) && !r)
1b1f42d8 3398 drm_sched_job_recovery(&ring->sched);
5740682e 3399
71182665 3400 kthread_unpark(ring->sched.thread);
d38ceaf9
AD
3401 }
3402
bf830604 3403 if (!amdgpu_device_has_dc_support(adev)) {
4562236b 3404 drm_helper_resume_force_mode(adev->ddev);
5740682e 3405 }
d38ceaf9
AD
3406
3407 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
5740682e 3408
89041940 3409 if (r) {
d38ceaf9 3410 /* bad news, how to tell it to userspace ? */
5740682e
ML
3411 dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3412 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3413 } else {
3f48c681 3414 dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
89041940 3415 }
d38ceaf9 3416
5c6dd71e
SL
3417 /*unlock kfd */
3418 amdgpu_amdkfd_post_reset(adev);
89041940 3419 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3420 adev->in_gpu_reset = 0;
3421 mutex_unlock(&adev->lock_reset);
d38ceaf9
AD
3422 return r;
3423}
3424
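/*
 * Illustrative, hypothetical call site (compiled out): recovery can
 * also be requested without a guilty job, e.g. for a hang detected
 * outside the scheduler. Passing job = NULL makes the loops above
 * reset and recover every ring rather than just the guilty one.
 */
#if 0
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
#endif
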
/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap;
	enum pcie_link_width link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		pdev = adev->ddev->pdev->bus->self;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		pdev = adev->ddev->pdev->bus->self;
		link_width = pcie_get_width_cap(pdev);
		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}

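/*
 * Illustrative sketch (compiled out, hypothetical helper): consumers
 * such as the power-management code read the masks filled in above and
 * pick the highest speed both the asic and the platform support. A
 * minimal decode of the platform half of the gen mask might look like
 * this:
 */
#if 0
static unsigned int example_max_pcie_gen(struct amdgpu_device *adev)
{
	/* walk down from the fastest supported link speed */
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}
#endif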