/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper function
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

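/*
 * Illustrative sketch only (not part of the upstream driver): a typical
 * read-modify-write cycle built on the two helpers above. The register
 * offset (0x1234) and bit mask (0x1) are hypothetical placeholders.
 */
static void __maybe_unused amdgpu_example_mmio_rmw(struct amdgpu_device *adev)
{
	uint32_t tmp;

	tmp = amdgpu_mm_rreg(adev, 0x1234, 0);	/* hypothetical dword offset */
	tmp |= 0x1;				/* hypothetical enable bit */
	amdgpu_mm_wreg(adev, 0x1234, tmp, 0);
}
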
/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

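/*
 * Illustrative sketch only: how a ring submission path might kick the GPU
 * through the doorbell helpers above, using the 64-bit variant on VEGA10+
 * and the dword variant on older asics. The index and write pointer are
 * supplied by the caller; nothing here names a real ring's doorbell.
 */
static void __maybe_unused amdgpu_example_kick_doorbell(struct amdgpu_device *adev,
							u32 doorbell_index, u64 wptr)
{
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, lower_32_bits(wptr));
}
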
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

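/*
 * Illustrative sketch only: a golden register list is a flat array of
 * {offset, and_mask, or_mask} triples. An and_mask of 0xffffffff replaces
 * the register outright; otherwise the bits in and_mask are cleared before
 * or_mask is OR'ed in. The offsets and masks below are hypothetical.
 */
static const u32 amdgpu_example_golden_settings[] __maybe_unused = {
	/* reg,    and_mask,   or_mask */
	0x0001, 0xffffffff, 0x00000001,	/* overwrite the whole register */
	0x0002, 0x0000ff00, 0x00001200,	/* clear bits 8-15, then OR in 0x12 << 8 */
};

static void __maybe_unused amdgpu_example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						amdgpu_example_golden_settings,
						ARRAY_SIZE(amdgpu_example_golden_settings));
}
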
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

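/*
 * Illustrative sketch only: the usual lifecycle of a writeback slot. The
 * index returned by amdgpu_device_wb_get() is a dword offset, so the CPU
 * view is adev->wb.wb[wb] and the GPU address is adev->wb.gpu_addr + wb * 4.
 */
static int __maybe_unused amdgpu_example_use_wb(struct amdgpu_device *adev)
{
	u64 gpu_addr;
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	adev->wb.wb[wb] = 0;			/* CPU-side clear of the slot */
	gpu_addr = adev->wb.gpu_addr + wb * 4;	/* address handed to the GPU */
	(void)gpu_addr;				/* e.g. for fence/rptr writes */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
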
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs.
		 * smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

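/*
 * Worked example for the math above: with 4KB pages (12 offset bits) and
 * amdgpu_vm_block_size = 9, one page-table block covers
 * 2^9 entries * 4KB = 2MB of virtual address space; larger values shift
 * more translation bits into the page table and fewer into the directory.
 */
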
/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

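/*
 * Illustrative sketch only: how these ops would be handed to vga_switcheroo.
 * The driver performs the real registration later in its init path; this
 * merely shows the shape of the call.
 */
static int __maybe_unused amdgpu_example_register_switcheroo(struct amdgpu_device *adev)
{
	bool runtime = amdgpu_device_is_px(adev->ddev);

	return vga_switcheroo_register_client(adev->pdev,
					      &amdgpu_switcheroo_ops, runtime);
}
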
/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

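/*
 * Illustrative sketch only: gating a code path on the version of an IP
 * block, using the comparison helper above (0 means "equal or newer").
 */
static bool __maybe_unused amdgpu_example_has_gfx_v8(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 0) == 0;
}
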
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

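/*
 * Illustrative note only: per-asic setup code registers its IP blocks in
 * bring-up order with the helper above, along the lines of (hypothetical
 * excerpt of a block list):
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	...
 *
 * The real per-asic lists live in vi.c, cik.c, si.c and soc15.c.
 */
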
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

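/*
 * Illustrative note only: per the parsing above, the module parameter is a
 * semicolon-separated list of "<pci address>[,<crtc count>]" entries, or
 * "all", e.g. (hypothetical bus address):
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 */
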
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

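/*
 * Illustrative sketch only: how the two helpers above pair up around an
 * asic reset (the real reset path appears later in this file).
 */
static bool __maybe_unused amdgpu_example_vram_lost_after_reset(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);	/* snapshot before the reset */
	/* ... perform the asic reset here ... */
	return amdgpu_device_check_vram_lost(adev);	/* compare afterwards */
}
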
e3ecdffa 1726/**
1112a46b 1727 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1728 *
1729 * @adev: amdgpu_device pointer
1730 *
e3ecdffa 1731 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1732 * set_clockgating_state callbacks are run.
1733 * Late initialization pass enabling clockgating for hardware IPs.
1734 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
1735 * Returns 0 on success, negative error code on failure.
1736 */
fdd34271 1737
1112a46b
RZ
1738static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1739 enum amd_clockgating_state state)
d38ceaf9 1740{
1112a46b 1741 int i, j, r;
d38ceaf9 1742
4a2ba394
SL
1743 if (amdgpu_emu_mode == 1)
1744 return 0;
1745
1112a46b
RZ
1746 for (j = 0; j < adev->num_ip_blocks; j++) {
1747 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1748 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1749 continue;
4a446d55 1750 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1751 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1752 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1753 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1754 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1755 /* enable clockgating to save power */
a1255107 1756 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1757 state);
4a446d55
AD
1758 if (r) {
1759 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1760 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1761 return r;
1762 }
b0b00ff1 1763 }
d38ceaf9 1764 }
06b18f61 1765
c9f96fd5
RZ
1766 return 0;
1767}
1768
1112a46b 1769static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1770{
1112a46b 1771 int i, j, r;
06b18f61 1772
c9f96fd5
RZ
1773 if (amdgpu_emu_mode == 1)
1774 return 0;
1775
1112a46b
RZ
1776 for (j = 0; j < adev->num_ip_blocks; j++) {
1777 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1778 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1779 continue;
 1780 /* skip PG for VCE/UVD, it's handled specially */
1781 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1782 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1783 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1784 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1785 /* enable powergating to save power */
1786 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1787 state);
c9f96fd5
RZ
1788 if (r) {
1789 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1790 adev->ip_blocks[i].version->funcs->name, r);
1791 return r;
1792 }
1793 }
1794 }
2dc80b00
S
1795 return 0;
1796}
1797
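A stand-alone illustration (not driver code) of the index trick shared by the two loops above: the same loop body visits the IP blocks front to back when gating and back to front when ungating, so ungate order is the exact reverse of gate order.

#include <stdio.h>

int main(void)
{
	int num_ip_blocks = 4, j, gate;

	for (gate = 1; gate >= 0; gate--) {
		for (j = 0; j < num_ip_blocks; j++) {
			/* mirrors: i = state == GATE ? j : num_ip_blocks - j - 1 */
			int i = gate ? j : num_ip_blocks - j - 1;

			printf("%s block %d\n", gate ? "gate" : "ungate", i);
		}
	}
	return 0;
}
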
e3ecdffa
AD
1798/**
1799 * amdgpu_device_ip_late_init - run late init for hardware IPs
1800 *
1801 * @adev: amdgpu_device pointer
1802 *
1803 * Late initialization pass for hardware IPs. The list of all the hardware
1804 * IPs that make up the asic is walked and the late_init callbacks are run.
1805 * late_init covers any special initialization that an IP requires
 1806 * after all of them have been initialized or something that needs to happen
1807 * late in the init process.
1808 * Returns 0 on success, negative error code on failure.
1809 */
06ec9070 1810static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1811{
1812 int i = 0, r;
1813
1814 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1815 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1816 continue;
1817 if (adev->ip_blocks[i].version->funcs->late_init) {
1818 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1819 if (r) {
1820 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1821 adev->ip_blocks[i].version->funcs->name, r);
1822 return r;
1823 }
2dc80b00 1824 }
73f847db 1825 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1826 }
1827
1112a46b
RZ
1828 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1829 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1830
2c773de2
S
1831 queue_delayed_work(system_wq, &adev->late_init_work,
1832 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1833
06ec9070 1834 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1835
1836 return 0;
1837}
1838
e3ecdffa
AD
1839/**
1840 * amdgpu_device_ip_fini - run fini for hardware IPs
1841 *
1842 * @adev: amdgpu_device pointer
1843 *
1844 * Main teardown pass for hardware IPs. The list of all the hardware
1845 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1846 * are run. hw_fini tears down the hardware associated with each IP
1847 * and sw_fini tears down any software state associated with each IP.
1848 * Returns 0 on success, negative error code on failure.
1849 */
06ec9070 1850static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1851{
1852 int i, r;
1853
1884734a 1854 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1855
1856 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1857 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1858
3e96dbfd
AD
1859 /* need to disable SMC first */
1860 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1861 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1862 continue;
fdd34271 1863 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1864 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1865 /* XXX handle errors */
1866 if (r) {
1867 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1868 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1869 }
a1255107 1870 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1871 break;
1872 }
1873 }
1874
d38ceaf9 1875 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1876 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1877 continue;
8201a67a 1878
a1255107 1879 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1880 /* XXX handle errors */
2c1a2784 1881 if (r) {
a1255107
AD
1882 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1883 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1884 }
8201a67a 1885
a1255107 1886 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1887 }
1888
9950cda2 1889
d38ceaf9 1890 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1891 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1892 continue;
c12aba3a
ML
1893
1894 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1895 amdgpu_ucode_free_bo(adev);
1e256e27 1896 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1897 amdgpu_device_wb_fini(adev);
1898 amdgpu_device_vram_scratch_fini(adev);
1899 }
1900
a1255107 1901 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1902 /* XXX handle errors */
2c1a2784 1903 if (r) {
a1255107
AD
1904 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1905 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1906 }
a1255107
AD
1907 adev->ip_blocks[i].status.sw = false;
1908 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1909 }
1910
a6dcfd9c 1911 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1912 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1913 continue;
a1255107
AD
1914 if (adev->ip_blocks[i].version->funcs->late_fini)
1915 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1916 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1917 }
1918
030308fc 1919 if (amdgpu_sriov_vf(adev))
24136135
ML
1920 if (amdgpu_virt_release_full_gpu(adev, false))
1921 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1922
d38ceaf9
AD
1923 return 0;
1924}
1925
b55c9e7a
EQ
1926static int amdgpu_device_enable_mgpu_fan_boost(void)
1927{
1928 struct amdgpu_gpu_instance *gpu_ins;
1929 struct amdgpu_device *adev;
1930 int i, ret = 0;
1931
1932 mutex_lock(&mgpu_info.mutex);
1933
1934 /*
1935 * MGPU fan boost feature should be enabled
1936 * only when there are two or more dGPUs in
1937 * the system
1938 */
1939 if (mgpu_info.num_dgpu < 2)
1940 goto out;
1941
1942 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1943 gpu_ins = &(mgpu_info.gpu_ins[i]);
1944 adev = gpu_ins->adev;
1945 if (!(adev->flags & AMD_IS_APU) &&
1946 !gpu_ins->mgpu_fan_enabled &&
1947 adev->powerplay.pp_funcs &&
1948 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1949 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1950 if (ret)
1951 break;
1952
1953 gpu_ins->mgpu_fan_enabled = 1;
1954 }
1955 }
1956
1957out:
1958 mutex_unlock(&mgpu_info.mutex);
1959
1960 return ret;
1961}
1962
e3ecdffa 1963/**
1112a46b 1964 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests and mgpu fan boost
e3ecdffa 1965 *
1112a46b 1966 * @work: work_struct.
e3ecdffa 1967 */
06ec9070 1968static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1969{
1970 struct amdgpu_device *adev =
1971 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1972 int r;
1973
1974 r = amdgpu_ib_ring_tests(adev);
1975 if (r)
1976 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1977
1978 r = amdgpu_device_enable_mgpu_fan_boost();
1979 if (r)
1980 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
1981}
1982
1e317b99
RZ
1983static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
1984{
1985 struct amdgpu_device *adev =
1986 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
1987
1988 mutex_lock(&adev->gfx.gfx_off_mutex);
1989 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
1990 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
1991 adev->gfx.gfx_off_state = true;
1992 }
1993 mutex_unlock(&adev->gfx.gfx_off_mutex);
1994}
1995
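A hedged sketch (not a verbatim copy of the driver's helper) of the request-count path that pairs with the delayed worker above: users that need the GFX block awake bump gfx_off_req_count and leave GFXOFF immediately, and when the last request is dropped the delayed work is queued so GFXOFF is re-entered only after a quiet period. The function name and the 100 ms delay are illustrative assumptions.

/* illustrative sketch, assumes kernel context and the fields used above */
void example_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (enable) {
		/* last user gone: allow GFXOFF again after a short delay */
		if (adev->gfx.gfx_off_req_count && !--adev->gfx.gfx_off_req_count)
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
					      msecs_to_jiffies(100));
	} else {
		/* first user: leave GFXOFF right away */
		if (!adev->gfx.gfx_off_req_count++ && adev->gfx.gfx_off_state) {
			if (!amdgpu_dpm_set_powergating_by_smu(adev,
					AMD_IP_BLOCK_TYPE_GFX, false))
				adev->gfx.gfx_off_state = false;
		}
	}
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}
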
e3ecdffa 1996/**
e7854a03 1997 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
1998 *
1999 * @adev: amdgpu_device pointer
2000 *
 2001 * Main suspend function for hardware IPs (phase 1). Clockgating and
 2002 * powergating are disabled, the list of all the hardware IPs that make
 2003 * up the asic is walked, and the suspend callbacks are run for the
 2004 * display (DCE) blocks. suspend puts each IP into a state suitable for suspend.
2005 * Returns 0 on success, negative error code on failure.
2006 */
e7854a03
AD
2007static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2008{
2009 int i, r;
2010
05df1f01 2011 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2012 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2013
e7854a03
AD
2014 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2015 if (!adev->ip_blocks[i].status.valid)
2016 continue;
2017 /* displays are handled separately */
2018 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2019 /* XXX handle errors */
2020 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2021 /* XXX handle errors */
2022 if (r) {
2023 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2024 adev->ip_blocks[i].version->funcs->name, r);
2025 }
2026 }
2027 }
2028
e7854a03
AD
2029 return 0;
2030}
2031
2032/**
2033 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2034 *
2035 * @adev: amdgpu_device pointer
2036 *
 2037 * Main suspend function for hardware IPs (phase 2). The list of all the
 2038 * hardware IPs that make up the asic is walked and the suspend callbacks
 2039 * are run for every block except the displays, which were handled in
 2040 * phase 1. suspend puts each IP into a state suitable for suspend.
2041 * Returns 0 on success, negative error code on failure.
2042 */
2043static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2044{
2045 int i, r;
2046
2047 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2048 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2049 continue;
e7854a03
AD
2050 /* displays are handled in phase1 */
2051 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2052 continue;
d38ceaf9 2053 /* XXX handle errors */
a1255107 2054 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2055 /* XXX handle errors */
2c1a2784 2056 if (r) {
a1255107
AD
2057 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2058 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2059 }
d38ceaf9
AD
2060 }
2061
2062 return 0;
2063}
2064
e7854a03
AD
2065/**
2066 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2067 *
2068 * @adev: amdgpu_device pointer
2069 *
2070 * Main suspend function for hardware IPs. The list of all the hardware
2071 * IPs that make up the asic is walked, clockgating is disabled and the
2072 * suspend callbacks are run. suspend puts the hardware and software state
2073 * in each IP into a state suitable for suspend.
2074 * Returns 0 on success, negative error code on failure.
2075 */
2076int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2077{
2078 int r;
2079
e7819644
YT
2080 if (amdgpu_sriov_vf(adev))
2081 amdgpu_virt_request_full_gpu(adev, false);
2082
e7854a03
AD
2083 r = amdgpu_device_ip_suspend_phase1(adev);
2084 if (r)
2085 return r;
2086 r = amdgpu_device_ip_suspend_phase2(adev);
2087
e7819644
YT
2088 if (amdgpu_sriov_vf(adev))
2089 amdgpu_virt_release_full_gpu(adev, false);
2090
e7854a03
AD
2091 return r;
2092}
2093
06ec9070 2094static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2095{
2096 int i, r;
2097
2cb681b6
ML
2098 static enum amd_ip_block_type ip_order[] = {
2099 AMD_IP_BLOCK_TYPE_GMC,
2100 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2101 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2102 AMD_IP_BLOCK_TYPE_IH,
2103 };
a90ad3c2 2104
2cb681b6
ML
2105 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2106 int j;
2107 struct amdgpu_ip_block *block;
a90ad3c2 2108
2cb681b6
ML
2109 for (j = 0; j < adev->num_ip_blocks; j++) {
2110 block = &adev->ip_blocks[j];
2111
2112 if (block->version->type != ip_order[i] ||
2113 !block->status.valid)
2114 continue;
2115
2116 r = block->version->funcs->hw_init(adev);
3f48c681 2117 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2118 if (r)
2119 return r;
a90ad3c2
ML
2120 }
2121 }
2122
2123 return 0;
2124}
2125
06ec9070 2126static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2127{
2128 int i, r;
2129
2cb681b6
ML
2130 static enum amd_ip_block_type ip_order[] = {
2131 AMD_IP_BLOCK_TYPE_SMC,
2132 AMD_IP_BLOCK_TYPE_DCE,
2133 AMD_IP_BLOCK_TYPE_GFX,
2134 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2135 AMD_IP_BLOCK_TYPE_UVD,
2136 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2137 };
a90ad3c2 2138
2cb681b6
ML
2139 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2140 int j;
2141 struct amdgpu_ip_block *block;
a90ad3c2 2142
2cb681b6
ML
2143 for (j = 0; j < adev->num_ip_blocks; j++) {
2144 block = &adev->ip_blocks[j];
2145
2146 if (block->version->type != ip_order[i] ||
2147 !block->status.valid)
2148 continue;
2149
2150 r = block->version->funcs->hw_init(adev);
3f48c681 2151 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2152 if (r)
2153 return r;
a90ad3c2
ML
2154 }
2155 }
2156
2157 return 0;
2158}
2159
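A stand-alone illustration (not driver code) of the fixed-order walk used by both SR-IOV re-init helpers above: the outer loop follows a hand-written priority list and the inner loop scans the discovered blocks for a match, so hw_init order is dictated by the list rather than by discovery order. The block types below are made up for the demo.

#include <stdio.h>

enum ip_type { IP_GMC, IP_COMMON, IP_PSP, IP_IH };

int main(void)
{
	static const enum ip_type order[]  = { IP_GMC, IP_COMMON, IP_PSP, IP_IH };
	static const enum ip_type blocks[] = { IP_COMMON, IP_IH, IP_GMC, IP_PSP };
	unsigned int i, j;

	for (i = 0; i < sizeof(order) / sizeof(order[0]); i++)
		for (j = 0; j < sizeof(blocks) / sizeof(blocks[0]); j++)
			if (blocks[j] == order[i])
				printf("hw_init block %u (type %d)\n", j, blocks[j]);
	return 0;
}
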
e3ecdffa
AD
2160/**
2161 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2162 *
2163 * @adev: amdgpu_device pointer
2164 *
2165 * First resume function for hardware IPs. The list of all the hardware
2166 * IPs that make up the asic is walked and the resume callbacks are run for
2167 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2168 * after a suspend and updates the software state as necessary. This
2169 * function is also used for restoring the GPU after a GPU reset.
2170 * Returns 0 on success, negative error code on failure.
2171 */
06ec9070 2172static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2173{
2174 int i, r;
2175
a90ad3c2
ML
2176 for (i = 0; i < adev->num_ip_blocks; i++) {
2177 if (!adev->ip_blocks[i].status.valid)
2178 continue;
a90ad3c2 2179 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2180 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2182 r = adev->ip_blocks[i].version->funcs->resume(adev);
2183 if (r) {
2184 DRM_ERROR("resume of IP block <%s> failed %d\n",
2185 adev->ip_blocks[i].version->funcs->name, r);
2186 return r;
2187 }
a90ad3c2
ML
2188 }
2189 }
2190
2191 return 0;
2192}
2193
e3ecdffa
AD
2194/**
2195 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2196 *
2197 * @adev: amdgpu_device pointer
2198 *
 2199 * Second resume function for hardware IPs. The list of all the hardware
2200 * IPs that make up the asic is walked and the resume callbacks are run for
2201 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2202 * functional state after a suspend and updates the software state as
2203 * necessary. This function is also used for restoring the GPU after a GPU
2204 * reset.
2205 * Returns 0 on success, negative error code on failure.
2206 */
06ec9070 2207static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2208{
2209 int i, r;
2210
2211 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2212 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2213 continue;
fcf0649f 2214 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2216 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2217 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2218 continue;
a1255107 2219 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2220 if (r) {
a1255107
AD
2221 DRM_ERROR("resume of IP block <%s> failed %d\n",
2222 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2223 return r;
2c1a2784 2224 }
d38ceaf9
AD
2225 }
2226
2227 return 0;
2228}
2229
e3ecdffa
AD
2230/**
2231 * amdgpu_device_ip_resume - run resume for hardware IPs
2232 *
2233 * @adev: amdgpu_device pointer
2234 *
2235 * Main resume function for hardware IPs. The hardware IPs
2236 * are split into two resume functions because they are
2237 * are also used in in recovering from a GPU reset and some additional
2238 * steps need to be take between them. In this case (S3/S4) they are
2239 * run sequentially.
2240 * Returns 0 on success, negative error code on failure.
2241 */
06ec9070 2242static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2243{
2244 int r;
2245
06ec9070 2246 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2247 if (r)
2248 return r;
7a3e0bb2
RZ
2249
2250 r = amdgpu_device_fw_loading(adev);
2251 if (r)
2252 return r;
2253
06ec9070 2254 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2255
2256 return r;
2257}
2258
e3ecdffa
AD
2259/**
2260 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2261 *
2262 * @adev: amdgpu_device pointer
2263 *
2264 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2265 */
4e99a44e 2266static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2267{
6867e1b5
ML
2268 if (amdgpu_sriov_vf(adev)) {
2269 if (adev->is_atom_fw) {
2270 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2271 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2272 } else {
2273 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2274 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2275 }
2276
2277 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2278 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2279 }
048765ad
AR
2280}
2281
e3ecdffa
AD
2282/**
2283 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2284 *
2285 * @asic_type: AMD asic type
2286 *
 2287 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2288 * Returns true if DC has support, false if not.
2289 */
4562236b
HW
2290bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2291{
2292 switch (asic_type) {
2293#if defined(CONFIG_DRM_AMD_DC)
2294 case CHIP_BONAIRE:
0d6fbccb 2295 case CHIP_KAVERI:
367e6687
AD
2296 case CHIP_KABINI:
2297 case CHIP_MULLINS:
d9fda248
HW
2298 /*
2299 * We have systems in the wild with these ASICs that require
2300 * LVDS and VGA support which is not supported with DC.
2301 *
2302 * Fallback to the non-DC driver here by default so as not to
2303 * cause regressions.
2304 */
2305 return amdgpu_dc > 0;
2306 case CHIP_HAWAII:
4562236b
HW
2307 case CHIP_CARRIZO:
2308 case CHIP_STONEY:
4562236b 2309 case CHIP_POLARIS10:
675fd32b 2310 case CHIP_POLARIS11:
2c8ad2d5 2311 case CHIP_POLARIS12:
675fd32b 2312 case CHIP_VEGAM:
4562236b
HW
2313 case CHIP_TONGA:
2314 case CHIP_FIJI:
42f8ffa1 2315 case CHIP_VEGA10:
dca7b401 2316 case CHIP_VEGA12:
c6034aa2 2317 case CHIP_VEGA20:
dc37a9a0 2318#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2319 case CHIP_RAVEN:
42f8ffa1 2320#endif
fd187853 2321 return amdgpu_dc != 0;
4562236b
HW
2322#endif
2323 default:
2324 return false;
2325 }
2326}
2327
2328/**
2329 * amdgpu_device_has_dc_support - check if dc is supported
2330 *
 2331 * @adev: amdgpu_device pointer
2332 *
2333 * Returns true for supported, false for not supported
2334 */
2335bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2336{
2555039d
XY
2337 if (amdgpu_sriov_vf(adev))
2338 return false;
2339
4562236b
HW
2340 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2341}
2342
d38ceaf9
AD
2343/**
2344 * amdgpu_device_init - initialize the driver
2345 *
2346 * @adev: amdgpu_device pointer
87e3f136 2347 * @ddev: drm dev pointer
d38ceaf9
AD
2348 * @pdev: pci dev pointer
2349 * @flags: driver flags
2350 *
2351 * Initializes the driver info and hw (all asics).
2352 * Returns 0 for success or an error on failure.
2353 * Called at driver startup.
2354 */
2355int amdgpu_device_init(struct amdgpu_device *adev,
2356 struct drm_device *ddev,
2357 struct pci_dev *pdev,
2358 uint32_t flags)
2359{
2360 int r, i;
2361 bool runtime = false;
95844d20 2362 u32 max_MBps;
d38ceaf9
AD
2363
2364 adev->shutdown = false;
2365 adev->dev = &pdev->dev;
2366 adev->ddev = ddev;
2367 adev->pdev = pdev;
2368 adev->flags = flags;
2f7d10b3 2369 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2370 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2371 if (amdgpu_emu_mode == 1)
2372 adev->usec_timeout *= 2;
770d13b1 2373 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2374 adev->accel_working = false;
2375 adev->num_rings = 0;
2376 adev->mman.buffer_funcs = NULL;
2377 adev->mman.buffer_funcs_ring = NULL;
2378 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2379 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2380 adev->gmc.gmc_funcs = NULL;
f54d1867 2381 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2382 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2383
2384 adev->smc_rreg = &amdgpu_invalid_rreg;
2385 adev->smc_wreg = &amdgpu_invalid_wreg;
2386 adev->pcie_rreg = &amdgpu_invalid_rreg;
2387 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2388 adev->pciep_rreg = &amdgpu_invalid_rreg;
2389 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2390 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2391 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2392 adev->didt_rreg = &amdgpu_invalid_rreg;
2393 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2394 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2395 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2396 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2397 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2398
3e39ab90
AD
2399 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2400 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2401 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2402
 2403 /* mutex initializations are all done here so we
 2404 * can recall functions without locking issues */
d38ceaf9 2405 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2406 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2407 mutex_init(&adev->pm.mutex);
2408 mutex_init(&adev->gfx.gpu_clock_mutex);
2409 mutex_init(&adev->srbm_mutex);
b8866c26 2410 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2411 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2412 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2413 mutex_init(&adev->mn_lock);
e23b74aa 2414 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2415 hash_init(adev->mn_hash);
13a752e3 2416 mutex_init(&adev->lock_reset);
d38ceaf9 2417
06ec9070 2418 amdgpu_device_check_arguments(adev);
d38ceaf9 2419
d38ceaf9
AD
2420 spin_lock_init(&adev->mmio_idx_lock);
2421 spin_lock_init(&adev->smc_idx_lock);
2422 spin_lock_init(&adev->pcie_idx_lock);
2423 spin_lock_init(&adev->uvd_ctx_idx_lock);
2424 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2425 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2426 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2427 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2428 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2429
0c4e7fa5
CZ
2430 INIT_LIST_HEAD(&adev->shadow_list);
2431 mutex_init(&adev->shadow_list_lock);
2432
795f2813
AR
2433 INIT_LIST_HEAD(&adev->ring_lru_list);
2434 spin_lock_init(&adev->ring_lru_list_lock);
2435
06ec9070
AD
2436 INIT_DELAYED_WORK(&adev->late_init_work,
2437 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2438 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2439 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2440
d23ee13f 2441 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2442 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2443
0fa49558
AX
2444 /* Registers mapping */
2445 /* TODO: block userspace mapping of io register */
da69c161
KW
2446 if (adev->asic_type >= CHIP_BONAIRE) {
2447 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2448 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2449 } else {
2450 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2451 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2452 }
d38ceaf9 2453
d38ceaf9
AD
2454 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2455 if (adev->rmmio == NULL) {
2456 return -ENOMEM;
2457 }
2458 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2459 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2460
705e519e 2461 /* doorbell bar mapping */
06ec9070 2462 amdgpu_device_doorbell_init(adev);
d38ceaf9
AD
2463
2464 /* io port mapping */
2465 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2466 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2467 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2468 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2469 break;
2470 }
2471 }
2472 if (adev->rio_mem == NULL)
b64a18c5 2473 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2474
5494d864
AD
2475 amdgpu_device_get_pcie_info(adev);
2476
d38ceaf9 2477 /* early init functions */
06ec9070 2478 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2479 if (r)
2480 return r;
2481
2482 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2483 /* this will fail for cards that aren't VGA class devices, just
2484 * ignore it */
06ec9070 2485 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2486
e9bef455 2487 if (amdgpu_device_is_px(ddev))
d38ceaf9 2488 runtime = true;
84c8b22e
LW
2489 if (!pci_is_thunderbolt_attached(adev->pdev))
2490 vga_switcheroo_register_client(adev->pdev,
2491 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2492 if (runtime)
2493 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2494
9475a943
SL
2495 if (amdgpu_emu_mode == 1) {
2496 /* post the asic on emulation mode */
2497 emu_soc_asic_init(adev);
bfca0289 2498 goto fence_driver_init;
9475a943 2499 }
bfca0289 2500
d38ceaf9 2501 /* Read BIOS */
83ba126a
AD
2502 if (!amdgpu_get_bios(adev)) {
2503 r = -EINVAL;
2504 goto failed;
2505 }
f7e9e9fe 2506
d38ceaf9 2507 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2508 if (r) {
2509 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2510 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2511 goto failed;
2c1a2784 2512 }
d38ceaf9 2513
4e99a44e
ML
2514 /* detect if we are with an SRIOV vbios */
2515 amdgpu_device_detect_sriov_bios(adev);
048765ad 2516
d38ceaf9 2517 /* Post card if necessary */
39c640c0 2518 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2519 if (!adev->bios) {
bec86378 2520 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2521 r = -EINVAL;
2522 goto failed;
d38ceaf9 2523 }
bec86378 2524 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2525 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2526 if (r) {
2527 dev_err(adev->dev, "gpu post error!\n");
2528 goto failed;
2529 }
d38ceaf9
AD
2530 }
2531
88b64e95
AD
2532 if (adev->is_atom_fw) {
2533 /* Initialize clocks */
2534 r = amdgpu_atomfirmware_get_clock_info(adev);
2535 if (r) {
2536 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2537 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2538 goto failed;
2539 }
2540 } else {
a5bde2f9
AD
2541 /* Initialize clocks */
2542 r = amdgpu_atombios_get_clock_info(adev);
2543 if (r) {
2544 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2545 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2546 goto failed;
a5bde2f9
AD
2547 }
2548 /* init i2c buses */
4562236b
HW
2549 if (!amdgpu_device_has_dc_support(adev))
2550 amdgpu_atombios_i2c_init(adev);
2c1a2784 2551 }
d38ceaf9 2552
bfca0289 2553fence_driver_init:
d38ceaf9
AD
2554 /* Fence driver */
2555 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2556 if (r) {
2557 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2558 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2559 goto failed;
2c1a2784 2560 }
d38ceaf9
AD
2561
2562 /* init the mode config */
2563 drm_mode_config_init(adev->ddev);
2564
06ec9070 2565 r = amdgpu_device_ip_init(adev);
d38ceaf9 2566 if (r) {
8840a387 2567 /* failed in exclusive mode due to timeout */
2568 if (amdgpu_sriov_vf(adev) &&
2569 !amdgpu_sriov_runtime(adev) &&
2570 amdgpu_virt_mmio_blocked(adev) &&
2571 !amdgpu_virt_wait_reset(adev)) {
2572 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2573 /* Don't send request since VF is inactive. */
2574 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2575 adev->virt.ops = NULL;
8840a387 2576 r = -EAGAIN;
2577 goto failed;
2578 }
06ec9070 2579 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2580 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2581 goto failed;
d38ceaf9
AD
2582 }
2583
2584 adev->accel_working = true;
2585
e59c0205
AX
2586 amdgpu_vm_check_compute_bug(adev);
2587
95844d20
MO
2588 /* Initialize the buffer migration limit. */
2589 if (amdgpu_moverate >= 0)
2590 max_MBps = amdgpu_moverate;
2591 else
2592 max_MBps = 8; /* Allow 8 MB/s. */
2593 /* Get a log2 for easy divisions. */
2594 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2595
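A stand-alone illustration (not driver code) of why a log2 of the rate is stored above: dividing by a power-of-two rate in the buffer-migration accounting becomes a cheap right shift. The numbers are made up.

#include <stdio.h>

int main(void)
{
	unsigned int log2_max_MBps = 3;			/* ilog2(8 MB/s) */
	unsigned long long bytes_moved = 48ULL << 20;	/* 48 MB moved */

	/* (bytes >> 20) megabytes divided by the rate, via shifts only */
	printf("seconds of budget used: %llu\n",
	       (bytes_moved >> 20) >> log2_max_MBps);
	return 0;
}
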
d38ceaf9
AD
2596 r = amdgpu_ib_pool_init(adev);
2597 if (r) {
2598 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2599 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2600 goto failed;
d38ceaf9
AD
2601 }
2602
2dc8f81e
HC
2603 if (amdgpu_sriov_vf(adev))
2604 amdgpu_virt_init_data_exchange(adev);
2605
9bc92b9c
ML
2606 amdgpu_fbdev_init(adev);
2607
d2f52ac8
RZ
2608 r = amdgpu_pm_sysfs_init(adev);
2609 if (r)
2610 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2611
75758255 2612 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2613 if (r)
d38ceaf9 2614 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2615
2616 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2617 if (r)
d38ceaf9 2618 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2619
50ab2533 2620 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2621 if (r)
50ab2533 2622 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2623
763efb6c 2624 r = amdgpu_debugfs_init(adev);
db95e218 2625 if (r)
763efb6c 2626 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2627
d38ceaf9
AD
2628 if ((amdgpu_testing & 1)) {
2629 if (adev->accel_working)
2630 amdgpu_test_moves(adev);
2631 else
2632 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2633 }
d38ceaf9
AD
2634 if (amdgpu_benchmarking) {
2635 if (adev->accel_working)
2636 amdgpu_benchmark(adev, amdgpu_benchmarking);
2637 else
2638 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2639 }
2640
2641 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2642 * explicit gating rather than handling it automatically.
2643 */
06ec9070 2644 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2645 if (r) {
06ec9070 2646 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2647 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2648 goto failed;
2c1a2784 2649 }
d38ceaf9
AD
2650
2651 return 0;
83ba126a
AD
2652
2653failed:
89041940 2654 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2655 if (runtime)
2656 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2657
83ba126a 2658 return r;
d38ceaf9
AD
2659}
2660
d38ceaf9
AD
2661/**
2662 * amdgpu_device_fini - tear down the driver
2663 *
2664 * @adev: amdgpu_device pointer
2665 *
2666 * Tear down the driver info (all asics).
2667 * Called at driver shutdown.
2668 */
2669void amdgpu_device_fini(struct amdgpu_device *adev)
2670{
2671 int r;
2672
2673 DRM_INFO("amdgpu: finishing device.\n");
2674 adev->shutdown = true;
e5b03032
ML
2675 /* disable all interrupts */
2676 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2677 if (adev->mode_info.mode_config_initialized){
2678 if (!amdgpu_device_has_dc_support(adev))
2679 drm_crtc_force_disable_all(adev->ddev);
2680 else
2681 drm_atomic_helper_shutdown(adev->ddev);
2682 }
d38ceaf9
AD
2683 amdgpu_ib_pool_fini(adev);
2684 amdgpu_fence_driver_fini(adev);
58e955d9 2685 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2686 amdgpu_fbdev_fini(adev);
06ec9070 2687 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2688 if (adev->firmware.gpu_info_fw) {
2689 release_firmware(adev->firmware.gpu_info_fw);
2690 adev->firmware.gpu_info_fw = NULL;
2691 }
d38ceaf9 2692 adev->accel_working = false;
2dc80b00 2693 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2694 /* free i2c buses */
4562236b
HW
2695 if (!amdgpu_device_has_dc_support(adev))
2696 amdgpu_i2c_fini(adev);
bfca0289
SL
2697
2698 if (amdgpu_emu_mode != 1)
2699 amdgpu_atombios_fini(adev);
2700
d38ceaf9
AD
2701 kfree(adev->bios);
2702 adev->bios = NULL;
84c8b22e
LW
2703 if (!pci_is_thunderbolt_attached(adev->pdev))
2704 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2705 if (adev->flags & AMD_IS_PX)
2706 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2707 vga_client_register(adev->pdev, NULL, NULL, NULL);
2708 if (adev->rio_mem)
2709 pci_iounmap(adev->pdev, adev->rio_mem);
2710 adev->rio_mem = NULL;
2711 iounmap(adev->rmmio);
2712 adev->rmmio = NULL;
06ec9070 2713 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2714 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2715}
2716
2717
2718/*
2719 * Suspend & resume.
2720 */
2721/**
810ddc3a 2722 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2723 *
87e3f136
DP
2724 * @dev: drm dev pointer
2725 * @suspend: suspend state
 2726 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
2727 *
2728 * Puts the hw in the suspend state (all asics).
2729 * Returns 0 for success or an error on failure.
2730 * Called at driver suspend.
2731 */
810ddc3a 2732int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2733{
2734 struct amdgpu_device *adev;
2735 struct drm_crtc *crtc;
2736 struct drm_connector *connector;
5ceb54c6 2737 int r;
d38ceaf9
AD
2738
2739 if (dev == NULL || dev->dev_private == NULL) {
2740 return -ENODEV;
2741 }
2742
2743 adev = dev->dev_private;
2744
2745 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2746 return 0;
2747
44779b43 2748 adev->in_suspend = true;
d38ceaf9
AD
2749 drm_kms_helper_poll_disable(dev);
2750
5f818173
S
2751 if (fbcon)
2752 amdgpu_fbdev_set_suspend(adev, 1);
2753
a5459475
RZ
2754 cancel_delayed_work_sync(&adev->late_init_work);
2755
4562236b
HW
2756 if (!amdgpu_device_has_dc_support(adev)) {
2757 /* turn off display hw */
2758 drm_modeset_lock_all(dev);
2759 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2760 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2761 }
2762 drm_modeset_unlock_all(dev);
fe1053b7
AD
2763 /* unpin the front buffers and cursors */
2764 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2765 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2766 struct drm_framebuffer *fb = crtc->primary->fb;
2767 struct amdgpu_bo *robj;
2768
2769 if (amdgpu_crtc->cursor_bo) {
2770 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2771 r = amdgpu_bo_reserve(aobj, true);
2772 if (r == 0) {
2773 amdgpu_bo_unpin(aobj);
2774 amdgpu_bo_unreserve(aobj);
2775 }
756e6880 2776 }
756e6880 2777
fe1053b7
AD
2778 if (fb == NULL || fb->obj[0] == NULL) {
2779 continue;
2780 }
2781 robj = gem_to_amdgpu_bo(fb->obj[0]);
2782 /* don't unpin kernel fb objects */
2783 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2784 r = amdgpu_bo_reserve(robj, true);
2785 if (r == 0) {
2786 amdgpu_bo_unpin(robj);
2787 amdgpu_bo_unreserve(robj);
2788 }
d38ceaf9
AD
2789 }
2790 }
2791 }
fe1053b7
AD
2792
2793 amdgpu_amdkfd_suspend(adev);
2794
2795 r = amdgpu_device_ip_suspend_phase1(adev);
2796
d38ceaf9
AD
2797 /* evict vram memory */
2798 amdgpu_bo_evict_vram(adev);
2799
5ceb54c6 2800 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2801
fe1053b7 2802 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2803
a0a71e49
AD
2804 /* evict remaining vram memory
2805 * This second call to evict vram is to evict the gart page table
2806 * using the CPU.
2807 */
d38ceaf9
AD
2808 amdgpu_bo_evict_vram(adev);
2809
2810 pci_save_state(dev->pdev);
2811 if (suspend) {
2812 /* Shut down the device */
2813 pci_disable_device(dev->pdev);
2814 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2815 } else {
2816 r = amdgpu_asic_reset(adev);
2817 if (r)
2818 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2819 }
2820
d38ceaf9
AD
2821 return 0;
2822}
2823
2824/**
810ddc3a 2825 * amdgpu_device_resume - initiate device resume
d38ceaf9 2826 *
87e3f136
DP
2827 * @dev: drm dev pointer
2828 * @resume: resume state
 2829 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
2830 *
2831 * Bring the hw back to operating state (all asics).
2832 * Returns 0 for success or an error on failure.
2833 * Called at driver resume.
2834 */
810ddc3a 2835int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2836{
2837 struct drm_connector *connector;
2838 struct amdgpu_device *adev = dev->dev_private;
756e6880 2839 struct drm_crtc *crtc;
03161a6e 2840 int r = 0;
d38ceaf9
AD
2841
2842 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2843 return 0;
2844
d38ceaf9
AD
2845 if (resume) {
2846 pci_set_power_state(dev->pdev, PCI_D0);
2847 pci_restore_state(dev->pdev);
74b0b157 2848 r = pci_enable_device(dev->pdev);
03161a6e 2849 if (r)
4d3b9ae5 2850 return r;
d38ceaf9
AD
2851 }
2852
2853 /* post card */
39c640c0 2854 if (amdgpu_device_need_post(adev)) {
74b0b157 2855 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2856 if (r)
2857 DRM_ERROR("amdgpu asic init failed\n");
2858 }
d38ceaf9 2859
06ec9070 2860 r = amdgpu_device_ip_resume(adev);
e6707218 2861 if (r) {
06ec9070 2862 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2863 return r;
e6707218 2864 }
5ceb54c6
AD
2865 amdgpu_fence_driver_resume(adev);
2866
d38ceaf9 2867
06ec9070 2868 r = amdgpu_device_ip_late_init(adev);
03161a6e 2869 if (r)
4d3b9ae5 2870 return r;
d38ceaf9 2871
fe1053b7
AD
2872 if (!amdgpu_device_has_dc_support(adev)) {
2873 /* pin cursors */
2874 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2875 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2876
2877 if (amdgpu_crtc->cursor_bo) {
2878 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2879 r = amdgpu_bo_reserve(aobj, true);
2880 if (r == 0) {
2881 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2882 if (r != 0)
2883 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2884 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2885 amdgpu_bo_unreserve(aobj);
2886 }
756e6880
AD
2887 }
2888 }
2889 }
ba997709
YZ
2890 r = amdgpu_amdkfd_resume(adev);
2891 if (r)
2892 return r;
756e6880 2893
96a5d8d4
LL
2894 /* Make sure IB tests flushed */
2895 flush_delayed_work(&adev->late_init_work);
2896
d38ceaf9
AD
2897 /* blat the mode back in */
2898 if (fbcon) {
4562236b
HW
2899 if (!amdgpu_device_has_dc_support(adev)) {
2900 /* pre DCE11 */
2901 drm_helper_resume_force_mode(dev);
2902
2903 /* turn on display hw */
2904 drm_modeset_lock_all(dev);
2905 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2906 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2907 }
2908 drm_modeset_unlock_all(dev);
d38ceaf9 2909 }
4d3b9ae5 2910 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2911 }
2912
2913 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2914
2915 /*
2916 * Most of the connector probing functions try to acquire runtime pm
2917 * refs to ensure that the GPU is powered on when connector polling is
2918 * performed. Since we're calling this from a runtime PM callback,
2919 * trying to acquire rpm refs will cause us to deadlock.
2920 *
2921 * Since we're guaranteed to be holding the rpm lock, it's safe to
2922 * temporarily disable the rpm helpers so this doesn't deadlock us.
2923 */
2924#ifdef CONFIG_PM
2925 dev->dev->power.disable_depth++;
2926#endif
4562236b
HW
2927 if (!amdgpu_device_has_dc_support(adev))
2928 drm_helper_hpd_irq_event(dev);
2929 else
2930 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2931#ifdef CONFIG_PM
2932 dev->dev->power.disable_depth--;
2933#endif
44779b43
RZ
2934 adev->in_suspend = false;
2935
4d3b9ae5 2936 return 0;
d38ceaf9
AD
2937}
2938
e3ecdffa
AD
2939/**
2940 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2941 *
2942 * @adev: amdgpu_device pointer
2943 *
2944 * The list of all the hardware IPs that make up the asic is walked and
2945 * the check_soft_reset callbacks are run. check_soft_reset determines
2946 * if the asic is still hung or not.
2947 * Returns true if any of the IPs are still in a hung state, false if not.
2948 */
06ec9070 2949static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2950{
2951 int i;
2952 bool asic_hang = false;
2953
f993d628
ML
2954 if (amdgpu_sriov_vf(adev))
2955 return true;
2956
8bc04c29
AD
2957 if (amdgpu_asic_need_full_reset(adev))
2958 return true;
2959
63fbf42f 2960 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2961 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2962 continue;
a1255107
AD
2963 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2964 adev->ip_blocks[i].status.hang =
2965 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2966 if (adev->ip_blocks[i].status.hang) {
2967 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
2968 asic_hang = true;
2969 }
2970 }
2971 return asic_hang;
2972}
2973
e3ecdffa
AD
2974/**
2975 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2976 *
2977 * @adev: amdgpu_device pointer
2978 *
2979 * The list of all the hardware IPs that make up the asic is walked and the
2980 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
2981 * handles any IP specific hardware or software state changes that are
2982 * necessary for a soft reset to succeed.
2983 * Returns 0 on success, negative error code on failure.
2984 */
06ec9070 2985static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
2986{
2987 int i, r = 0;
2988
2989 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2990 if (!adev->ip_blocks[i].status.valid)
d31a501e 2991 continue;
a1255107
AD
2992 if (adev->ip_blocks[i].status.hang &&
2993 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
2994 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
2995 if (r)
2996 return r;
2997 }
2998 }
2999
3000 return 0;
3001}
3002
e3ecdffa
AD
3003/**
3004 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3005 *
3006 * @adev: amdgpu_device pointer
3007 *
3008 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3009 * reset is necessary to recover.
3010 * Returns true if a full asic reset is required, false if not.
3011 */
06ec9070 3012static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3013{
da146d3b
AD
3014 int i;
3015
8bc04c29
AD
3016 if (amdgpu_asic_need_full_reset(adev))
3017 return true;
3018
da146d3b 3019 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3020 if (!adev->ip_blocks[i].status.valid)
da146d3b 3021 continue;
a1255107
AD
3022 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3023 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3024 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3025 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3026 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3027 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
 3028 DRM_INFO("Some blocks need a full reset!\n");
3029 return true;
3030 }
3031 }
35d782fe
CZ
3032 }
3033 return false;
3034}
3035
e3ecdffa
AD
3036/**
3037 * amdgpu_device_ip_soft_reset - do a soft reset
3038 *
3039 * @adev: amdgpu_device pointer
3040 *
3041 * The list of all the hardware IPs that make up the asic is walked and the
3042 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3043 * IP specific hardware or software state changes that are necessary to soft
3044 * reset the IP.
3045 * Returns 0 on success, negative error code on failure.
3046 */
06ec9070 3047static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3048{
3049 int i, r = 0;
3050
3051 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3052 if (!adev->ip_blocks[i].status.valid)
35d782fe 3053 continue;
a1255107
AD
3054 if (adev->ip_blocks[i].status.hang &&
3055 adev->ip_blocks[i].version->funcs->soft_reset) {
3056 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3057 if (r)
3058 return r;
3059 }
3060 }
3061
3062 return 0;
3063}
3064
e3ecdffa
AD
3065/**
3066 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3067 *
3068 * @adev: amdgpu_device pointer
3069 *
3070 * The list of all the hardware IPs that make up the asic is walked and the
3071 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3072 * handles any IP specific hardware or software state changes that are
3073 * necessary after the IP has been soft reset.
3074 * Returns 0 on success, negative error code on failure.
3075 */
06ec9070 3076static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3077{
3078 int i, r = 0;
3079
3080 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3081 if (!adev->ip_blocks[i].status.valid)
35d782fe 3082 continue;
a1255107
AD
3083 if (adev->ip_blocks[i].status.hang &&
3084 adev->ip_blocks[i].version->funcs->post_soft_reset)
3085 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3086 if (r)
3087 return r;
3088 }
3089
3090 return 0;
3091}
3092
e3ecdffa 3093/**
c33adbc7 3094 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3095 *
3096 * @adev: amdgpu_device pointer
3097 *
3098 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3099 * restore things like GPUVM page tables after a GPU reset where
3100 * the contents of VRAM might be lost.
403009bf
CK
3101 *
3102 * Returns:
3103 * 0 on success, negative error code on failure.
e3ecdffa 3104 */
c33adbc7 3105static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3106{
c41d1cf6 3107 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3108 struct amdgpu_bo *shadow;
3109 long r = 1, tmo;
c41d1cf6
ML
3110
3111 if (amdgpu_sriov_runtime(adev))
b045d3af 3112 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3113 else
3114 tmo = msecs_to_jiffies(100);
3115
3116 DRM_INFO("recover vram bo from shadow start\n");
3117 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3118 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3119
3120 /* No need to recover an evicted BO */
3121 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3122 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3123 continue;
3124
3125 r = amdgpu_bo_restore_shadow(shadow, &next);
3126 if (r)
3127 break;
3128
c41d1cf6
ML
3129 if (fence) {
3130 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3131 dma_fence_put(fence);
3132 fence = next;
3133 if (r <= 0)
c41d1cf6 3134 break;
403009bf
CK
3135 } else {
3136 fence = next;
c41d1cf6 3137 }
c41d1cf6
ML
3138 }
3139 mutex_unlock(&adev->shadow_list_lock);
3140
403009bf
CK
3141 if (fence)
3142 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3143 dma_fence_put(fence);
3144
403009bf 3145 if (r <= 0 || tmo <= 0) {
c41d1cf6 3146 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3147 return -EIO;
3148 }
c41d1cf6 3149
403009bf
CK
3150 DRM_INFO("recover vram bo from shadow done\n");
3151 return 0;
c41d1cf6
ML
3152}
3153
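A stand-alone illustration (not driver code) of the one-deep pipeline in the restore loop above: issue copy N+1 before waiting on copy N, so restores overlap with fence waits, and only the final fence is drained after the loop.

#include <stdio.h>

int main(void)
{
	int pending = -1;	/* "fence" of the previously issued copy */
	int n;

	for (n = 0; n < 4; n++) {
		printf("issue copy %d\n", n);		/* amdgpu_bo_restore_shadow() */
		if (pending >= 0)
			printf("wait  copy %d\n", pending); /* dma_fence_wait_timeout() */
		pending = n;
	}
	if (pending >= 0)
		printf("wait  copy %d\n", pending);	/* drain the last fence */
	return 0;
}
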
e3ecdffa 3154/**
06ec9070 3155 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
a90ad3c2
ML
3156 *
3157 * @adev: amdgpu device pointer
a90ad3c2 3158 *
5740682e 3159 * Attempt a soft reset or, if that fails, a full reset, and reinitialize the ASIC.
3f48c681 3160 * Returns 0 on success, negative error code on failure.
e3ecdffa 3161 */
c41d1cf6 3162static int amdgpu_device_reset(struct amdgpu_device *adev)
a90ad3c2 3163{
5740682e
ML
3164 bool need_full_reset, vram_lost = 0;
3165 int r;
a90ad3c2 3166
06ec9070 3167 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
a90ad3c2 3168
5740682e 3169 if (!need_full_reset) {
06ec9070
AD
3170 amdgpu_device_ip_pre_soft_reset(adev);
3171 r = amdgpu_device_ip_soft_reset(adev);
3172 amdgpu_device_ip_post_soft_reset(adev);
3173 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5740682e
ML
3174 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3175 need_full_reset = true;
3176 }
5740682e 3177 }
a90ad3c2 3178
5740682e 3179 if (need_full_reset) {
cdd61df6 3180 r = amdgpu_device_ip_suspend(adev);
a90ad3c2 3181
5740682e 3182retry:
5740682e 3183 r = amdgpu_asic_reset(adev);
5740682e
ML
3184 /* post card */
3185 amdgpu_atom_asic_init(adev->mode_info.atom_context);
65781c78 3186
5740682e
ML
3187 if (!r) {
3188 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
06ec9070 3189 r = amdgpu_device_ip_resume_phase1(adev);
5740682e
ML
3190 if (r)
3191 goto out;
65781c78 3192
06ec9070 3193 vram_lost = amdgpu_device_check_vram_lost(adev);
5740682e
ML
3194 if (vram_lost) {
3195 DRM_ERROR("VRAM is lost!\n");
3196 atomic_inc(&adev->vram_lost_counter);
3197 }
3198
c1c7ce8f
CK
3199 r = amdgpu_gtt_mgr_recover(
3200 &adev->mman.bdev.man[TTM_PL_TT]);
5740682e
ML
3201 if (r)
3202 goto out;
3203
7a3e0bb2
RZ
3204 r = amdgpu_device_fw_loading(adev);
3205 if (r)
3206 return r;
3207
06ec9070 3208 r = amdgpu_device_ip_resume_phase2(adev);
5740682e
ML
3209 if (r)
3210 goto out;
3211
3212 if (vram_lost)
06ec9070 3213 amdgpu_device_fill_reset_magic(adev);
65781c78 3214 }
5740682e 3215 }
65781c78 3216
5740682e
ML
3217out:
3218 if (!r) {
3219 amdgpu_irq_gpu_reset_resume_helper(adev);
3220 r = amdgpu_ib_ring_tests(adev);
3221 if (r) {
3222 dev_err(adev->dev, "ib ring test failed (%d).\n", r);
cdd61df6 3223 r = amdgpu_device_ip_suspend(adev);
5740682e
ML
3224 need_full_reset = true;
3225 goto retry;
3226 }
3227 }
65781c78 3228
c33adbc7
CK
3229 if (!r)
3230 r = amdgpu_device_recover_vram(adev);
a90ad3c2 3231
5740682e
ML
3232 return r;
3233}
a90ad3c2 3234
e3ecdffa 3235/**
06ec9070 3236 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3237 *
3238 * @adev: amdgpu device pointer
87e3f136 3239 * @from_hypervisor: request from hypervisor
5740682e
ML
3240 *
 3241 * Do a VF FLR and reinitialize the ASIC.
 3242 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3243 */
3244static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3245 bool from_hypervisor)
5740682e
ML
3246{
3247 int r;
3248
3249 if (from_hypervisor)
3250 r = amdgpu_virt_request_full_gpu(adev, true);
3251 else
3252 r = amdgpu_virt_reset_gpu(adev);
3253 if (r)
3254 return r;
a90ad3c2
ML
3255
3256 /* Resume IP prior to SMC */
06ec9070 3257 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3258 if (r)
3259 goto error;
a90ad3c2
ML
3260
3261 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3262 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3263
7a3e0bb2
RZ
3264 r = amdgpu_device_fw_loading(adev);
3265 if (r)
3266 return r;
3267
a90ad3c2 3268 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3269 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3270 if (r)
3271 goto error;
a90ad3c2
ML
3272
3273 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3274 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3275
abc34253
ED
3276error:
3277 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3278 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3279 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3280 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3281 }
3282
3283 return r;
3284}
3285
12938fad
CK
3286/**
3287 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3288 *
3289 * @adev: amdgpu device pointer
3290 *
3291 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3292 * a hung GPU.
3293 */
3294bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3295{
3296 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3297 DRM_INFO("Timeout, but no hardware hang detected.\n");
3298 return false;
3299 }
3300
3ba7b418
AG
3301 if (amdgpu_gpu_recovery == 0)
3302 goto disabled;
3303
3304 if (amdgpu_sriov_vf(adev))
3305 return true;
3306
3307 if (amdgpu_gpu_recovery == -1) {
3308 switch (adev->asic_type) {
3309 case CHIP_TOPAZ:
3310 case CHIP_TONGA:
3311 case CHIP_FIJI:
3312 case CHIP_POLARIS10:
3313 case CHIP_POLARIS11:
3314 case CHIP_POLARIS12:
3315 case CHIP_VEGAM:
3316 case CHIP_VEGA20:
3317 case CHIP_VEGA10:
3318 case CHIP_VEGA12:
3319 break;
3320 default:
3321 goto disabled;
3322 }
12938fad
CK
3323 }
3324
3325 return true;
3ba7b418
AG
3326
3327disabled:
3328 DRM_INFO("GPU recovery disabled.\n");
3329 return false;
12938fad
CK
3330}
3331
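A stand-alone illustration (not driver code) of the tri-state module-parameter convention the function above follows for amdgpu_gpu_recovery: 0 forces recovery off, a positive value forces it on, and -1 means auto, deferring to a per-ASIC default.

#include <stdbool.h>
#include <stdio.h>

static bool should_recover(int param, bool asic_default)
{
	if (param == 0)
		return false;		/* forced off */
	if (param > 0)
		return true;		/* forced on */
	return asic_default;		/* -1: auto, per-ASIC default */
}

int main(void)
{
	printf("auto:%d off:%d on:%d\n",
	       should_recover(-1, true),
	       should_recover(0, true),
	       should_recover(1, false));
	return 0;
}
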
d38ceaf9 3332/**
5f152b5e 3333 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
d38ceaf9
AD
3334 *
3335 * @adev: amdgpu device pointer
5740682e 3336 * @job: which job triggered the hang
d38ceaf9 3337 *
5740682e 3338 * Attempt to reset the GPU if it has hung (all asics).
d38ceaf9
AD
3339 * Returns 0 for success or an error on failure.
3340 */
5f152b5e 3341int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
12938fad 3342 struct amdgpu_job *job)
d38ceaf9 3343{
5740682e 3344 int i, r, resched;
fb140b29 3345
5740682e
ML
3346 dev_info(adev->dev, "GPU reset begin!\n");
3347
13a752e3 3348 mutex_lock(&adev->lock_reset);
d94aed5a 3349 atomic_inc(&adev->gpu_reset_counter);
13a752e3 3350 adev->in_gpu_reset = 1;
d38ceaf9 3351
5c6dd71e
SL
3352 /* Block kfd */
3353 amdgpu_amdkfd_pre_reset(adev);
3354
a3c47d6b
CZ
3355 /* block TTM */
3356 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
71182665 3357
71182665 3358 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3359 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3360 struct amdgpu_ring *ring = adev->rings[i];
3361
51687759 3362 if (!ring || !ring->sched.thread)
0875dc9e 3363 continue;
5740682e 3364
71182665
ML
3365 kthread_park(ring->sched.thread);
3366
734afd4b 3367 if (job && job->base.sched != &ring->sched)
5740682e
ML
3368 continue;
3369
67ccea60 3370 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3371
2f9d4084
ML
3372 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3373 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3374 }
d38ceaf9 3375
5740682e 3376 if (amdgpu_sriov_vf(adev))
c41d1cf6 3377 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5740682e 3378 else
c41d1cf6 3379 r = amdgpu_device_reset(adev);
5740682e 3380
71182665
ML
3381 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3382 struct amdgpu_ring *ring = adev->rings[i];
53cdccd5 3383
71182665
ML
3384 if (!ring || !ring->sched.thread)
3385 continue;
5740682e 3386
71182665
ML
 3387 /* only need to recover the scheduler of the given job's ring,
 3388 * or all rings (in the case @job is NULL),
 3389 * after the above reset has completed
3390 */
3320b8d2 3391 if ((!job || job->base.sched == &ring->sched) && !r)
1b1f42d8 3392 drm_sched_job_recovery(&ring->sched);
5740682e 3393
71182665 3394 kthread_unpark(ring->sched.thread);
d38ceaf9
AD
3395 }
3396
bf830604 3397 if (!amdgpu_device_has_dc_support(adev)) {
4562236b 3398 drm_helper_resume_force_mode(adev->ddev);
5740682e 3399 }
d38ceaf9
AD
3400
3401 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
5740682e 3402
89041940 3403 if (r) {
d38ceaf9 3404 /* bad news, how to tell it to userspace ? */
5740682e
ML
3405 dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3406 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3407 } else {
3f48c681 3408 dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
89041940 3409 }
d38ceaf9 3410
5c6dd71e
SL
3411 /*unlock kfd */
3412 amdgpu_amdkfd_post_reset(adev);
89041940 3413 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3414 adev->in_gpu_reset = 0;
3415 mutex_unlock(&adev->lock_reset);
d38ceaf9
AD
3416 return r;
3417}
3418
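/*
 * Example (not part of this file): userspace does not see the reset
 * directly, but it can ask the kernel whether its context was affected.
 * A minimal libdrm_amdgpu sketch, assuming an already-created context;
 * amdgpu_cs_query_reset_state() and the AMDGPU_CTX_*_RESET values come
 * from libdrm's amdgpu.h / amdgpu_drm.h.
 */
#include <stdio.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

static void example_check_reset(amdgpu_context_handle ctx)
{
	uint32_t state = 0, hangs = 0;

	/* state stays AMDGPU_CTX_NO_RESET when no reset touched this context */
	if (amdgpu_cs_query_reset_state(ctx, &state, &hangs) == 0 &&
	    state != AMDGPU_CTX_NO_RESET)
		fprintf(stderr, "GPU reset observed: state %u, %u hang(s)\n",
			state, hangs);
}
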
/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap;
	enum pcie_link_width link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			/* each supported speed implies all slower speeds,
			 * so the masks accumulate
			 */
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		pdev = adev->ddev->pdev->bus->self;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		pdev = adev->ddev->pdev->bus->self;
		link_width = pcie_get_width_cap(pdev);
		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}

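/*
 * Example (not part of this file): consumers of the masks built above
 * usually intersect the asic and platform bits.  A hypothetical helper,
 * assuming the CAIL_* flags from amd_pcie.h, that reports the fastest
 * PCIE gen both sides support:
 */
static unsigned int example_max_common_pcie_gen(struct amdgpu_device *adev)
{
	u32 mask = adev->pm.pcie_gen_mask;

	if ((mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4) &&
	    (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4))
		return 4;
	if ((mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	    (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))
		return 3;
	if ((mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2) &&
	    (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2))
		return 2;
	return 1;	/* gen1 is always taken as the floor */
}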