]> git.ipfire.org Git - thirdparty/linux.git/blame - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drm/sched: Convert the GPU scheduler to variable number of run-queues
[thirdparty/linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166 167
fd496ca8 168/**
b98c6299 169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
170 *
171 * @dev: drm_device pointer
172 *
b98c6299 173 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
174 * otherwise return false.
175 */
b98c6299 176bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
177{
178 struct amdgpu_device *adev = drm_to_adev(dev);
179
b98c6299 180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
181 return true;
182 return false;
183}
184
e3ecdffa 185/**
0330b848 186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
187 *
188 * @dev: drm_device pointer
189 *
b98c6299 190 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
191 * otherwise return false.
192 */
31af062a 193bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 194{
1348969a 195 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 196
b98c6299
AD
197 if (adev->has_pr3 ||
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
199 return true;
200 return false;
201}
202
a69cba42
AD
203/**
204 * amdgpu_device_supports_baco - Does the device support BACO
205 *
206 * @dev: drm_device pointer
207 *
208 * Returns true if the device supporte BACO,
209 * otherwise return false.
210 */
211bool amdgpu_device_supports_baco(struct drm_device *dev)
212{
1348969a 213 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
214
215 return amdgpu_asic_supports_baco(adev);
216}
217
3fa8f89d
S
218/**
219 * amdgpu_device_supports_smart_shift - Is the device dGPU with
220 * smart shift support
221 *
222 * @dev: drm_device pointer
223 *
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
226 */
227bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
228{
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
231}
232
6e3cd2a9
MCC
233/*
234 * VRAM access helper functions
235 */
236
e35e2b11 237/**
048af66b 238 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
239 *
240 * @adev: amdgpu_device pointer
241 * @pos: offset of the buffer in vram
242 * @buf: virtual address of the buffer in system memory
243 * @size: read/write size, sizeof(@buf) must > @size
244 * @write: true - write to vram, otherwise - read from vram
245 */
048af66b
KW
246void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
e35e2b11 248{
e35e2b11 249 unsigned long flags;
048af66b
KW
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
ce05ac56 252 uint64_t last;
f89f8c6b 253 int idx;
ce05ac56 254
c58a863b 255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 256 return;
9d11eb0d 257
048af66b
KW
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
259
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
262 tmp = pos >> 31;
263
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
265 if (tmp != hi) {
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
267 hi = tmp;
268 }
269 if (write)
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
271 else
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
273 }
274
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
276 drm_dev_exit(idx);
277}
278
279/**
bbe04dec 280 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
281 *
282 * @adev: amdgpu_device pointer
283 * @pos: offset of the buffer in vram
284 * @buf: virtual address of the buffer in system memory
285 * @size: read/write size, sizeof(@buf) must > @size
286 * @write: true - write to vram, otherwise - read from vram
287 *
288 * The return value means how many bytes have been transferred.
289 */
290size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
292{
9d11eb0d 293#ifdef CONFIG_64BIT
048af66b
KW
294 void __iomem *addr;
295 size_t count = 0;
296 uint64_t last;
297
298 if (!adev->mman.aper_base_kaddr)
299 return 0;
300
9d11eb0d
CK
301 last = min(pos + size, adev->gmc.visible_vram_size);
302 if (last > pos) {
048af66b
KW
303 addr = adev->mman.aper_base_kaddr + pos;
304 count = last - pos;
9d11eb0d
CK
305
306 if (write) {
307 memcpy_toio(addr, buf, count);
4c452b5c
SS
308 /* Make sure HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe device
310 */
9d11eb0d 311 mb();
810085dd 312 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 313 } else {
810085dd 314 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
315 /* Make sure HDP read cache is invalidated before issuing a read
316 * to the PCIe device
317 */
9d11eb0d
CK
318 mb();
319 memcpy_fromio(buf, addr, count);
320 }
321
9d11eb0d 322 }
048af66b
KW
323
324 return count;
325#else
326 return 0;
9d11eb0d 327#endif
048af66b 328}
9d11eb0d 329
048af66b
KW
330/**
331 * amdgpu_device_vram_access - read/write a buffer in vram
332 *
333 * @adev: amdgpu_device pointer
334 * @pos: offset of the buffer in vram
335 * @buf: virtual address of the buffer in system memory
336 * @size: read/write size, sizeof(@buf) must > @size
337 * @write: true - write to vram, otherwise - read from vram
338 */
339void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
340 void *buf, size_t size, bool write)
341{
342 size_t count;
e35e2b11 343
048af66b
KW
344 /* try to using vram apreature to access vram first */
345 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
346 size -= count;
347 if (size) {
348 /* using MM to access rest vram */
349 pos += count;
350 buf += count;
351 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
352 }
353}
354
d38ceaf9 355/*
f7ee1874 356 * register access helper functions.
d38ceaf9 357 */
56b53c0b
DL
358
359/* Check if hw access should be skipped because of hotplug or device error */
360bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
361{
7afefb81 362 if (adev->no_hw_access)
56b53c0b
DL
363 return true;
364
365#ifdef CONFIG_LOCKDEP
366 /*
367 * This is a bit complicated to understand, so worth a comment. What we assert
368 * here is that the GPU reset is not running on another thread in parallel.
369 *
370 * For this we trylock the read side of the reset semaphore, if that succeeds
371 * we know that the reset is not running in paralell.
372 *
373 * If the trylock fails we assert that we are either already holding the read
374 * side of the lock or are the reset thread itself and hold the write side of
375 * the lock.
376 */
377 if (in_task()) {
d0fb18b5
AG
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
56b53c0b 380 else
d0fb18b5 381 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
382 }
383#endif
384 return false;
385}
386
e3ecdffa 387/**
f7ee1874 388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
389 *
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
393 *
394 * Returns the 32 bit value from the offset specified.
395 */
f7ee1874
HZ
396uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
d38ceaf9 398{
f4b373f4
TSD
399 uint32_t ret;
400
56b53c0b 401 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
402 return 0;
403
f7ee1874
HZ
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
d0fb18b5 407 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 408 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 409 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
410 } else {
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
412 }
413 } else {
414 ret = adev->pcie_rreg(adev, reg * 4);
81202807 415 }
bc992ba5 416
f7ee1874 417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 418
f4b373f4 419 return ret;
d38ceaf9
AD
420}
421
421a2a30
ML
422/*
423 * MMIO register read with bytes helper functions
424 * @offset:bytes offset from MMIO start
b8920e1e 425 */
421a2a30 426
e3ecdffa
AD
427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
445/*
446 * MMIO register write with bytes helper functions
447 * @offset:bytes offset from MMIO start
448 * @value: the value want to be written to the register
b8920e1e
SS
449 */
450
e3ecdffa
AD
451/**
452 * amdgpu_mm_wreg8 - read a memory mapped IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @offset: byte aligned register offset
456 * @value: 8 bit value to write
457 *
458 * Writes the value specified to the offset specified.
459 */
7cbbc745
AG
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
d0fb18b5 491 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 492 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 493 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
03f2abb0 504/**
4cc9f86f 505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 506 *
71579346
RB
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
509 * @v: value to write
510 *
511 * this function is invoked only for the debugfs register access
03f2abb0 512 */
f7ee1874 513void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
514 uint32_t reg, uint32_t v,
515 uint32_t xcc_id)
2e0cc4d4 516{
56b53c0b 517 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
518 return;
519
2e0cc4d4 520 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
521 adev->gfx.rlc.funcs &&
522 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 523 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 524 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
525 } else if ((reg * 4) >= adev->rmmio_size) {
526 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
527 } else {
528 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 529 }
d38ceaf9
AD
530}
531
1bba3683
HZ
532/**
533 * amdgpu_device_indirect_rreg - read an indirect register
534 *
535 * @adev: amdgpu_device pointer
22f453fb 536 * @reg_addr: indirect register address to read from
1bba3683
HZ
537 *
538 * Returns the value of indirect register @reg_addr
539 */
540u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
541 u32 reg_addr)
542{
65ba96e9 543 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
544 void __iomem *pcie_index_offset;
545 void __iomem *pcie_data_offset;
65ba96e9
HZ
546 u32 r;
547
548 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
549 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
550
551 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
552 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
553 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
554
555 writel(reg_addr, pcie_index_offset);
556 readl(pcie_index_offset);
557 r = readl(pcie_data_offset);
558 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
559
560 return r;
561}
562
0c552ed3
LM
563u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
564 u64 reg_addr)
565{
566 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
567 u32 r;
568 void __iomem *pcie_index_offset;
569 void __iomem *pcie_index_hi_offset;
570 void __iomem *pcie_data_offset;
571
572 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
573 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
574 if (adev->nbio.funcs->get_pcie_index_hi_offset)
575 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
576 else
577 pcie_index_hi = 0;
578
579 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
580 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
581 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
582 if (pcie_index_hi != 0)
583 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
584 pcie_index_hi * 4;
585
586 writel(reg_addr, pcie_index_offset);
587 readl(pcie_index_offset);
588 if (pcie_index_hi != 0) {
589 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
590 readl(pcie_index_hi_offset);
591 }
592 r = readl(pcie_data_offset);
593
594 /* clear the high bits */
595 if (pcie_index_hi != 0) {
596 writel(0, pcie_index_hi_offset);
597 readl(pcie_index_hi_offset);
598 }
599
600 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
601
602 return r;
603}
604
1bba3683
HZ
605/**
606 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
607 *
608 * @adev: amdgpu_device pointer
22f453fb 609 * @reg_addr: indirect register address to read from
1bba3683
HZ
610 *
611 * Returns the value of indirect register @reg_addr
612 */
613u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
614 u32 reg_addr)
615{
65ba96e9 616 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
617 void __iomem *pcie_index_offset;
618 void __iomem *pcie_data_offset;
65ba96e9
HZ
619 u64 r;
620
621 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
622 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
623
624 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
625 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
626 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
627
628 /* read low 32 bits */
629 writel(reg_addr, pcie_index_offset);
630 readl(pcie_index_offset);
631 r = readl(pcie_data_offset);
632 /* read high 32 bits */
633 writel(reg_addr + 4, pcie_index_offset);
634 readl(pcie_index_offset);
635 r |= ((u64)readl(pcie_data_offset) << 32);
636 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
637
638 return r;
639}
640
641/**
642 * amdgpu_device_indirect_wreg - write an indirect register address
643 *
644 * @adev: amdgpu_device pointer
1bba3683
HZ
645 * @reg_addr: indirect register offset
646 * @reg_data: indirect register data
647 *
648 */
649void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
650 u32 reg_addr, u32 reg_data)
651{
65ba96e9 652 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
653 void __iomem *pcie_index_offset;
654 void __iomem *pcie_data_offset;
655
65ba96e9
HZ
656 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
657 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
658
1bba3683
HZ
659 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
660 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
661 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
662
663 writel(reg_addr, pcie_index_offset);
664 readl(pcie_index_offset);
665 writel(reg_data, pcie_data_offset);
666 readl(pcie_data_offset);
667 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
668}
669
0c552ed3
LM
670void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
671 u64 reg_addr, u32 reg_data)
672{
673 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
674 void __iomem *pcie_index_offset;
675 void __iomem *pcie_index_hi_offset;
676 void __iomem *pcie_data_offset;
677
678 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
679 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
680 if (adev->nbio.funcs->get_pcie_index_hi_offset)
681 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
682 else
683 pcie_index_hi = 0;
684
685 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
686 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
687 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
688 if (pcie_index_hi != 0)
689 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
690 pcie_index_hi * 4;
691
692 writel(reg_addr, pcie_index_offset);
693 readl(pcie_index_offset);
694 if (pcie_index_hi != 0) {
695 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
696 readl(pcie_index_hi_offset);
697 }
698 writel(reg_data, pcie_data_offset);
699 readl(pcie_data_offset);
700
701 /* clear the high bits */
702 if (pcie_index_hi != 0) {
703 writel(0, pcie_index_hi_offset);
704 readl(pcie_index_hi_offset);
705 }
706
707 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
708}
709
1bba3683
HZ
710/**
711 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
712 *
713 * @adev: amdgpu_device pointer
1bba3683
HZ
714 * @reg_addr: indirect register offset
715 * @reg_data: indirect register data
716 *
717 */
718void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
719 u32 reg_addr, u64 reg_data)
720{
65ba96e9 721 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
722 void __iomem *pcie_index_offset;
723 void __iomem *pcie_data_offset;
724
65ba96e9
HZ
725 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
726 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
727
1bba3683
HZ
728 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
729 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
730 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
731
732 /* write low 32 bits */
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
736 readl(pcie_data_offset);
737 /* write high 32 bits */
738 writel(reg_addr + 4, pcie_index_offset);
739 readl(pcie_index_offset);
740 writel((u32)(reg_data >> 32), pcie_data_offset);
741 readl(pcie_data_offset);
742 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
743}
744
dabc114e
HZ
745/**
746 * amdgpu_device_get_rev_id - query device rev_id
747 *
748 * @adev: amdgpu_device pointer
749 *
750 * Return device rev_id
751 */
752u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
753{
754 return adev->nbio.funcs->get_rev_id(adev);
755}
756
d38ceaf9
AD
757/**
758 * amdgpu_invalid_rreg - dummy reg read function
759 *
982a820b 760 * @adev: amdgpu_device pointer
d38ceaf9
AD
761 * @reg: offset of register
762 *
763 * Dummy register read function. Used for register blocks
764 * that certain asics don't have (all asics).
765 * Returns the value in the register.
766 */
767static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
768{
769 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
770 BUG();
771 return 0;
772}
773
0c552ed3
LM
774static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
775{
776 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
777 BUG();
778 return 0;
779}
780
d38ceaf9
AD
781/**
782 * amdgpu_invalid_wreg - dummy reg write function
783 *
982a820b 784 * @adev: amdgpu_device pointer
d38ceaf9
AD
785 * @reg: offset of register
786 * @v: value to write to the register
787 *
788 * Dummy register read function. Used for register blocks
789 * that certain asics don't have (all asics).
790 */
791static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
792{
793 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
794 reg, v);
795 BUG();
796}
797
0c552ed3
LM
798static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
799{
800 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
801 reg, v);
802 BUG();
803}
804
4fa1c6a6
TZ
805/**
806 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
807 *
982a820b 808 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
809 * @reg: offset of register
810 *
811 * Dummy register read function. Used for register blocks
812 * that certain asics don't have (all asics).
813 * Returns the value in the register.
814 */
815static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
816{
817 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
818 BUG();
819 return 0;
820}
821
822/**
823 * amdgpu_invalid_wreg64 - dummy reg write function
824 *
982a820b 825 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
826 * @reg: offset of register
827 * @v: value to write to the register
828 *
829 * Dummy register read function. Used for register blocks
830 * that certain asics don't have (all asics).
831 */
832static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
833{
834 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
835 reg, v);
836 BUG();
837}
838
d38ceaf9
AD
839/**
840 * amdgpu_block_invalid_rreg - dummy reg read function
841 *
982a820b 842 * @adev: amdgpu_device pointer
d38ceaf9
AD
843 * @block: offset of instance
844 * @reg: offset of register
845 *
846 * Dummy register read function. Used for register blocks
847 * that certain asics don't have (all asics).
848 * Returns the value in the register.
849 */
850static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
851 uint32_t block, uint32_t reg)
852{
853 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
854 reg, block);
855 BUG();
856 return 0;
857}
858
859/**
860 * amdgpu_block_invalid_wreg - dummy reg write function
861 *
982a820b 862 * @adev: amdgpu_device pointer
d38ceaf9
AD
863 * @block: offset of instance
864 * @reg: offset of register
865 * @v: value to write to the register
866 *
867 * Dummy register read function. Used for register blocks
868 * that certain asics don't have (all asics).
869 */
870static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
871 uint32_t block,
872 uint32_t reg, uint32_t v)
873{
874 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
875 reg, block, v);
876 BUG();
877}
878
4d2997ab
AD
879/**
880 * amdgpu_device_asic_init - Wrapper for atom asic_init
881 *
982a820b 882 * @adev: amdgpu_device pointer
4d2997ab
AD
883 *
884 * Does any asic specific work and then calls atom asic init.
885 */
886static int amdgpu_device_asic_init(struct amdgpu_device *adev)
887{
7656168a
LL
888 int ret;
889
4d2997ab
AD
890 amdgpu_asic_pre_asic_init(adev);
891
5db392a0 892 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
7656168a
LL
893 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
894 amdgpu_psp_wait_for_bootloader(adev);
895 ret = amdgpu_atomfirmware_asic_init(adev, true);
896 return ret;
897 } else {
85d1bcc6 898 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
7656168a
LL
899 }
900
901 return 0;
4d2997ab
AD
902}
903
e3ecdffa 904/**
7ccfd79f 905 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 906 *
982a820b 907 * @adev: amdgpu_device pointer
e3ecdffa
AD
908 *
909 * Allocates a scratch page of VRAM for use by various things in the
910 * driver.
911 */
7ccfd79f 912static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 913{
7ccfd79f
CK
914 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
915 AMDGPU_GEM_DOMAIN_VRAM |
916 AMDGPU_GEM_DOMAIN_GTT,
917 &adev->mem_scratch.robj,
918 &adev->mem_scratch.gpu_addr,
919 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
920}
921
e3ecdffa 922/**
7ccfd79f 923 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 924 *
982a820b 925 * @adev: amdgpu_device pointer
e3ecdffa
AD
926 *
927 * Frees the VRAM scratch page.
928 */
7ccfd79f 929static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 930{
7ccfd79f 931 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
932}
933
934/**
9c3f2b54 935 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
936 *
937 * @adev: amdgpu_device pointer
938 * @registers: pointer to the register array
939 * @array_size: size of the register array
940 *
b8920e1e 941 * Programs an array or registers with and or masks.
d38ceaf9
AD
942 * This is a helper for setting golden registers.
943 */
9c3f2b54
AD
944void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
945 const u32 *registers,
946 const u32 array_size)
d38ceaf9
AD
947{
948 u32 tmp, reg, and_mask, or_mask;
949 int i;
950
951 if (array_size % 3)
952 return;
953
47fc644f 954 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
955 reg = registers[i + 0];
956 and_mask = registers[i + 1];
957 or_mask = registers[i + 2];
958
959 if (and_mask == 0xffffffff) {
960 tmp = or_mask;
961 } else {
962 tmp = RREG32(reg);
963 tmp &= ~and_mask;
e0d07657
HZ
964 if (adev->family >= AMDGPU_FAMILY_AI)
965 tmp |= (or_mask & and_mask);
966 else
967 tmp |= or_mask;
d38ceaf9
AD
968 }
969 WREG32(reg, tmp);
970 }
971}
972
e3ecdffa
AD
973/**
974 * amdgpu_device_pci_config_reset - reset the GPU
975 *
976 * @adev: amdgpu_device pointer
977 *
978 * Resets the GPU using the pci config reset sequence.
979 * Only applicable to asics prior to vega10.
980 */
8111c387 981void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
982{
983 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
984}
985
af484df8
AD
986/**
987 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
988 *
989 * @adev: amdgpu_device pointer
990 *
991 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
992 */
993int amdgpu_device_pci_reset(struct amdgpu_device *adev)
994{
995 return pci_reset_function(adev->pdev);
996}
997
d38ceaf9 998/*
06ec9070 999 * amdgpu_device_wb_*()
455a7bc2 1000 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1001 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1002 */
1003
1004/**
06ec9070 1005 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1006 *
1007 * @adev: amdgpu_device pointer
1008 *
1009 * Disables Writeback and frees the Writeback memory (all asics).
1010 * Used at driver shutdown.
1011 */
06ec9070 1012static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1013{
1014 if (adev->wb.wb_obj) {
a76ed485
AD
1015 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1016 &adev->wb.gpu_addr,
1017 (void **)&adev->wb.wb);
d38ceaf9
AD
1018 adev->wb.wb_obj = NULL;
1019 }
1020}
1021
1022/**
03f2abb0 1023 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1024 *
1025 * @adev: amdgpu_device pointer
1026 *
455a7bc2 1027 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1028 * Used at driver startup.
1029 * Returns 0 on success or an -error on failure.
1030 */
06ec9070 1031static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1032{
1033 int r;
1034
1035 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1036 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1037 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1038 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1039 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1040 (void **)&adev->wb.wb);
d38ceaf9
AD
1041 if (r) {
1042 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1043 return r;
1044 }
d38ceaf9
AD
1045
1046 adev->wb.num_wb = AMDGPU_MAX_WB;
1047 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1048
1049 /* clear wb memory */
73469585 1050 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1051 }
1052
1053 return 0;
1054}
1055
1056/**
131b4b36 1057 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1058 *
1059 * @adev: amdgpu_device pointer
1060 * @wb: wb index
1061 *
1062 * Allocate a wb slot for use by the driver (all asics).
1063 * Returns 0 on success or -EINVAL on failure.
1064 */
131b4b36 1065int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1066{
1067 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1068
97407b63 1069 if (offset < adev->wb.num_wb) {
7014285a 1070 __set_bit(offset, adev->wb.used);
63ae07ca 1071 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1072 return 0;
1073 } else {
1074 return -EINVAL;
1075 }
1076}
1077
d38ceaf9 1078/**
131b4b36 1079 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1080 *
1081 * @adev: amdgpu_device pointer
1082 * @wb: wb index
1083 *
1084 * Free a wb slot allocated for use by the driver (all asics)
1085 */
131b4b36 1086void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1087{
73469585 1088 wb >>= 3;
d38ceaf9 1089 if (wb < adev->wb.num_wb)
73469585 1090 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1091}
1092
d6895ad3
CK
1093/**
1094 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1095 *
1096 * @adev: amdgpu_device pointer
1097 *
1098 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1099 * to fail, but if any of the BARs is not accessible after the size we abort
1100 * driver loading by returning -ENODEV.
1101 */
1102int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1103{
453f617a 1104 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1105 struct pci_bus *root;
1106 struct resource *res;
b8920e1e 1107 unsigned int i;
d6895ad3
CK
1108 u16 cmd;
1109 int r;
1110
822130b5
AB
1111 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1112 return 0;
1113
0c03b912 1114 /* Bypass for VF */
1115 if (amdgpu_sriov_vf(adev))
1116 return 0;
1117
b7221f2b
AD
1118 /* skip if the bios has already enabled large BAR */
1119 if (adev->gmc.real_vram_size &&
1120 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1121 return 0;
1122
31b8adab
CK
1123 /* Check if the root BUS has 64bit memory resources */
1124 root = adev->pdev->bus;
1125 while (root->parent)
1126 root = root->parent;
1127
1128 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1129 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1130 res->start > 0x100000000ull)
1131 break;
1132 }
1133
1134 /* Trying to resize is pointless without a root hub window above 4GB */
1135 if (!res)
1136 return 0;
1137
453f617a
ND
1138 /* Limit the BAR size to what is available */
1139 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1140 rbar_size);
1141
d6895ad3
CK
1142 /* Disable memory decoding while we change the BAR addresses and size */
1143 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1144 pci_write_config_word(adev->pdev, PCI_COMMAND,
1145 cmd & ~PCI_COMMAND_MEMORY);
1146
1147 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1148 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1149 if (adev->asic_type >= CHIP_BONAIRE)
1150 pci_release_resource(adev->pdev, 2);
1151
1152 pci_release_resource(adev->pdev, 0);
1153
1154 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1155 if (r == -ENOSPC)
1156 DRM_INFO("Not enough PCI address space for a large BAR.");
1157 else if (r && r != -ENOTSUPP)
1158 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1159
1160 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1161
1162 /* When the doorbell or fb BAR isn't available we have no chance of
1163 * using the device.
1164 */
43c064db 1165 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1166 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1167 return -ENODEV;
1168
1169 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1170
1171 return 0;
1172}
a05502e5 1173
9535a86a
SZ
1174static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1175{
b8920e1e 1176 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1177 return false;
9535a86a
SZ
1178
1179 return true;
1180}
1181
d38ceaf9
AD
1182/*
1183 * GPU helpers function.
1184 */
1185/**
39c640c0 1186 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1187 *
1188 * @adev: amdgpu_device pointer
1189 *
c836fec5
JQ
1190 * Check if the asic has been initialized (all asics) at driver startup
1191 * or post is needed if hw reset is performed.
1192 * Returns true if need or false if not.
d38ceaf9 1193 */
39c640c0 1194bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1195{
1196 uint32_t reg;
1197
bec86378
ML
1198 if (amdgpu_sriov_vf(adev))
1199 return false;
1200
9535a86a
SZ
1201 if (!amdgpu_device_read_bios(adev))
1202 return false;
1203
bec86378 1204 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1205 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1206 * some old smc fw still need driver do vPost otherwise gpu hang, while
1207 * those smc fw version above 22.15 doesn't have this flaw, so we force
1208 * vpost executed for smc version below 22.15
bec86378
ML
1209 */
1210 if (adev->asic_type == CHIP_FIJI) {
1211 int err;
1212 uint32_t fw_ver;
b8920e1e 1213
bec86378
ML
1214 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1215 /* force vPost if error occured */
1216 if (err)
1217 return true;
1218
1219 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1220 if (fw_ver < 0x00160e00)
1221 return true;
bec86378 1222 }
bec86378 1223 }
91fe77eb 1224
e3c1b071 1225 /* Don't post if we need to reset whole hive on init */
1226 if (adev->gmc.xgmi.pending_reset)
1227 return false;
1228
91fe77eb 1229 if (adev->has_hw_reset) {
1230 adev->has_hw_reset = false;
1231 return true;
1232 }
1233
1234 /* bios scratch used on CIK+ */
1235 if (adev->asic_type >= CHIP_BONAIRE)
1236 return amdgpu_atombios_scratch_need_asic_init(adev);
1237
1238 /* check MEM_SIZE for older asics */
1239 reg = amdgpu_asic_get_config_memsize(adev);
1240
1241 if ((reg != 0) && (reg != 0xffffffff))
1242 return false;
1243
1244 return true;
bec86378
ML
1245}
1246
5d1eb4c4
ML
1247/*
1248 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1249 * speed switching. Until we have confirmation from Intel that a specific host
1250 * supports it, it's safer that we keep it disabled for all.
1251 *
1252 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1253 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1254 */
1255bool amdgpu_device_pcie_dynamic_switching_supported(void)
1256{
1257#if IS_ENABLED(CONFIG_X86)
1258 struct cpuinfo_x86 *c = &cpu_data(0);
1259
1260 if (c->x86_vendor == X86_VENDOR_INTEL)
1261 return false;
1262#endif
1263 return true;
1264}
1265
0ab5d711
ML
1266/**
1267 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1268 *
1269 * @adev: amdgpu_device pointer
1270 *
1271 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1272 * be set for this device.
1273 *
1274 * Returns true if it should be used or false if not.
1275 */
1276bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1277{
1278 switch (amdgpu_aspm) {
1279 case -1:
1280 break;
1281 case 0:
1282 return false;
1283 case 1:
1284 return true;
1285 default:
1286 return false;
1287 }
1288 return pcie_aspm_enabled(adev->pdev);
1289}
1290
3ad5dcfe
KHF
1291bool amdgpu_device_aspm_support_quirk(void)
1292{
1293#if IS_ENABLED(CONFIG_X86)
1294 struct cpuinfo_x86 *c = &cpu_data(0);
1295
1296 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1297#else
1298 return true;
1299#endif
1300}
1301
d38ceaf9
AD
1302/* if we get transitioned to only one device, take VGA back */
1303/**
06ec9070 1304 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1305 *
bf44e8ce 1306 * @pdev: PCI device pointer
d38ceaf9
AD
1307 * @state: enable/disable vga decode
1308 *
1309 * Enable/disable vga decode (all asics).
1310 * Returns VGA resource flags.
1311 */
bf44e8ce
CH
1312static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1313 bool state)
d38ceaf9 1314{
bf44e8ce 1315 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1316
d38ceaf9
AD
1317 amdgpu_asic_set_vga_state(adev, state);
1318 if (state)
1319 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1320 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1321 else
1322 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1323}
1324
e3ecdffa
AD
1325/**
1326 * amdgpu_device_check_block_size - validate the vm block size
1327 *
1328 * @adev: amdgpu_device pointer
1329 *
1330 * Validates the vm block size specified via module parameter.
1331 * The vm block size defines number of bits in page table versus page directory,
1332 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1333 * page table and the remaining bits are in the page directory.
1334 */
06ec9070 1335static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1336{
1337 /* defines number of bits in page table versus page directory,
1338 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1339 * page table and the remaining bits are in the page directory
1340 */
bab4fee7
JZ
1341 if (amdgpu_vm_block_size == -1)
1342 return;
a1adf8be 1343
bab4fee7 1344 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1345 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1346 amdgpu_vm_block_size);
97489129 1347 amdgpu_vm_block_size = -1;
a1adf8be 1348 }
a1adf8be
CZ
1349}
1350
e3ecdffa
AD
1351/**
1352 * amdgpu_device_check_vm_size - validate the vm size
1353 *
1354 * @adev: amdgpu_device pointer
1355 *
1356 * Validates the vm size in GB specified via module parameter.
1357 * The VM size is the size of the GPU virtual memory space in GB.
1358 */
06ec9070 1359static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1360{
64dab074
AD
1361 /* no need to check the default value */
1362 if (amdgpu_vm_size == -1)
1363 return;
1364
83ca145d
ZJ
1365 if (amdgpu_vm_size < 1) {
1366 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1367 amdgpu_vm_size);
f3368128 1368 amdgpu_vm_size = -1;
83ca145d 1369 }
83ca145d
ZJ
1370}
1371
7951e376
RZ
1372static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1373{
1374 struct sysinfo si;
a9d4fe2f 1375 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1376 uint64_t total_memory;
1377 uint64_t dram_size_seven_GB = 0x1B8000000;
1378 uint64_t dram_size_three_GB = 0xB8000000;
1379
1380 if (amdgpu_smu_memory_pool_size == 0)
1381 return;
1382
1383 if (!is_os_64) {
1384 DRM_WARN("Not 64-bit OS, feature not supported\n");
1385 goto def_value;
1386 }
1387 si_meminfo(&si);
1388 total_memory = (uint64_t)si.totalram * si.mem_unit;
1389
1390 if ((amdgpu_smu_memory_pool_size == 1) ||
1391 (amdgpu_smu_memory_pool_size == 2)) {
1392 if (total_memory < dram_size_three_GB)
1393 goto def_value1;
1394 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1395 (amdgpu_smu_memory_pool_size == 8)) {
1396 if (total_memory < dram_size_seven_GB)
1397 goto def_value1;
1398 } else {
1399 DRM_WARN("Smu memory pool size not supported\n");
1400 goto def_value;
1401 }
1402 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1403
1404 return;
1405
1406def_value1:
1407 DRM_WARN("No enough system memory\n");
1408def_value:
1409 adev->pm.smu_prv_buffer_size = 0;
1410}
1411
9f6a7857
HR
1412static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1413{
1414 if (!(adev->flags & AMD_IS_APU) ||
1415 adev->asic_type < CHIP_RAVEN)
1416 return 0;
1417
1418 switch (adev->asic_type) {
1419 case CHIP_RAVEN:
1420 if (adev->pdev->device == 0x15dd)
1421 adev->apu_flags |= AMD_APU_IS_RAVEN;
1422 if (adev->pdev->device == 0x15d8)
1423 adev->apu_flags |= AMD_APU_IS_PICASSO;
1424 break;
1425 case CHIP_RENOIR:
1426 if ((adev->pdev->device == 0x1636) ||
1427 (adev->pdev->device == 0x164c))
1428 adev->apu_flags |= AMD_APU_IS_RENOIR;
1429 else
1430 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1431 break;
1432 case CHIP_VANGOGH:
1433 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1434 break;
1435 case CHIP_YELLOW_CARP:
1436 break;
d0f56dc2 1437 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1438 if ((adev->pdev->device == 0x13FE) ||
1439 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1440 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1441 break;
9f6a7857 1442 default:
4eaf21b7 1443 break;
9f6a7857
HR
1444 }
1445
1446 return 0;
1447}
1448
d38ceaf9 1449/**
06ec9070 1450 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1451 *
1452 * @adev: amdgpu_device pointer
1453 *
1454 * Validates certain module parameters and updates
1455 * the associated values used by the driver (all asics).
1456 */
912dfc84 1457static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1458{
5b011235
CZ
1459 if (amdgpu_sched_jobs < 4) {
1460 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1461 amdgpu_sched_jobs);
1462 amdgpu_sched_jobs = 4;
47fc644f 1463 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1464 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1465 amdgpu_sched_jobs);
1466 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1467 }
d38ceaf9 1468
83e74db6 1469 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1470 /* gart size must be greater or equal to 32M */
1471 dev_warn(adev->dev, "gart size (%d) too small\n",
1472 amdgpu_gart_size);
83e74db6 1473 amdgpu_gart_size = -1;
d38ceaf9
AD
1474 }
1475
36d38372 1476 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1477 /* gtt size must be greater or equal to 32M */
36d38372
CK
1478 dev_warn(adev->dev, "gtt size (%d) too small\n",
1479 amdgpu_gtt_size);
1480 amdgpu_gtt_size = -1;
d38ceaf9
AD
1481 }
1482
d07f14be
RH
1483 /* valid range is between 4 and 9 inclusive */
1484 if (amdgpu_vm_fragment_size != -1 &&
1485 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1486 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1487 amdgpu_vm_fragment_size = -1;
1488 }
1489
5d5bd5e3
KW
1490 if (amdgpu_sched_hw_submission < 2) {
1491 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1492 amdgpu_sched_hw_submission);
1493 amdgpu_sched_hw_submission = 2;
1494 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1495 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1496 amdgpu_sched_hw_submission);
1497 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1498 }
1499
2656fd23
AG
1500 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1501 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1502 amdgpu_reset_method = -1;
1503 }
1504
7951e376
RZ
1505 amdgpu_device_check_smu_prv_buffer_size(adev);
1506
06ec9070 1507 amdgpu_device_check_vm_size(adev);
d38ceaf9 1508
06ec9070 1509 amdgpu_device_check_block_size(adev);
6a7f76e7 1510
19aede77 1511 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1512
e3c00faa 1513 return 0;
d38ceaf9
AD
1514}
1515
1516/**
1517 * amdgpu_switcheroo_set_state - set switcheroo state
1518 *
1519 * @pdev: pci dev pointer
1694467b 1520 * @state: vga_switcheroo state
d38ceaf9 1521 *
12024b17 1522 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1523 * the asics before or after it is powered up using ACPI methods.
1524 */
8aba21b7
LT
1525static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1526 enum vga_switcheroo_state state)
d38ceaf9
AD
1527{
1528 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1529 int r;
d38ceaf9 1530
b98c6299 1531 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1532 return;
1533
1534 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1535 pr_info("switched on\n");
d38ceaf9
AD
1536 /* don't suspend or resume card normally */
1537 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1538
8f66090b
TZ
1539 pci_set_power_state(pdev, PCI_D0);
1540 amdgpu_device_load_pci_state(pdev);
1541 r = pci_enable_device(pdev);
de185019
AD
1542 if (r)
1543 DRM_WARN("pci_enable_device failed (%d)\n", r);
1544 amdgpu_device_resume(dev, true);
d38ceaf9 1545
d38ceaf9 1546 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1547 } else {
dd4fa6c1 1548 pr_info("switched off\n");
d38ceaf9 1549 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1550 amdgpu_device_suspend(dev, true);
8f66090b 1551 amdgpu_device_cache_pci_state(pdev);
de185019 1552 /* Shut down the device */
8f66090b
TZ
1553 pci_disable_device(pdev);
1554 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1555 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1556 }
1557}
1558
1559/**
1560 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1561 *
1562 * @pdev: pci dev pointer
1563 *
1564 * Callback for the switcheroo driver. Check of the switcheroo
1565 * state can be changed.
1566 * Returns true if the state can be changed, false if not.
1567 */
1568static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1569{
1570 struct drm_device *dev = pci_get_drvdata(pdev);
1571
b8920e1e 1572 /*
d38ceaf9
AD
1573 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1574 * locking inversion with the driver load path. And the access here is
1575 * completely racy anyway. So don't bother with locking for now.
1576 */
7e13ad89 1577 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1578}
1579
1580static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1581 .set_gpu_state = amdgpu_switcheroo_set_state,
1582 .reprobe = NULL,
1583 .can_switch = amdgpu_switcheroo_can_switch,
1584};
1585
e3ecdffa
AD
1586/**
1587 * amdgpu_device_ip_set_clockgating_state - set the CG state
1588 *
87e3f136 1589 * @dev: amdgpu_device pointer
e3ecdffa
AD
1590 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1591 * @state: clockgating state (gate or ungate)
1592 *
1593 * Sets the requested clockgating state for all instances of
1594 * the hardware IP specified.
1595 * Returns the error code from the last instance.
1596 */
43fa561f 1597int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1598 enum amd_ip_block_type block_type,
1599 enum amd_clockgating_state state)
d38ceaf9 1600{
43fa561f 1601 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1602 int i, r = 0;
1603
1604 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1605 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1606 continue;
c722865a
RZ
1607 if (adev->ip_blocks[i].version->type != block_type)
1608 continue;
1609 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1610 continue;
1611 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1612 (void *)adev, state);
1613 if (r)
1614 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1615 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1616 }
1617 return r;
1618}
1619
e3ecdffa
AD
1620/**
1621 * amdgpu_device_ip_set_powergating_state - set the PG state
1622 *
87e3f136 1623 * @dev: amdgpu_device pointer
e3ecdffa
AD
1624 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1625 * @state: powergating state (gate or ungate)
1626 *
1627 * Sets the requested powergating state for all instances of
1628 * the hardware IP specified.
1629 * Returns the error code from the last instance.
1630 */
43fa561f 1631int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1632 enum amd_ip_block_type block_type,
1633 enum amd_powergating_state state)
d38ceaf9 1634{
43fa561f 1635 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1636 int i, r = 0;
1637
1638 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1639 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1640 continue;
c722865a
RZ
1641 if (adev->ip_blocks[i].version->type != block_type)
1642 continue;
1643 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1644 continue;
1645 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1646 (void *)adev, state);
1647 if (r)
1648 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1649 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1650 }
1651 return r;
1652}
1653
e3ecdffa
AD
1654/**
1655 * amdgpu_device_ip_get_clockgating_state - get the CG state
1656 *
1657 * @adev: amdgpu_device pointer
1658 * @flags: clockgating feature flags
1659 *
1660 * Walks the list of IPs on the device and updates the clockgating
1661 * flags for each IP.
1662 * Updates @flags with the feature flags for each hardware IP where
1663 * clockgating is enabled.
1664 */
2990a1fc 1665void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1666 u64 *flags)
6cb2d4e4
HR
1667{
1668 int i;
1669
1670 for (i = 0; i < adev->num_ip_blocks; i++) {
1671 if (!adev->ip_blocks[i].status.valid)
1672 continue;
1673 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1674 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1675 }
1676}
1677
e3ecdffa
AD
1678/**
1679 * amdgpu_device_ip_wait_for_idle - wait for idle
1680 *
1681 * @adev: amdgpu_device pointer
1682 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1683 *
1684 * Waits for the request hardware IP to be idle.
1685 * Returns 0 for success or a negative error code on failure.
1686 */
2990a1fc
AD
1687int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1688 enum amd_ip_block_type block_type)
5dbbb60b
AD
1689{
1690 int i, r;
1691
1692 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1693 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1694 continue;
a1255107
AD
1695 if (adev->ip_blocks[i].version->type == block_type) {
1696 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1697 if (r)
1698 return r;
1699 break;
1700 }
1701 }
1702 return 0;
1703
1704}
1705
e3ecdffa
AD
1706/**
1707 * amdgpu_device_ip_is_idle - is the hardware IP idle
1708 *
1709 * @adev: amdgpu_device pointer
1710 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1711 *
1712 * Check if the hardware IP is idle or not.
1713 * Returns true if it the IP is idle, false if not.
1714 */
2990a1fc
AD
1715bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1716 enum amd_ip_block_type block_type)
5dbbb60b
AD
1717{
1718 int i;
1719
1720 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1721 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1722 continue;
a1255107
AD
1723 if (adev->ip_blocks[i].version->type == block_type)
1724 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1725 }
1726 return true;
1727
1728}
1729
e3ecdffa
AD
1730/**
1731 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1732 *
1733 * @adev: amdgpu_device pointer
87e3f136 1734 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1735 *
1736 * Returns a pointer to the hardware IP block structure
1737 * if it exists for the asic, otherwise NULL.
1738 */
2990a1fc
AD
1739struct amdgpu_ip_block *
1740amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1741 enum amd_ip_block_type type)
d38ceaf9
AD
1742{
1743 int i;
1744
1745 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1746 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1747 return &adev->ip_blocks[i];
1748
1749 return NULL;
1750}
1751
1752/**
2990a1fc 1753 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1754 *
1755 * @adev: amdgpu_device pointer
5fc3aeeb 1756 * @type: enum amd_ip_block_type
d38ceaf9
AD
1757 * @major: major version
1758 * @minor: minor version
1759 *
1760 * return 0 if equal or greater
1761 * return 1 if smaller or the ip_block doesn't exist
1762 */
2990a1fc
AD
1763int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1764 enum amd_ip_block_type type,
1765 u32 major, u32 minor)
d38ceaf9 1766{
2990a1fc 1767 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1768
a1255107
AD
1769 if (ip_block && ((ip_block->version->major > major) ||
1770 ((ip_block->version->major == major) &&
1771 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1772 return 0;
1773
1774 return 1;
1775}
1776
a1255107 1777/**
2990a1fc 1778 * amdgpu_device_ip_block_add
a1255107
AD
1779 *
1780 * @adev: amdgpu_device pointer
1781 * @ip_block_version: pointer to the IP to add
1782 *
1783 * Adds the IP block driver information to the collection of IPs
1784 * on the asic.
1785 */
2990a1fc
AD
1786int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1787 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1788{
1789 if (!ip_block_version)
1790 return -EINVAL;
1791
7bd939d0
LG
1792 switch (ip_block_version->type) {
1793 case AMD_IP_BLOCK_TYPE_VCN:
1794 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1795 return 0;
1796 break;
1797 case AMD_IP_BLOCK_TYPE_JPEG:
1798 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1799 return 0;
1800 break;
1801 default:
1802 break;
1803 }
1804
e966a725 1805 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1806 ip_block_version->funcs->name);
1807
a1255107
AD
1808 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1809
1810 return 0;
1811}
1812
e3ecdffa
AD
1813/**
1814 * amdgpu_device_enable_virtual_display - enable virtual display feature
1815 *
1816 * @adev: amdgpu_device pointer
1817 *
1818 * Enabled the virtual display feature if the user has enabled it via
1819 * the module parameter virtual_display. This feature provides a virtual
1820 * display hardware on headless boards or in virtualized environments.
1821 * This function parses and validates the configuration string specified by
1822 * the user and configues the virtual display configuration (number of
1823 * virtual connectors, crtcs, etc.) specified.
1824 */
483ef985 1825static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1826{
1827 adev->enable_virtual_display = false;
1828
1829 if (amdgpu_virtual_display) {
8f66090b 1830 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1831 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1832
1833 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1834 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1835 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1836 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1837 if (!strcmp("all", pciaddname)
1838 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1839 long num_crtc;
1840 int res = -1;
1841
9accf2fd 1842 adev->enable_virtual_display = true;
0f66356d
ED
1843
1844 if (pciaddname_tmp)
1845 res = kstrtol(pciaddname_tmp, 10,
1846 &num_crtc);
1847
1848 if (!res) {
1849 if (num_crtc < 1)
1850 num_crtc = 1;
1851 if (num_crtc > 6)
1852 num_crtc = 6;
1853 adev->mode_info.num_crtc = num_crtc;
1854 } else {
1855 adev->mode_info.num_crtc = 1;
1856 }
9accf2fd
ED
1857 break;
1858 }
1859 }
1860
0f66356d
ED
1861 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1862 amdgpu_virtual_display, pci_address_name,
1863 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1864
1865 kfree(pciaddstr);
1866 }
1867}
1868
25263da3
AD
1869void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1870{
1871 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1872 adev->mode_info.num_crtc = 1;
1873 adev->enable_virtual_display = true;
1874 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1875 adev->enable_virtual_display, adev->mode_info.num_crtc);
1876 }
1877}
1878
e3ecdffa
AD
1879/**
1880 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1881 *
1882 * @adev: amdgpu_device pointer
1883 *
1884 * Parses the asic configuration parameters specified in the gpu info
1885 * firmware and makes them availale to the driver for use in configuring
1886 * the asic.
1887 * Returns 0 on success, -EINVAL on failure.
1888 */
e2a75f88
AD
1889static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1890{
e2a75f88 1891 const char *chip_name;
c0a43457 1892 char fw_name[40];
e2a75f88
AD
1893 int err;
1894 const struct gpu_info_firmware_header_v1_0 *hdr;
1895
ab4fe3e1
HR
1896 adev->firmware.gpu_info_fw = NULL;
1897
72de33f8 1898 if (adev->mman.discovery_bin) {
cc375d8c
TY
1899 /*
1900 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1901 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1902 * when DAL no longer needs it.
1903 */
1904 if (adev->asic_type != CHIP_NAVI12)
1905 return 0;
258620d0
AD
1906 }
1907
e2a75f88 1908 switch (adev->asic_type) {
e2a75f88
AD
1909 default:
1910 return 0;
1911 case CHIP_VEGA10:
1912 chip_name = "vega10";
1913 break;
3f76dced
AD
1914 case CHIP_VEGA12:
1915 chip_name = "vega12";
1916 break;
2d2e5e7e 1917 case CHIP_RAVEN:
54f78a76 1918 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1919 chip_name = "raven2";
54f78a76 1920 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1921 chip_name = "picasso";
54c4d17e
FX
1922 else
1923 chip_name = "raven";
2d2e5e7e 1924 break;
65e60f6e
LM
1925 case CHIP_ARCTURUS:
1926 chip_name = "arcturus";
1927 break;
42b325e5
XY
1928 case CHIP_NAVI12:
1929 chip_name = "navi12";
1930 break;
e2a75f88
AD
1931 }
1932
1933 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1934 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1935 if (err) {
1936 dev_err(adev->dev,
b31d3063 1937 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1938 fw_name);
1939 goto out;
1940 }
1941
ab4fe3e1 1942 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1943 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1944
1945 switch (hdr->version_major) {
1946 case 1:
1947 {
1948 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1949 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1950 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1951
cc375d8c
TY
1952 /*
1953 * Should be droped when DAL no longer needs it.
1954 */
1955 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1956 goto parse_soc_bounding_box;
1957
b5ab16bf
AD
1958 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1959 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1960 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1961 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1962 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1963 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1964 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1965 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1966 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1967 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1968 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1969 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1970 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1971 adev->gfx.cu_info.max_waves_per_simd =
1972 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1973 adev->gfx.cu_info.max_scratch_slots_per_cu =
1974 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1975 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1976 if (hdr->version_minor >= 1) {
35c2e910
HZ
1977 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1978 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1979 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1980 adev->gfx.config.num_sc_per_sh =
1981 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1982 adev->gfx.config.num_packer_per_sc =
1983 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1984 }
ec51d3fa
XY
1985
1986parse_soc_bounding_box:
ec51d3fa
XY
1987 /*
1988 * soc bounding box info is not integrated in disocovery table,
258620d0 1989 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 1990 */
48321c3d
HW
1991 if (hdr->version_minor == 2) {
1992 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1993 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1994 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1995 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1996 }
e2a75f88
AD
1997 break;
1998 }
1999 default:
2000 dev_err(adev->dev,
2001 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2002 err = -EINVAL;
2003 goto out;
2004 }
2005out:
e2a75f88
AD
2006 return err;
2007}
2008
e3ecdffa
AD
2009/**
2010 * amdgpu_device_ip_early_init - run early init for hardware IPs
2011 *
2012 * @adev: amdgpu_device pointer
2013 *
2014 * Early initialization pass for hardware IPs. The hardware IPs that make
2015 * up each asic are discovered each IP's early_init callback is run. This
2016 * is the first stage in initializing the asic.
2017 * Returns 0 on success, negative error code on failure.
2018 */
06ec9070 2019static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2020{
901e2be2
AD
2021 struct drm_device *dev = adev_to_drm(adev);
2022 struct pci_dev *parent;
aaa36a97 2023 int i, r;
ced69502 2024 bool total;
d38ceaf9 2025
483ef985 2026 amdgpu_device_enable_virtual_display(adev);
a6be7570 2027
00a979f3 2028 if (amdgpu_sriov_vf(adev)) {
00a979f3 2029 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2030 if (r)
2031 return r;
00a979f3
WS
2032 }
2033
d38ceaf9 2034 switch (adev->asic_type) {
33f34802
KW
2035#ifdef CONFIG_DRM_AMDGPU_SI
2036 case CHIP_VERDE:
2037 case CHIP_TAHITI:
2038 case CHIP_PITCAIRN:
2039 case CHIP_OLAND:
2040 case CHIP_HAINAN:
295d0daf 2041 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2042 r = si_set_ip_blocks(adev);
2043 if (r)
2044 return r;
2045 break;
2046#endif
a2e73f56
AD
2047#ifdef CONFIG_DRM_AMDGPU_CIK
2048 case CHIP_BONAIRE:
2049 case CHIP_HAWAII:
2050 case CHIP_KAVERI:
2051 case CHIP_KABINI:
2052 case CHIP_MULLINS:
e1ad2d53 2053 if (adev->flags & AMD_IS_APU)
a2e73f56 2054 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2055 else
2056 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2057
2058 r = cik_set_ip_blocks(adev);
2059 if (r)
2060 return r;
2061 break;
2062#endif
da87c30b
AD
2063 case CHIP_TOPAZ:
2064 case CHIP_TONGA:
2065 case CHIP_FIJI:
2066 case CHIP_POLARIS10:
2067 case CHIP_POLARIS11:
2068 case CHIP_POLARIS12:
2069 case CHIP_VEGAM:
2070 case CHIP_CARRIZO:
2071 case CHIP_STONEY:
2072 if (adev->flags & AMD_IS_APU)
2073 adev->family = AMDGPU_FAMILY_CZ;
2074 else
2075 adev->family = AMDGPU_FAMILY_VI;
2076
2077 r = vi_set_ip_blocks(adev);
2078 if (r)
2079 return r;
2080 break;
d38ceaf9 2081 default:
63352b7f
AD
2082 r = amdgpu_discovery_set_ip_blocks(adev);
2083 if (r)
2084 return r;
2085 break;
d38ceaf9
AD
2086 }
2087
901e2be2
AD
2088 if (amdgpu_has_atpx() &&
2089 (amdgpu_is_atpx_hybrid() ||
2090 amdgpu_has_atpx_dgpu_power_cntl()) &&
2091 ((adev->flags & AMD_IS_APU) == 0) &&
2092 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2093 adev->flags |= AMD_IS_PX;
2094
85ac2021
AD
2095 if (!(adev->flags & AMD_IS_APU)) {
2096 parent = pci_upstream_bridge(adev->pdev);
2097 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2098 }
901e2be2 2099
1884734a 2100
3b94fb10 2101 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2102 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2103 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2104 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2105 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2106
ced69502 2107 total = true;
d38ceaf9
AD
2108 for (i = 0; i < adev->num_ip_blocks; i++) {
2109 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2110 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2111 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2112 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2113 } else {
a1255107
AD
2114 if (adev->ip_blocks[i].version->funcs->early_init) {
2115 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2116 if (r == -ENOENT) {
a1255107 2117 adev->ip_blocks[i].status.valid = false;
2c1a2784 2118 } else if (r) {
a1255107
AD
2119 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2120 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2121 total = false;
2c1a2784 2122 } else {
a1255107 2123 adev->ip_blocks[i].status.valid = true;
2c1a2784 2124 }
974e6b64 2125 } else {
a1255107 2126 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2127 }
d38ceaf9 2128 }
21a249ca
AD
2129 /* get the vbios after the asic_funcs are set up */
2130 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2131 r = amdgpu_device_parse_gpu_info_fw(adev);
2132 if (r)
2133 return r;
2134
21a249ca 2135 /* Read BIOS */
9535a86a
SZ
2136 if (amdgpu_device_read_bios(adev)) {
2137 if (!amdgpu_get_bios(adev))
2138 return -EINVAL;
21a249ca 2139
9535a86a
SZ
2140 r = amdgpu_atombios_init(adev);
2141 if (r) {
2142 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2143 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2144 return r;
2145 }
21a249ca 2146 }
77eabc6f
PJZ
2147
2148 /*get pf2vf msg info at it's earliest time*/
2149 if (amdgpu_sriov_vf(adev))
2150 amdgpu_virt_init_data_exchange(adev);
2151
21a249ca 2152 }
d38ceaf9 2153 }
ced69502
ML
2154 if (!total)
2155 return -ENODEV;
d38ceaf9 2156
00fa4035 2157 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2158 adev->cg_flags &= amdgpu_cg_mask;
2159 adev->pg_flags &= amdgpu_pg_mask;
2160
d38ceaf9
AD
2161 return 0;
2162}
2163
0a4f2520
RZ
2164static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2165{
2166 int i, r;
2167
2168 for (i = 0; i < adev->num_ip_blocks; i++) {
2169 if (!adev->ip_blocks[i].status.sw)
2170 continue;
2171 if (adev->ip_blocks[i].status.hw)
2172 continue;
2173 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2174 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2176 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2177 if (r) {
2178 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2179 adev->ip_blocks[i].version->funcs->name, r);
2180 return r;
2181 }
2182 adev->ip_blocks[i].status.hw = true;
2183 }
2184 }
2185
2186 return 0;
2187}
2188
2189static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2190{
2191 int i, r;
2192
2193 for (i = 0; i < adev->num_ip_blocks; i++) {
2194 if (!adev->ip_blocks[i].status.sw)
2195 continue;
2196 if (adev->ip_blocks[i].status.hw)
2197 continue;
2198 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2199 if (r) {
2200 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2201 adev->ip_blocks[i].version->funcs->name, r);
2202 return r;
2203 }
2204 adev->ip_blocks[i].status.hw = true;
2205 }
2206
2207 return 0;
2208}
2209
7a3e0bb2
RZ
2210static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2211{
2212 int r = 0;
2213 int i;
80f41f84 2214 uint32_t smu_version;
7a3e0bb2
RZ
2215
2216 if (adev->asic_type >= CHIP_VEGA10) {
2217 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2218 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2219 continue;
2220
e3c1b071 2221 if (!adev->ip_blocks[i].status.sw)
2222 continue;
2223
482f0e53
ML
2224 /* no need to do the fw loading again if already done*/
2225 if (adev->ip_blocks[i].status.hw == true)
2226 break;
2227
53b3f8f4 2228 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2229 r = adev->ip_blocks[i].version->funcs->resume(adev);
2230 if (r) {
2231 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2232 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2233 return r;
2234 }
2235 } else {
2236 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2237 if (r) {
2238 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2239 adev->ip_blocks[i].version->funcs->name, r);
2240 return r;
7a3e0bb2 2241 }
7a3e0bb2 2242 }
482f0e53
ML
2243
2244 adev->ip_blocks[i].status.hw = true;
2245 break;
7a3e0bb2
RZ
2246 }
2247 }
482f0e53 2248
8973d9ec
ED
2249 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2250 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2251
80f41f84 2252 return r;
7a3e0bb2
RZ
2253}
2254
5fd8518d
AG
2255static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2256{
2257 long timeout;
2258 int r, i;
2259
2260 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2261 struct amdgpu_ring *ring = adev->rings[i];
2262
2263 /* No need to setup the GPU scheduler for rings that don't need it */
2264 if (!ring || ring->no_scheduler)
2265 continue;
2266
2267 switch (ring->funcs->type) {
2268 case AMDGPU_RING_TYPE_GFX:
2269 timeout = adev->gfx_timeout;
2270 break;
2271 case AMDGPU_RING_TYPE_COMPUTE:
2272 timeout = adev->compute_timeout;
2273 break;
2274 case AMDGPU_RING_TYPE_SDMA:
2275 timeout = adev->sdma_timeout;
2276 break;
2277 default:
2278 timeout = adev->video_timeout;
2279 break;
2280 }
2281
2282 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
56e44960 2283 DRM_SCHED_PRIORITY_COUNT,
11f25c84 2284 ring->num_hw_submission, 0,
8ab62eda
JG
2285 timeout, adev->reset_domain->wq,
2286 ring->sched_score, ring->name,
2287 adev->dev);
5fd8518d
AG
2288 if (r) {
2289 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2290 ring->name);
2291 return r;
2292 }
2293 }
2294
d425c6f4
JZ
2295 amdgpu_xcp_update_partition_sched_list(adev);
2296
5fd8518d
AG
2297 return 0;
2298}
2299
2300
e3ecdffa
AD
2301/**
2302 * amdgpu_device_ip_init - run init for hardware IPs
2303 *
2304 * @adev: amdgpu_device pointer
2305 *
2306 * Main initialization pass for hardware IPs. The list of all the hardware
2307 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2308 * are run. sw_init initializes the software state associated with each IP
2309 * and hw_init initializes the hardware associated with each IP.
2310 * Returns 0 on success, negative error code on failure.
2311 */
06ec9070 2312static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2313{
2314 int i, r;
2315
c030f2e4 2316 r = amdgpu_ras_init(adev);
2317 if (r)
2318 return r;
2319
d38ceaf9 2320 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2321 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2322 continue;
a1255107 2323 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2324 if (r) {
a1255107
AD
2325 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2326 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2327 goto init_failed;
2c1a2784 2328 }
a1255107 2329 adev->ip_blocks[i].status.sw = true;
bfca0289 2330
c1c39032
AD
2331 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2332 /* need to do common hw init early so everything is set up for gmc */
2333 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2334 if (r) {
2335 DRM_ERROR("hw_init %d failed %d\n", i, r);
2336 goto init_failed;
2337 }
2338 adev->ip_blocks[i].status.hw = true;
2339 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2340 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2341 /* Try to reserve bad pages early */
2342 if (amdgpu_sriov_vf(adev))
2343 amdgpu_virt_exchange_data(adev);
2344
7ccfd79f 2345 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2346 if (r) {
7ccfd79f 2347 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2348 goto init_failed;
2c1a2784 2349 }
a1255107 2350 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2351 if (r) {
2352 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2353 goto init_failed;
2c1a2784 2354 }
06ec9070 2355 r = amdgpu_device_wb_init(adev);
2c1a2784 2356 if (r) {
06ec9070 2357 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2358 goto init_failed;
2c1a2784 2359 }
a1255107 2360 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2361
2362 /* right after GMC hw init, we create CSA */
02ff519e 2363 if (adev->gfx.mcbp) {
1e256e27 2364 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2365 AMDGPU_GEM_DOMAIN_VRAM |
2366 AMDGPU_GEM_DOMAIN_GTT,
2367 AMDGPU_CSA_SIZE);
2493664f
ML
2368 if (r) {
2369 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2370 goto init_failed;
2493664f
ML
2371 }
2372 }
d38ceaf9
AD
2373 }
2374 }
2375
c9ffa427 2376 if (amdgpu_sriov_vf(adev))
22c16d25 2377 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2378
533aed27
AG
2379 r = amdgpu_ib_pool_init(adev);
2380 if (r) {
2381 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2382 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2383 goto init_failed;
2384 }
2385
c8963ea4
RZ
2386 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2387 if (r)
72d3f592 2388 goto init_failed;
0a4f2520
RZ
2389
2390 r = amdgpu_device_ip_hw_init_phase1(adev);
2391 if (r)
72d3f592 2392 goto init_failed;
0a4f2520 2393
7a3e0bb2
RZ
2394 r = amdgpu_device_fw_loading(adev);
2395 if (r)
72d3f592 2396 goto init_failed;
7a3e0bb2 2397
0a4f2520
RZ
2398 r = amdgpu_device_ip_hw_init_phase2(adev);
2399 if (r)
72d3f592 2400 goto init_failed;
d38ceaf9 2401
121a2bc6
AG
2402 /*
2403 * retired pages will be loaded from eeprom and reserved here,
2404 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2405 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2406 * for I2C communication which only true at this point.
b82e65a9
GC
2407 *
2408 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2409 * failure from bad gpu situation and stop amdgpu init process
2410 * accordingly. For other failed cases, it will still release all
2411 * the resource and print error message, rather than returning one
2412 * negative value to upper level.
121a2bc6
AG
2413 *
2414 * Note: theoretically, this should be called before all vram allocations
2415 * to protect retired page from abusing
2416 */
b82e65a9
GC
2417 r = amdgpu_ras_recovery_init(adev);
2418 if (r)
2419 goto init_failed;
121a2bc6 2420
cfbb6b00
AG
2421 /**
2422 * In case of XGMI grab extra reference for reset domain for this device
2423 */
a4c63caf 2424 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2425 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2426 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2427 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2428
dfd0287b
LH
2429 if (WARN_ON(!hive)) {
2430 r = -ENOENT;
2431 goto init_failed;
2432 }
2433
46c67660 2434 if (!hive->reset_domain ||
2435 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2436 r = -ENOENT;
2437 amdgpu_put_xgmi_hive(hive);
2438 goto init_failed;
2439 }
2440
2441 /* Drop the early temporary reset domain we created for device */
2442 amdgpu_reset_put_reset_domain(adev->reset_domain);
2443 adev->reset_domain = hive->reset_domain;
9dfa4860 2444 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2445 }
a4c63caf
AG
2446 }
2447 }
2448
5fd8518d
AG
2449 r = amdgpu_device_init_schedulers(adev);
2450 if (r)
2451 goto init_failed;
e3c1b071 2452
2453 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2454 if (!adev->gmc.xgmi.pending_reset) {
2455 kgd2kfd_init_zone_device(adev);
e3c1b071 2456 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2457 }
c6332b97 2458
bd607166
KR
2459 amdgpu_fru_get_product_info(adev);
2460
72d3f592 2461init_failed:
c6332b97 2462
72d3f592 2463 return r;
d38ceaf9
AD
2464}
2465
e3ecdffa
AD
2466/**
2467 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2468 *
2469 * @adev: amdgpu_device pointer
2470 *
2471 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2472 * this function before a GPU reset. If the value is retained after a
2473 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2474 */
06ec9070 2475static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2476{
2477 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2478}
2479
e3ecdffa
AD
2480/**
2481 * amdgpu_device_check_vram_lost - check if vram is valid
2482 *
2483 * @adev: amdgpu_device pointer
2484 *
2485 * Checks the reset magic value written to the gart pointer in VRAM.
2486 * The driver calls this after a GPU reset to see if the contents of
2487 * VRAM is lost or now.
2488 * returns true if vram is lost, false if not.
2489 */
06ec9070 2490static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2491{
dadce777
EQ
2492 if (memcmp(adev->gart.ptr, adev->reset_magic,
2493 AMDGPU_RESET_MAGIC_NUM))
2494 return true;
2495
53b3f8f4 2496 if (!amdgpu_in_reset(adev))
dadce777
EQ
2497 return false;
2498
2499 /*
2500 * For all ASICs with baco/mode1 reset, the VRAM is
2501 * always assumed to be lost.
2502 */
2503 switch (amdgpu_asic_reset_method(adev)) {
2504 case AMD_RESET_METHOD_BACO:
2505 case AMD_RESET_METHOD_MODE1:
2506 return true;
2507 default:
2508 return false;
2509 }
0c49e0b8
CZ
2510}
2511
e3ecdffa 2512/**
1112a46b 2513 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2514 *
2515 * @adev: amdgpu_device pointer
b8b72130 2516 * @state: clockgating state (gate or ungate)
e3ecdffa 2517 *
e3ecdffa 2518 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2519 * set_clockgating_state callbacks are run.
2520 * Late initialization pass enabling clockgating for hardware IPs.
2521 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2522 * Returns 0 on success, negative error code on failure.
2523 */
fdd34271 2524
5d89bb2d
LL
2525int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2526 enum amd_clockgating_state state)
d38ceaf9 2527{
1112a46b 2528 int i, j, r;
d38ceaf9 2529
4a2ba394
SL
2530 if (amdgpu_emu_mode == 1)
2531 return 0;
2532
1112a46b
RZ
2533 for (j = 0; j < adev->num_ip_blocks; j++) {
2534 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2535 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2536 continue;
47198eb7 2537 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2538 if (adev->in_s0ix &&
47198eb7
AD
2539 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2540 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2541 continue;
4a446d55 2542 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2543 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2544 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2545 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2546 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2547 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2548 /* enable clockgating to save power */
a1255107 2549 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2550 state);
4a446d55
AD
2551 if (r) {
2552 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2553 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2554 return r;
2555 }
b0b00ff1 2556 }
d38ceaf9 2557 }
06b18f61 2558
c9f96fd5
RZ
2559 return 0;
2560}
2561
5d89bb2d
LL
2562int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2563 enum amd_powergating_state state)
c9f96fd5 2564{
1112a46b 2565 int i, j, r;
06b18f61 2566
c9f96fd5
RZ
2567 if (amdgpu_emu_mode == 1)
2568 return 0;
2569
1112a46b
RZ
2570 for (j = 0; j < adev->num_ip_blocks; j++) {
2571 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2572 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2573 continue;
47198eb7 2574 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2575 if (adev->in_s0ix &&
47198eb7
AD
2576 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2577 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2578 continue;
c9f96fd5
RZ
2579 /* skip CG for VCE/UVD, it's handled specially */
2580 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2581 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2582 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2583 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2584 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2585 /* enable powergating to save power */
2586 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2587 state);
c9f96fd5
RZ
2588 if (r) {
2589 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2590 adev->ip_blocks[i].version->funcs->name, r);
2591 return r;
2592 }
2593 }
2594 }
2dc80b00
S
2595 return 0;
2596}
2597
beff74bc
AD
2598static int amdgpu_device_enable_mgpu_fan_boost(void)
2599{
2600 struct amdgpu_gpu_instance *gpu_ins;
2601 struct amdgpu_device *adev;
2602 int i, ret = 0;
2603
2604 mutex_lock(&mgpu_info.mutex);
2605
2606 /*
2607 * MGPU fan boost feature should be enabled
2608 * only when there are two or more dGPUs in
2609 * the system
2610 */
2611 if (mgpu_info.num_dgpu < 2)
2612 goto out;
2613
2614 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2615 gpu_ins = &(mgpu_info.gpu_ins[i]);
2616 adev = gpu_ins->adev;
2617 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2618 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2619 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2620 if (ret)
2621 break;
2622
2623 gpu_ins->mgpu_fan_enabled = 1;
2624 }
2625 }
2626
2627out:
2628 mutex_unlock(&mgpu_info.mutex);
2629
2630 return ret;
2631}
2632
e3ecdffa
AD
2633/**
2634 * amdgpu_device_ip_late_init - run late init for hardware IPs
2635 *
2636 * @adev: amdgpu_device pointer
2637 *
2638 * Late initialization pass for hardware IPs. The list of all the hardware
2639 * IPs that make up the asic is walked and the late_init callbacks are run.
2640 * late_init covers any special initialization that an IP requires
2641 * after all of the have been initialized or something that needs to happen
2642 * late in the init process.
2643 * Returns 0 on success, negative error code on failure.
2644 */
06ec9070 2645static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2646{
60599a03 2647 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2648 int i = 0, r;
2649
2650 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2651 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2652 continue;
2653 if (adev->ip_blocks[i].version->funcs->late_init) {
2654 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2655 if (r) {
2656 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2657 adev->ip_blocks[i].version->funcs->name, r);
2658 return r;
2659 }
2dc80b00 2660 }
73f847db 2661 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2662 }
2663
867e24ca 2664 r = amdgpu_ras_late_init(adev);
2665 if (r) {
2666 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2667 return r;
2668 }
2669
a891d239
DL
2670 amdgpu_ras_set_error_query_ready(adev, true);
2671
1112a46b
RZ
2672 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2673 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2674
06ec9070 2675 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2676
beff74bc
AD
2677 r = amdgpu_device_enable_mgpu_fan_boost();
2678 if (r)
2679 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2680
4da8b639 2681 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2682 if (amdgpu_passthrough(adev) &&
2683 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2684 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2685 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2686
2687 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2688 mutex_lock(&mgpu_info.mutex);
2689
2690 /*
2691 * Reset device p-state to low as this was booted with high.
2692 *
2693 * This should be performed only after all devices from the same
2694 * hive get initialized.
2695 *
2696 * However, it's unknown how many device in the hive in advance.
2697 * As this is counted one by one during devices initializations.
2698 *
2699 * So, we wait for all XGMI interlinked devices initialized.
2700 * This may bring some delays as those devices may come from
2701 * different hives. But that should be OK.
2702 */
2703 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2704 for (i = 0; i < mgpu_info.num_gpu; i++) {
2705 gpu_instance = &(mgpu_info.gpu_ins[i]);
2706 if (gpu_instance->adev->flags & AMD_IS_APU)
2707 continue;
2708
d84a430d
JK
2709 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2710 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2711 if (r) {
2712 DRM_ERROR("pstate setting failed (%d).\n", r);
2713 break;
2714 }
2715 }
2716 }
2717
2718 mutex_unlock(&mgpu_info.mutex);
2719 }
2720
d38ceaf9
AD
2721 return 0;
2722}
2723
613aa3ea
LY
2724/**
2725 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2726 *
2727 * @adev: amdgpu_device pointer
2728 *
2729 * For ASICs need to disable SMC first
2730 */
2731static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2732{
2733 int i, r;
2734
2735 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2736 return;
2737
2738 for (i = 0; i < adev->num_ip_blocks; i++) {
2739 if (!adev->ip_blocks[i].status.hw)
2740 continue;
2741 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2742 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2743 /* XXX handle errors */
2744 if (r) {
2745 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2746 adev->ip_blocks[i].version->funcs->name, r);
2747 }
2748 adev->ip_blocks[i].status.hw = false;
2749 break;
2750 }
2751 }
2752}
2753
e9669fb7 2754static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2755{
2756 int i, r;
2757
e9669fb7
AG
2758 for (i = 0; i < adev->num_ip_blocks; i++) {
2759 if (!adev->ip_blocks[i].version->funcs->early_fini)
2760 continue;
5278a159 2761
e9669fb7
AG
2762 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2763 if (r) {
2764 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2765 adev->ip_blocks[i].version->funcs->name, r);
2766 }
2767 }
c030f2e4 2768
05df1f01 2769 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2770 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2771
7270e895
TY
2772 amdgpu_amdkfd_suspend(adev, false);
2773
613aa3ea
LY
2774 /* Workaroud for ASICs need to disable SMC first */
2775 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2776
d38ceaf9 2777 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2778 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2779 continue;
8201a67a 2780
a1255107 2781 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2782 /* XXX handle errors */
2c1a2784 2783 if (r) {
a1255107
AD
2784 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2785 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2786 }
8201a67a 2787
a1255107 2788 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2789 }
2790
6effad8a
GC
2791 if (amdgpu_sriov_vf(adev)) {
2792 if (amdgpu_virt_release_full_gpu(adev, false))
2793 DRM_ERROR("failed to release exclusive mode on fini\n");
2794 }
2795
e9669fb7
AG
2796 return 0;
2797}
2798
2799/**
2800 * amdgpu_device_ip_fini - run fini for hardware IPs
2801 *
2802 * @adev: amdgpu_device pointer
2803 *
2804 * Main teardown pass for hardware IPs. The list of all the hardware
2805 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2806 * are run. hw_fini tears down the hardware associated with each IP
2807 * and sw_fini tears down any software state associated with each IP.
2808 * Returns 0 on success, negative error code on failure.
2809 */
2810static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2811{
2812 int i, r;
2813
2814 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2815 amdgpu_virt_release_ras_err_handler_data(adev);
2816
e9669fb7
AG
2817 if (adev->gmc.xgmi.num_physical_nodes > 1)
2818 amdgpu_xgmi_remove_device(adev);
2819
c004d44e 2820 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2821
d38ceaf9 2822 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2823 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2824 continue;
c12aba3a
ML
2825
2826 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2827 amdgpu_ucode_free_bo(adev);
1e256e27 2828 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2829 amdgpu_device_wb_fini(adev);
7ccfd79f 2830 amdgpu_device_mem_scratch_fini(adev);
533aed27 2831 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2832 }
2833
a1255107 2834 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2835 /* XXX handle errors */
2c1a2784 2836 if (r) {
a1255107
AD
2837 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2838 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2839 }
a1255107
AD
2840 adev->ip_blocks[i].status.sw = false;
2841 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2842 }
2843
a6dcfd9c 2844 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2845 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2846 continue;
a1255107
AD
2847 if (adev->ip_blocks[i].version->funcs->late_fini)
2848 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2849 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2850 }
2851
c030f2e4 2852 amdgpu_ras_fini(adev);
2853
d38ceaf9
AD
2854 return 0;
2855}
2856
e3ecdffa 2857/**
beff74bc 2858 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2859 *
1112a46b 2860 * @work: work_struct.
e3ecdffa 2861 */
beff74bc 2862static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2863{
2864 struct amdgpu_device *adev =
beff74bc 2865 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2866 int r;
2867
2868 r = amdgpu_ib_ring_tests(adev);
2869 if (r)
2870 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2871}
2872
1e317b99
RZ
2873static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2874{
2875 struct amdgpu_device *adev =
2876 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2877
90a92662
MD
2878 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2879 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2880
2881 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2882 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2883}
2884
e3ecdffa 2885/**
e7854a03 2886 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2887 *
2888 * @adev: amdgpu_device pointer
2889 *
2890 * Main suspend function for hardware IPs. The list of all the hardware
2891 * IPs that make up the asic is walked, clockgating is disabled and the
2892 * suspend callbacks are run. suspend puts the hardware and software state
2893 * in each IP into a state suitable for suspend.
2894 * Returns 0 on success, negative error code on failure.
2895 */
e7854a03
AD
2896static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2897{
2898 int i, r;
2899
50ec83f0
AD
2900 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2901 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2902
b31d6ada
EQ
2903 /*
2904 * Per PMFW team's suggestion, driver needs to handle gfxoff
2905 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2906 * scenario. Add the missing df cstate disablement here.
2907 */
2908 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2909 dev_warn(adev->dev, "Failed to disallow df cstate");
2910
e7854a03
AD
2911 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2912 if (!adev->ip_blocks[i].status.valid)
2913 continue;
2b9f7848 2914
e7854a03 2915 /* displays are handled separately */
2b9f7848
ND
2916 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2917 continue;
2918
2919 /* XXX handle errors */
2920 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2921 /* XXX handle errors */
2922 if (r) {
2923 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2924 adev->ip_blocks[i].version->funcs->name, r);
2925 return r;
e7854a03 2926 }
2b9f7848
ND
2927
2928 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2929 }
2930
e7854a03
AD
2931 return 0;
2932}
2933
2934/**
2935 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2936 *
2937 * @adev: amdgpu_device pointer
2938 *
2939 * Main suspend function for hardware IPs. The list of all the hardware
2940 * IPs that make up the asic is walked, clockgating is disabled and the
2941 * suspend callbacks are run. suspend puts the hardware and software state
2942 * in each IP into a state suitable for suspend.
2943 * Returns 0 on success, negative error code on failure.
2944 */
2945static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2946{
2947 int i, r;
2948
557f42a2 2949 if (adev->in_s0ix)
bc143d8b 2950 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2951
d38ceaf9 2952 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2953 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2954 continue;
e7854a03
AD
2955 /* displays are handled in phase1 */
2956 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2957 continue;
bff77e86
LM
2958 /* PSP lost connection when err_event_athub occurs */
2959 if (amdgpu_ras_intr_triggered() &&
2960 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2961 adev->ip_blocks[i].status.hw = false;
2962 continue;
2963 }
e3c1b071 2964
2965 /* skip unnecessary suspend if we do not initialize them yet */
2966 if (adev->gmc.xgmi.pending_reset &&
2967 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2968 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2969 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2970 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2971 adev->ip_blocks[i].status.hw = false;
2972 continue;
2973 }
557f42a2 2974
afa6646b 2975 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
2976 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2977 * like at runtime. PSP is also part of the always on hardware
2978 * so no need to suspend it.
2979 */
557f42a2 2980 if (adev->in_s0ix &&
32ff160d 2981 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
2982 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2983 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
2984 continue;
2985
2a7798ea
AD
2986 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
2987 if (adev->in_s0ix &&
2988 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
2989 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2990 continue;
2991
e11c7750
TH
2992 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
2993 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
2994 * from this location and RLC Autoload automatically also gets loaded
2995 * from here based on PMFW -> PSP message during re-init sequence.
2996 * Therefore, the psp suspend & resume should be skipped to avoid destroy
2997 * the TMR and reload FWs again for IMU enabled APU ASICs.
2998 */
2999 if (amdgpu_in_reset(adev) &&
3000 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3001 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3002 continue;
3003
d38ceaf9 3004 /* XXX handle errors */
a1255107 3005 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3006 /* XXX handle errors */
2c1a2784 3007 if (r) {
a1255107
AD
3008 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3009 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3010 }
876923fb 3011 adev->ip_blocks[i].status.hw = false;
a3a09142 3012 /* handle putting the SMC in the appropriate state */
47fc644f 3013 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3014 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3015 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3016 if (r) {
3017 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3018 adev->mp1_state, r);
3019 return r;
3020 }
a3a09142
AD
3021 }
3022 }
d38ceaf9
AD
3023 }
3024
3025 return 0;
3026}
3027
e7854a03
AD
3028/**
3029 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3030 *
3031 * @adev: amdgpu_device pointer
3032 *
3033 * Main suspend function for hardware IPs. The list of all the hardware
3034 * IPs that make up the asic is walked, clockgating is disabled and the
3035 * suspend callbacks are run. suspend puts the hardware and software state
3036 * in each IP into a state suitable for suspend.
3037 * Returns 0 on success, negative error code on failure.
3038 */
3039int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3040{
3041 int r;
3042
3c73683c
JC
3043 if (amdgpu_sriov_vf(adev)) {
3044 amdgpu_virt_fini_data_exchange(adev);
e7819644 3045 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3046 }
e7819644 3047
e7854a03
AD
3048 r = amdgpu_device_ip_suspend_phase1(adev);
3049 if (r)
3050 return r;
3051 r = amdgpu_device_ip_suspend_phase2(adev);
3052
e7819644
YT
3053 if (amdgpu_sriov_vf(adev))
3054 amdgpu_virt_release_full_gpu(adev, false);
3055
e7854a03
AD
3056 return r;
3057}
3058
06ec9070 3059static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3060{
3061 int i, r;
3062
2cb681b6 3063 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3064 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3065 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3066 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3067 AMD_IP_BLOCK_TYPE_IH,
3068 };
a90ad3c2 3069
95ea3dbc 3070 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3071 int j;
3072 struct amdgpu_ip_block *block;
a90ad3c2 3073
4cd2a96d
J
3074 block = &adev->ip_blocks[i];
3075 block->status.hw = false;
2cb681b6 3076
4cd2a96d 3077 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3078
4cd2a96d 3079 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3080 !block->status.valid)
3081 continue;
3082
3083 r = block->version->funcs->hw_init(adev);
0aaeefcc 3084 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3085 if (r)
3086 return r;
482f0e53 3087 block->status.hw = true;
a90ad3c2
ML
3088 }
3089 }
3090
3091 return 0;
3092}
3093
06ec9070 3094static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3095{
3096 int i, r;
3097
2cb681b6
ML
3098 static enum amd_ip_block_type ip_order[] = {
3099 AMD_IP_BLOCK_TYPE_SMC,
3100 AMD_IP_BLOCK_TYPE_DCE,
3101 AMD_IP_BLOCK_TYPE_GFX,
3102 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3103 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3104 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3105 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3106 AMD_IP_BLOCK_TYPE_VCN,
3107 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3108 };
a90ad3c2 3109
2cb681b6
ML
3110 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3111 int j;
3112 struct amdgpu_ip_block *block;
a90ad3c2 3113
2cb681b6
ML
3114 for (j = 0; j < adev->num_ip_blocks; j++) {
3115 block = &adev->ip_blocks[j];
3116
3117 if (block->version->type != ip_order[i] ||
482f0e53
ML
3118 !block->status.valid ||
3119 block->status.hw)
2cb681b6
ML
3120 continue;
3121
895bd048
JZ
3122 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3123 r = block->version->funcs->resume(adev);
3124 else
3125 r = block->version->funcs->hw_init(adev);
3126
0aaeefcc 3127 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3128 if (r)
3129 return r;
482f0e53 3130 block->status.hw = true;
a90ad3c2
ML
3131 }
3132 }
3133
3134 return 0;
3135}
3136
e3ecdffa
AD
3137/**
3138 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3139 *
3140 * @adev: amdgpu_device pointer
3141 *
3142 * First resume function for hardware IPs. The list of all the hardware
3143 * IPs that make up the asic is walked and the resume callbacks are run for
3144 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3145 * after a suspend and updates the software state as necessary. This
3146 * function is also used for restoring the GPU after a GPU reset.
3147 * Returns 0 on success, negative error code on failure.
3148 */
06ec9070 3149static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3150{
3151 int i, r;
3152
a90ad3c2 3153 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3154 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3155 continue;
a90ad3c2 3156 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3157 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3158 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3159 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3160
fcf0649f
CZ
3161 r = adev->ip_blocks[i].version->funcs->resume(adev);
3162 if (r) {
3163 DRM_ERROR("resume of IP block <%s> failed %d\n",
3164 adev->ip_blocks[i].version->funcs->name, r);
3165 return r;
3166 }
482f0e53 3167 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3168 }
3169 }
3170
3171 return 0;
3172}
3173
e3ecdffa
AD
3174/**
3175 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3176 *
3177 * @adev: amdgpu_device pointer
3178 *
3179 * First resume function for hardware IPs. The list of all the hardware
3180 * IPs that make up the asic is walked and the resume callbacks are run for
3181 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3182 * functional state after a suspend and updates the software state as
3183 * necessary. This function is also used for restoring the GPU after a GPU
3184 * reset.
3185 * Returns 0 on success, negative error code on failure.
3186 */
06ec9070 3187static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3188{
3189 int i, r;
3190
3191 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3192 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3193 continue;
fcf0649f 3194 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3195 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3196 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3197 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3198 continue;
a1255107 3199 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3200 if (r) {
a1255107
AD
3201 DRM_ERROR("resume of IP block <%s> failed %d\n",
3202 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3203 return r;
2c1a2784 3204 }
482f0e53 3205 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3206 }
3207
3208 return 0;
3209}
3210
e3ecdffa
AD
3211/**
3212 * amdgpu_device_ip_resume - run resume for hardware IPs
3213 *
3214 * @adev: amdgpu_device pointer
3215 *
3216 * Main resume function for hardware IPs. The hardware IPs
3217 * are split into two resume functions because they are
b8920e1e 3218 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3219 * steps need to be take between them. In this case (S3/S4) they are
3220 * run sequentially.
3221 * Returns 0 on success, negative error code on failure.
3222 */
06ec9070 3223static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3224{
3225 int r;
3226
06ec9070 3227 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3228 if (r)
3229 return r;
7a3e0bb2
RZ
3230
3231 r = amdgpu_device_fw_loading(adev);
3232 if (r)
3233 return r;
3234
06ec9070 3235 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3236
3237 return r;
3238}
3239
e3ecdffa
AD
3240/**
3241 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3242 *
3243 * @adev: amdgpu_device pointer
3244 *
3245 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3246 */
4e99a44e 3247static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3248{
6867e1b5
ML
3249 if (amdgpu_sriov_vf(adev)) {
3250 if (adev->is_atom_fw) {
58ff791a 3251 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3252 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3253 } else {
3254 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3255 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3256 }
3257
3258 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3259 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3260 }
048765ad
AR
3261}
3262
e3ecdffa
AD
3263/**
3264 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3265 *
3266 * @asic_type: AMD asic type
3267 *
3268 * Check if there is DC (new modesetting infrastructre) support for an asic.
3269 * returns true if DC has support, false if not.
3270 */
4562236b
HW
3271bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3272{
3273 switch (asic_type) {
0637d417
AD
3274#ifdef CONFIG_DRM_AMDGPU_SI
3275 case CHIP_HAINAN:
3276#endif
3277 case CHIP_TOPAZ:
3278 /* chips with no display hardware */
3279 return false;
4562236b 3280#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3281 case CHIP_TAHITI:
3282 case CHIP_PITCAIRN:
3283 case CHIP_VERDE:
3284 case CHIP_OLAND:
2d32ffd6
AD
3285 /*
3286 * We have systems in the wild with these ASICs that require
3287 * LVDS and VGA support which is not supported with DC.
3288 *
3289 * Fallback to the non-DC driver here by default so as not to
3290 * cause regressions.
3291 */
3292#if defined(CONFIG_DRM_AMD_DC_SI)
3293 return amdgpu_dc > 0;
3294#else
3295 return false;
64200c46 3296#endif
4562236b 3297 case CHIP_BONAIRE:
0d6fbccb 3298 case CHIP_KAVERI:
367e6687
AD
3299 case CHIP_KABINI:
3300 case CHIP_MULLINS:
d9fda248
HW
3301 /*
3302 * We have systems in the wild with these ASICs that require
b5a0168e 3303 * VGA support which is not supported with DC.
d9fda248
HW
3304 *
3305 * Fallback to the non-DC driver here by default so as not to
3306 * cause regressions.
3307 */
3308 return amdgpu_dc > 0;
f7f12b25 3309 default:
fd187853 3310 return amdgpu_dc != 0;
f7f12b25 3311#else
4562236b 3312 default:
93b09a9a 3313 if (amdgpu_dc > 0)
b8920e1e 3314 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3315 return false;
f7f12b25 3316#endif
4562236b
HW
3317 }
3318}
3319
3320/**
3321 * amdgpu_device_has_dc_support - check if dc is supported
3322 *
982a820b 3323 * @adev: amdgpu_device pointer
4562236b
HW
3324 *
3325 * Returns true for supported, false for not supported
3326 */
3327bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3328{
25263da3 3329 if (adev->enable_virtual_display ||
abaf210c 3330 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3331 return false;
3332
4562236b
HW
3333 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3334}
3335
d4535e2c
AG
3336static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3337{
3338 struct amdgpu_device *adev =
3339 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3340 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3341
c6a6e2db
AG
3342 /* It's a bug to not have a hive within this function */
3343 if (WARN_ON(!hive))
3344 return;
3345
3346 /*
3347 * Use task barrier to synchronize all xgmi reset works across the
3348 * hive. task_barrier_enter and task_barrier_exit will block
3349 * until all the threads running the xgmi reset works reach
3350 * those points. task_barrier_full will do both blocks.
3351 */
3352 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3353
3354 task_barrier_enter(&hive->tb);
4a580877 3355 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3356
3357 if (adev->asic_reset_res)
3358 goto fail;
3359
3360 task_barrier_exit(&hive->tb);
4a580877 3361 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3362
3363 if (adev->asic_reset_res)
3364 goto fail;
43c4d576 3365
5e67bba3 3366 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3367 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3368 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3369 } else {
3370
3371 task_barrier_full(&hive->tb);
3372 adev->asic_reset_res = amdgpu_asic_reset(adev);
3373 }
ce316fa5 3374
c6a6e2db 3375fail:
d4535e2c 3376 if (adev->asic_reset_res)
fed184e9 3377 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3378 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3379 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3380}
3381
71f98027
AD
3382static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3383{
3384 char *input = amdgpu_lockup_timeout;
3385 char *timeout_setting = NULL;
3386 int index = 0;
3387 long timeout;
3388 int ret = 0;
3389
3390 /*
67387dfe
AD
3391 * By default timeout for non compute jobs is 10000
3392 * and 60000 for compute jobs.
71f98027 3393 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 3394 * jobs are 60000 by default.
71f98027
AD
3395 */
3396 adev->gfx_timeout = msecs_to_jiffies(10000);
3397 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3398 if (amdgpu_sriov_vf(adev))
3399 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3400 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3401 else
67387dfe 3402 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3403
f440ff44 3404 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3405 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3406 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3407 ret = kstrtol(timeout_setting, 0, &timeout);
3408 if (ret)
3409 return ret;
3410
3411 if (timeout == 0) {
3412 index++;
3413 continue;
3414 } else if (timeout < 0) {
3415 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3416 dev_warn(adev->dev, "lockup timeout disabled");
3417 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3418 } else {
3419 timeout = msecs_to_jiffies(timeout);
3420 }
3421
3422 switch (index++) {
3423 case 0:
3424 adev->gfx_timeout = timeout;
3425 break;
3426 case 1:
3427 adev->compute_timeout = timeout;
3428 break;
3429 case 2:
3430 adev->sdma_timeout = timeout;
3431 break;
3432 case 3:
3433 adev->video_timeout = timeout;
3434 break;
3435 default:
3436 break;
3437 }
3438 }
3439 /*
3440 * There is only one value specified and
3441 * it should apply to all non-compute jobs.
3442 */
bcccee89 3443 if (index == 1) {
71f98027 3444 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3445 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3446 adev->compute_timeout = adev->gfx_timeout;
3447 }
71f98027
AD
3448 }
3449
3450 return ret;
3451}
d4535e2c 3452
4a74c38c
PY
3453/**
3454 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3455 *
3456 * @adev: amdgpu_device pointer
3457 *
3458 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3459 */
3460static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3461{
3462 struct iommu_domain *domain;
3463
3464 domain = iommu_get_domain_for_dev(adev->dev);
3465 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3466 adev->ram_is_direct_mapped = true;
3467}
3468
77f3a5cd 3469static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3470 &dev_attr_pcie_replay_count.attr,
3471 NULL
3472};
3473
02ff519e
AD
3474static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3475{
3476 if (amdgpu_mcbp == 1)
3477 adev->gfx.mcbp = true;
1e9e15dc
JZ
3478 else if (amdgpu_mcbp == 0)
3479 adev->gfx.mcbp = false;
3480 else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3481 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3482 adev->gfx.num_gfx_rings)
50a7c876
AD
3483 adev->gfx.mcbp = true;
3484
02ff519e
AD
3485 if (amdgpu_sriov_vf(adev))
3486 adev->gfx.mcbp = true;
3487
3488 if (adev->gfx.mcbp)
3489 DRM_INFO("MCBP is enabled\n");
3490}
3491
d38ceaf9
AD
3492/**
3493 * amdgpu_device_init - initialize the driver
3494 *
3495 * @adev: amdgpu_device pointer
d38ceaf9
AD
3496 * @flags: driver flags
3497 *
3498 * Initializes the driver info and hw (all asics).
3499 * Returns 0 for success or an error on failure.
3500 * Called at driver startup.
3501 */
3502int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3503 uint32_t flags)
3504{
8aba21b7
LT
3505 struct drm_device *ddev = adev_to_drm(adev);
3506 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3507 int r, i;
b98c6299 3508 bool px = false;
95844d20 3509 u32 max_MBps;
59e9fff1 3510 int tmp;
d38ceaf9
AD
3511
3512 adev->shutdown = false;
d38ceaf9 3513 adev->flags = flags;
4e66d7d2
YZ
3514
3515 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3516 adev->asic_type = amdgpu_force_asic_type;
3517 else
3518 adev->asic_type = flags & AMD_ASIC_MASK;
3519
d38ceaf9 3520 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3521 if (amdgpu_emu_mode == 1)
8bdab6bb 3522 adev->usec_timeout *= 10;
770d13b1 3523 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3524 adev->accel_working = false;
3525 adev->num_rings = 0;
68ce8b24 3526 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3527 adev->mman.buffer_funcs = NULL;
3528 adev->mman.buffer_funcs_ring = NULL;
3529 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3530 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3531 adev->gmc.gmc_funcs = NULL;
7bd939d0 3532 adev->harvest_ip_mask = 0x0;
f54d1867 3533 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3534 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3535
3536 adev->smc_rreg = &amdgpu_invalid_rreg;
3537 adev->smc_wreg = &amdgpu_invalid_wreg;
3538 adev->pcie_rreg = &amdgpu_invalid_rreg;
3539 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3540 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3541 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3542 adev->pciep_rreg = &amdgpu_invalid_rreg;
3543 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3544 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3545 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3546 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3547 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3548 adev->didt_rreg = &amdgpu_invalid_rreg;
3549 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3550 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3551 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3552 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3553 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3554
3e39ab90
AD
3555 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3556 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3557 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3558
3559 /* mutex initialization are all done here so we
b8920e1e
SS
3560 * can recall function without having locking issues
3561 */
0e5ca0d1 3562 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3563 mutex_init(&adev->pm.mutex);
3564 mutex_init(&adev->gfx.gpu_clock_mutex);
3565 mutex_init(&adev->srbm_mutex);
b8866c26 3566 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3567 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3568 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3569 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3570 mutex_init(&adev->mn_lock);
e23b74aa 3571 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3572 hash_init(adev->mn_hash);
32eaeae0 3573 mutex_init(&adev->psp.mutex);
bd052211 3574 mutex_init(&adev->notifier_lock);
8cda7a4f 3575 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3576 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3577
ab3b9de6 3578 amdgpu_device_init_apu_flags(adev);
9f6a7857 3579
912dfc84
EQ
3580 r = amdgpu_device_check_arguments(adev);
3581 if (r)
3582 return r;
d38ceaf9 3583
d38ceaf9
AD
3584 spin_lock_init(&adev->mmio_idx_lock);
3585 spin_lock_init(&adev->smc_idx_lock);
3586 spin_lock_init(&adev->pcie_idx_lock);
3587 spin_lock_init(&adev->uvd_ctx_idx_lock);
3588 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3589 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3590 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3591 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3592 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3593
0c4e7fa5
CZ
3594 INIT_LIST_HEAD(&adev->shadow_list);
3595 mutex_init(&adev->shadow_list_lock);
3596
655ce9cb 3597 INIT_LIST_HEAD(&adev->reset_list);
3598
6492e1b0 3599 INIT_LIST_HEAD(&adev->ras_list);
3600
beff74bc
AD
3601 INIT_DELAYED_WORK(&adev->delayed_init_work,
3602 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3603 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3604 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3605
d4535e2c
AG
3606 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3607
d23ee13f 3608 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3609 adev->gfx.gfx_off_residency = 0;
3610 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3611 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3612
b265bdbd
EQ
3613 atomic_set(&adev->throttling_logging_enabled, 1);
3614 /*
3615 * If throttling continues, logging will be performed every minute
3616 * to avoid log flooding. "-1" is subtracted since the thermal
3617 * throttling interrupt comes every second. Thus, the total logging
3618 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3619 * for throttling interrupt) = 60 seconds.
3620 */
3621 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3622 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3623
0fa49558
AX
3624 /* Registers mapping */
3625 /* TODO: block userspace mapping of io register */
da69c161
KW
3626 if (adev->asic_type >= CHIP_BONAIRE) {
3627 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3628 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3629 } else {
3630 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3631 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3632 }
d38ceaf9 3633
6c08e0ef
EQ
3634 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3635 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3636
d38ceaf9 3637 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3638 if (!adev->rmmio)
d38ceaf9 3639 return -ENOMEM;
b8920e1e 3640
d38ceaf9 3641 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3642 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3643
436afdfa
PY
3644 /*
3645 * Reset domain needs to be present early, before XGMI hive discovered
3646 * (if any) and intitialized to use reset sem and in_gpu reset flag
3647 * early on during init and before calling to RREG32.
3648 */
3649 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3650 if (!adev->reset_domain)
3651 return -ENOMEM;
3652
3aa0115d
ML
3653 /* detect hw virtualization here */
3654 amdgpu_detect_virtualization(adev);
3655
04e85958
TL
3656 amdgpu_device_get_pcie_info(adev);
3657
dffa11b4
ML
3658 r = amdgpu_device_get_job_timeout_settings(adev);
3659 if (r) {
3660 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3661 return r;
a190d1c7
XY
3662 }
3663
d38ceaf9 3664 /* early init functions */
06ec9070 3665 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3666 if (r)
4ef87d8f 3667 return r;
d38ceaf9 3668
02ff519e
AD
3669 amdgpu_device_set_mcbp(adev);
3670
b7cdb41e
ML
3671 /* Get rid of things like offb */
3672 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3673 if (r)
3674 return r;
3675
4d33e704
SK
3676 /* Enable TMZ based on IP_VERSION */
3677 amdgpu_gmc_tmz_set(adev);
3678
957b0787 3679 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3680 /* Need to get xgmi info early to decide the reset behavior*/
3681 if (adev->gmc.xgmi.supported) {
3682 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3683 if (r)
3684 return r;
3685 }
3686
8e6d0b69 3687 /* enable PCIE atomic ops */
b4520bfd
GW
3688 if (amdgpu_sriov_vf(adev)) {
3689 if (adev->virt.fw_reserve.p_pf2vf)
3690 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3691 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3692 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3693 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3694 * internal path natively support atomics, set have_atomics_support to true.
3695 */
b4520bfd
GW
3696 } else if ((adev->flags & AMD_IS_APU) &&
3697 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3698 adev->have_atomics_support = true;
b4520bfd 3699 } else {
8e6d0b69 3700 adev->have_atomics_support =
3701 !pci_enable_atomic_ops_to_root(adev->pdev,
3702 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3703 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3704 }
3705
8e6d0b69 3706 if (!adev->have_atomics_support)
3707 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3708
6585661d 3709 /* doorbell bar mapping and doorbell index init*/
43c064db 3710 amdgpu_doorbell_init(adev);
6585661d 3711
9475a943
SL
3712 if (amdgpu_emu_mode == 1) {
3713 /* post the asic on emulation mode */
3714 emu_soc_asic_init(adev);
bfca0289 3715 goto fence_driver_init;
9475a943 3716 }
bfca0289 3717
04442bf7
LL
3718 amdgpu_reset_init(adev);
3719
4e99a44e 3720 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3721 if (adev->bios)
3722 amdgpu_device_detect_sriov_bios(adev);
048765ad 3723
95e8e59e
AD
3724 /* check if we need to reset the asic
3725 * E.g., driver was not cleanly unloaded previously, etc.
3726 */
f14899fd 3727 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3728 if (adev->gmc.xgmi.num_physical_nodes) {
3729 dev_info(adev->dev, "Pending hive reset.\n");
3730 adev->gmc.xgmi.pending_reset = true;
3731 /* Only need to init necessary block for SMU to handle the reset */
3732 for (i = 0; i < adev->num_ip_blocks; i++) {
3733 if (!adev->ip_blocks[i].status.valid)
3734 continue;
3735 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3736 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3737 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3738 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3739 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3740 adev->ip_blocks[i].version->funcs->name);
3741 adev->ip_blocks[i].status.hw = true;
3742 }
3743 }
3744 } else {
59e9fff1 3745 tmp = amdgpu_reset_method;
3746 /* It should do a default reset when loading or reloading the driver,
3747 * regardless of the module parameter reset_method.
3748 */
3749 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3750 r = amdgpu_asic_reset(adev);
59e9fff1 3751 amdgpu_reset_method = tmp;
e3c1b071 3752 if (r) {
3753 dev_err(adev->dev, "asic reset on init failed\n");
3754 goto failed;
3755 }
95e8e59e
AD
3756 }
3757 }
3758
d38ceaf9 3759 /* Post card if necessary */
39c640c0 3760 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3761 if (!adev->bios) {
bec86378 3762 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3763 r = -EINVAL;
3764 goto failed;
d38ceaf9 3765 }
bec86378 3766 DRM_INFO("GPU posting now...\n");
4d2997ab 3767 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3768 if (r) {
3769 dev_err(adev->dev, "gpu post error!\n");
3770 goto failed;
3771 }
d38ceaf9
AD
3772 }
3773
9535a86a
SZ
3774 if (adev->bios) {
3775 if (adev->is_atom_fw) {
3776 /* Initialize clocks */
3777 r = amdgpu_atomfirmware_get_clock_info(adev);
3778 if (r) {
3779 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3780 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3781 goto failed;
3782 }
3783 } else {
3784 /* Initialize clocks */
3785 r = amdgpu_atombios_get_clock_info(adev);
3786 if (r) {
3787 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3788 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3789 goto failed;
3790 }
3791 /* init i2c buses */
3792 if (!amdgpu_device_has_dc_support(adev))
3793 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3794 }
2c1a2784 3795 }
d38ceaf9 3796
bfca0289 3797fence_driver_init:
d38ceaf9 3798 /* Fence driver */
067f44c8 3799 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3800 if (r) {
067f44c8 3801 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3802 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3803 goto failed;
2c1a2784 3804 }
d38ceaf9
AD
3805
3806 /* init the mode config */
4a580877 3807 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3808
06ec9070 3809 r = amdgpu_device_ip_init(adev);
d38ceaf9 3810 if (r) {
06ec9070 3811 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3812 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3813 goto release_ras_con;
d38ceaf9
AD
3814 }
3815
8d35a259
LG
3816 amdgpu_fence_driver_hw_init(adev);
3817
d69b8971
YZ
3818 dev_info(adev->dev,
3819 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3820 adev->gfx.config.max_shader_engines,
3821 adev->gfx.config.max_sh_per_se,
3822 adev->gfx.config.max_cu_per_sh,
3823 adev->gfx.cu_info.number);
3824
d38ceaf9
AD
3825 adev->accel_working = true;
3826
e59c0205
AX
3827 amdgpu_vm_check_compute_bug(adev);
3828
95844d20
MO
3829 /* Initialize the buffer migration limit. */
3830 if (amdgpu_moverate >= 0)
3831 max_MBps = amdgpu_moverate;
3832 else
3833 max_MBps = 8; /* Allow 8 MB/s. */
3834 /* Get a log2 for easy divisions. */
3835 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3836
184d8384
LL
3837 r = amdgpu_atombios_sysfs_init(adev);
3838 if (r)
3839 drm_err(&adev->ddev,
3840 "registering atombios sysfs failed (%d).\n", r);
3841
d2f52ac8 3842 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3843 if (r)
3844 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3845
5bb23532 3846 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3847 if (r) {
3848 adev->ucode_sysfs_en = false;
5bb23532 3849 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3850 } else
3851 adev->ucode_sysfs_en = true;
5bb23532 3852
b0adca4d
EQ
3853 /*
3854 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3855 * Otherwise the mgpu fan boost feature will be skipped due to the
3856 * gpu instance is counted less.
3857 */
3858 amdgpu_register_gpu_instance(adev);
3859
d38ceaf9
AD
3860 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3861 * explicit gating rather than handling it automatically.
3862 */
e3c1b071 3863 if (!adev->gmc.xgmi.pending_reset) {
3864 r = amdgpu_device_ip_late_init(adev);
3865 if (r) {
3866 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3867 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3868 goto release_ras_con;
e3c1b071 3869 }
3870 /* must succeed. */
3871 amdgpu_ras_resume(adev);
3872 queue_delayed_work(system_wq, &adev->delayed_init_work,
3873 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3874 }
d38ceaf9 3875
38eecbe0
CL
3876 if (amdgpu_sriov_vf(adev)) {
3877 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3878 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3879 }
2c738637 3880
77f3a5cd 3881 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3882 if (r)
77f3a5cd 3883 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3884
7957ec80
LL
3885 amdgpu_fru_sysfs_init(adev);
3886
d155bef0
AB
3887 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3888 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3889 if (r)
3890 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3891
c1dd4aa6
AG
3892 /* Have stored pci confspace at hand for restore in sudden PCI error */
3893 if (amdgpu_device_cache_pci_state(adev->pdev))
3894 pci_restore_state(pdev);
3895
8c3dd61c
KHF
3896 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3897 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
3898 * ignore it
3899 */
8c3dd61c 3900 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3901 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3902
d37a3929
OC
3903 px = amdgpu_device_supports_px(ddev);
3904
3905 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3906 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3907 vga_switcheroo_register_client(adev->pdev,
3908 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3909
3910 if (px)
8c3dd61c 3911 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3912
e3c1b071 3913 if (adev->gmc.xgmi.pending_reset)
3914 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3915 msecs_to_jiffies(AMDGPU_RESUME_MS));
3916
4a74c38c
PY
3917 amdgpu_device_check_iommu_direct_map(adev);
3918
d38ceaf9 3919 return 0;
83ba126a 3920
970fd197 3921release_ras_con:
38eecbe0
CL
3922 if (amdgpu_sriov_vf(adev))
3923 amdgpu_virt_release_full_gpu(adev, true);
3924
3925 /* failed in exclusive mode due to timeout */
3926 if (amdgpu_sriov_vf(adev) &&
3927 !amdgpu_sriov_runtime(adev) &&
3928 amdgpu_virt_mmio_blocked(adev) &&
3929 !amdgpu_virt_wait_reset(adev)) {
3930 dev_err(adev->dev, "VF exclusive mode timeout\n");
3931 /* Don't send request since VF is inactive. */
3932 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3933 adev->virt.ops = NULL;
3934 r = -EAGAIN;
3935 }
970fd197
SY
3936 amdgpu_release_ras_context(adev);
3937
83ba126a 3938failed:
89041940 3939 amdgpu_vf_error_trans_all(adev);
8840a387 3940
83ba126a 3941 return r;
d38ceaf9
AD
3942}
3943
07775fc1
AG
3944static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3945{
62d5f9f7 3946
07775fc1
AG
3947 /* Clear all CPU mappings pointing to this device */
3948 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3949
3950 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 3951 amdgpu_doorbell_fini(adev);
07775fc1
AG
3952
3953 iounmap(adev->rmmio);
3954 adev->rmmio = NULL;
3955 if (adev->mman.aper_base_kaddr)
3956 iounmap(adev->mman.aper_base_kaddr);
3957 adev->mman.aper_base_kaddr = NULL;
3958
3959 /* Memory manager related */
a0ba1279 3960 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
3961 arch_phys_wc_del(adev->gmc.vram_mtrr);
3962 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3963 }
3964}
3965
d38ceaf9 3966/**
bbe04dec 3967 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3968 *
3969 * @adev: amdgpu_device pointer
3970 *
3971 * Tear down the driver info (all asics).
3972 * Called at driver shutdown.
3973 */
72c8c97b 3974void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3975{
aac89168 3976 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3977 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3978 adev->shutdown = true;
9f875167 3979
752c683d
ML
3980 /* make sure IB test finished before entering exclusive mode
3981 * to avoid preemption on IB test
b8920e1e 3982 */
519b8b76 3983 if (amdgpu_sriov_vf(adev)) {
752c683d 3984 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3985 amdgpu_virt_fini_data_exchange(adev);
3986 }
752c683d 3987
e5b03032
ML
3988 /* disable all interrupts */
3989 amdgpu_irq_disable_all(adev);
47fc644f 3990 if (adev->mode_info.mode_config_initialized) {
1053b9c9 3991 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3992 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3993 else
4a580877 3994 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3995 }
8d35a259 3996 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3997
cd3a8a59 3998 if (adev->mman.initialized)
9bff18d1 3999 drain_workqueue(adev->mman.bdev.wq);
98f56188 4000
53e9d836 4001 if (adev->pm.sysfs_initialized)
7c868b59 4002 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4003 if (adev->ucode_sysfs_en)
4004 amdgpu_ucode_sysfs_fini(adev);
4005 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4006 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4007
232d1d43
SY
4008 /* disable ras feature must before hw fini */
4009 amdgpu_ras_pre_fini(adev);
4010
e9669fb7 4011 amdgpu_device_ip_fini_early(adev);
d10d0daa 4012
a3848df6
YW
4013 amdgpu_irq_fini_hw(adev);
4014
b6fd6e0f
SK
4015 if (adev->mman.initialized)
4016 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4017
d10d0daa 4018 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4019
39934d3e
VP
4020 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4021 amdgpu_device_unmap_mmio(adev);
87172e89 4022
72c8c97b
AG
4023}
4024
4025void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4026{
62d5f9f7 4027 int idx;
d37a3929 4028 bool px;
62d5f9f7 4029
8d35a259 4030 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4031 amdgpu_device_ip_fini(adev);
b31d3063 4032 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4033 adev->accel_working = false;
68ce8b24 4034 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4035
4036 amdgpu_reset_fini(adev);
4037
d38ceaf9 4038 /* free i2c buses */
4562236b
HW
4039 if (!amdgpu_device_has_dc_support(adev))
4040 amdgpu_i2c_fini(adev);
bfca0289
SL
4041
4042 if (amdgpu_emu_mode != 1)
4043 amdgpu_atombios_fini(adev);
4044
d38ceaf9
AD
4045 kfree(adev->bios);
4046 adev->bios = NULL;
d37a3929
OC
4047
4048 px = amdgpu_device_supports_px(adev_to_drm(adev));
4049
4050 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4051 apple_gmux_detect(NULL, NULL)))
84c8b22e 4052 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4053
4054 if (px)
83ba126a 4055 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4056
38d6be81 4057 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4058 vga_client_unregister(adev->pdev);
e9bc1bf7 4059
62d5f9f7
LS
4060 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4061
4062 iounmap(adev->rmmio);
4063 adev->rmmio = NULL;
43c064db 4064 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4065 drm_dev_exit(idx);
4066 }
4067
d155bef0
AB
4068 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4069 amdgpu_pmu_fini(adev);
72de33f8 4070 if (adev->mman.discovery_bin)
a190d1c7 4071 amdgpu_discovery_fini(adev);
72c8c97b 4072
cfbb6b00
AG
4073 amdgpu_reset_put_reset_domain(adev->reset_domain);
4074 adev->reset_domain = NULL;
4075
72c8c97b
AG
4076 kfree(adev->pci_state);
4077
d38ceaf9
AD
4078}
4079
58144d28
ND
4080/**
4081 * amdgpu_device_evict_resources - evict device resources
4082 * @adev: amdgpu device object
4083 *
4084 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4085 * of the vram memory type. Mainly used for evicting device resources
4086 * at suspend time.
4087 *
4088 */
7863c155 4089static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4090{
7863c155
ML
4091 int ret;
4092
e53d9665
ML
4093 /* No need to evict vram on APUs for suspend to ram or s2idle */
4094 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4095 return 0;
58144d28 4096
7863c155
ML
4097 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4098 if (ret)
58144d28 4099 DRM_WARN("evicting device resources failed\n");
7863c155 4100 return ret;
58144d28 4101}
d38ceaf9
AD
4102
4103/*
4104 * Suspend & resume.
4105 */
4106/**
810ddc3a 4107 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4108 *
87e3f136 4109 * @dev: drm dev pointer
87e3f136 4110 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4111 *
4112 * Puts the hw in the suspend state (all asics).
4113 * Returns 0 for success or an error on failure.
4114 * Called at driver suspend.
4115 */
de185019 4116int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4117{
a2e15b0e 4118 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4119 int r = 0;
d38ceaf9 4120
d38ceaf9
AD
4121 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4122 return 0;
4123
44779b43 4124 adev->in_suspend = true;
3fa8f89d 4125
47ea2076
SF
4126 /* Evict the majority of BOs before grabbing the full access */
4127 r = amdgpu_device_evict_resources(adev);
4128 if (r)
4129 return r;
4130
d7274ec7
BZ
4131 if (amdgpu_sriov_vf(adev)) {
4132 amdgpu_virt_fini_data_exchange(adev);
4133 r = amdgpu_virt_request_full_gpu(adev, false);
4134 if (r)
4135 return r;
4136 }
4137
3fa8f89d
S
4138 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4139 DRM_WARN("smart shift update failed\n");
4140
5f818173 4141 if (fbcon)
087451f3 4142 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4143
beff74bc 4144 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4145 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4146
5e6932fe 4147 amdgpu_ras_suspend(adev);
4148
2196927b 4149 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4150
c004d44e 4151 if (!adev->in_s0ix)
5d3a2d95 4152 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4153
7863c155
ML
4154 r = amdgpu_device_evict_resources(adev);
4155 if (r)
4156 return r;
d38ceaf9 4157
8d35a259 4158 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4159
2196927b 4160 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4161
d7274ec7
BZ
4162 if (amdgpu_sriov_vf(adev))
4163 amdgpu_virt_release_full_gpu(adev, false);
4164
d38ceaf9
AD
4165 return 0;
4166}
4167
4168/**
810ddc3a 4169 * amdgpu_device_resume - initiate device resume
d38ceaf9 4170 *
87e3f136 4171 * @dev: drm dev pointer
87e3f136 4172 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4173 *
4174 * Bring the hw back to operating state (all asics).
4175 * Returns 0 for success or an error on failure.
4176 * Called at driver resume.
4177 */
de185019 4178int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4179{
1348969a 4180 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4181 int r = 0;
d38ceaf9 4182
d7274ec7
BZ
4183 if (amdgpu_sriov_vf(adev)) {
4184 r = amdgpu_virt_request_full_gpu(adev, true);
4185 if (r)
4186 return r;
4187 }
4188
d38ceaf9
AD
4189 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4190 return 0;
4191
62498733 4192 if (adev->in_s0ix)
bc143d8b 4193 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4194
d38ceaf9 4195 /* post card */
39c640c0 4196 if (amdgpu_device_need_post(adev)) {
4d2997ab 4197 r = amdgpu_device_asic_init(adev);
74b0b157 4198 if (r)
aac89168 4199 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4200 }
d38ceaf9 4201
06ec9070 4202 r = amdgpu_device_ip_resume(adev);
d7274ec7 4203
e6707218 4204 if (r) {
aac89168 4205 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4206 goto exit;
e6707218 4207 }
8d35a259 4208 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4209
06ec9070 4210 r = amdgpu_device_ip_late_init(adev);
03161a6e 4211 if (r)
3c22c1ea 4212 goto exit;
d38ceaf9 4213
beff74bc
AD
4214 queue_delayed_work(system_wq, &adev->delayed_init_work,
4215 msecs_to_jiffies(AMDGPU_RESUME_MS));
4216
c004d44e 4217 if (!adev->in_s0ix) {
5d3a2d95
AD
4218 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4219 if (r)
3c22c1ea 4220 goto exit;
5d3a2d95 4221 }
756e6880 4222
3c22c1ea
SF
4223exit:
4224 if (amdgpu_sriov_vf(adev)) {
4225 amdgpu_virt_init_data_exchange(adev);
4226 amdgpu_virt_release_full_gpu(adev, true);
4227 }
4228
4229 if (r)
4230 return r;
4231
96a5d8d4 4232 /* Make sure IB tests flushed */
beff74bc 4233 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4234
a2e15b0e 4235 if (fbcon)
087451f3 4236 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4237
5e6932fe 4238 amdgpu_ras_resume(adev);
4239
d09ef243
AD
4240 if (adev->mode_info.num_crtc) {
4241 /*
4242 * Most of the connector probing functions try to acquire runtime pm
4243 * refs to ensure that the GPU is powered on when connector polling is
4244 * performed. Since we're calling this from a runtime PM callback,
4245 * trying to acquire rpm refs will cause us to deadlock.
4246 *
4247 * Since we're guaranteed to be holding the rpm lock, it's safe to
4248 * temporarily disable the rpm helpers so this doesn't deadlock us.
4249 */
23a1a9e5 4250#ifdef CONFIG_PM
d09ef243 4251 dev->dev->power.disable_depth++;
23a1a9e5 4252#endif
d09ef243
AD
4253 if (!adev->dc_enabled)
4254 drm_helper_hpd_irq_event(dev);
4255 else
4256 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4257#ifdef CONFIG_PM
d09ef243 4258 dev->dev->power.disable_depth--;
23a1a9e5 4259#endif
d09ef243 4260 }
44779b43
RZ
4261 adev->in_suspend = false;
4262
dc907c9d
JX
4263 if (adev->enable_mes)
4264 amdgpu_mes_self_test(adev);
4265
3fa8f89d
S
4266 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4267 DRM_WARN("smart shift update failed\n");
4268
4d3b9ae5 4269 return 0;
d38ceaf9
AD
4270}
4271
e3ecdffa
AD
4272/**
4273 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4274 *
4275 * @adev: amdgpu_device pointer
4276 *
4277 * The list of all the hardware IPs that make up the asic is walked and
4278 * the check_soft_reset callbacks are run. check_soft_reset determines
4279 * if the asic is still hung or not.
4280 * Returns true if any of the IPs are still in a hung state, false if not.
4281 */
06ec9070 4282static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4283{
4284 int i;
4285 bool asic_hang = false;
4286
f993d628
ML
4287 if (amdgpu_sriov_vf(adev))
4288 return true;
4289
8bc04c29
AD
4290 if (amdgpu_asic_need_full_reset(adev))
4291 return true;
4292
63fbf42f 4293 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4294 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4295 continue;
a1255107
AD
4296 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4297 adev->ip_blocks[i].status.hang =
4298 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4299 if (adev->ip_blocks[i].status.hang) {
aac89168 4300 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4301 asic_hang = true;
4302 }
4303 }
4304 return asic_hang;
4305}
4306
e3ecdffa
AD
4307/**
4308 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4309 *
4310 * @adev: amdgpu_device pointer
4311 *
4312 * The list of all the hardware IPs that make up the asic is walked and the
4313 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4314 * handles any IP specific hardware or software state changes that are
4315 * necessary for a soft reset to succeed.
4316 * Returns 0 on success, negative error code on failure.
4317 */
06ec9070 4318static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4319{
4320 int i, r = 0;
4321
4322 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4323 if (!adev->ip_blocks[i].status.valid)
d31a501e 4324 continue;
a1255107
AD
4325 if (adev->ip_blocks[i].status.hang &&
4326 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4327 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4328 if (r)
4329 return r;
4330 }
4331 }
4332
4333 return 0;
4334}
4335
e3ecdffa
AD
4336/**
4337 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4338 *
4339 * @adev: amdgpu_device pointer
4340 *
4341 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4342 * reset is necessary to recover.
4343 * Returns true if a full asic reset is required, false if not.
4344 */
06ec9070 4345static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4346{
da146d3b
AD
4347 int i;
4348
8bc04c29
AD
4349 if (amdgpu_asic_need_full_reset(adev))
4350 return true;
4351
da146d3b 4352 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4353 if (!adev->ip_blocks[i].status.valid)
da146d3b 4354 continue;
a1255107
AD
4355 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4356 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4357 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4358 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4359 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4360 if (adev->ip_blocks[i].status.hang) {
aac89168 4361 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4362 return true;
4363 }
4364 }
35d782fe
CZ
4365 }
4366 return false;
4367}
4368
e3ecdffa
AD
4369/**
4370 * amdgpu_device_ip_soft_reset - do a soft reset
4371 *
4372 * @adev: amdgpu_device pointer
4373 *
4374 * The list of all the hardware IPs that make up the asic is walked and the
4375 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4376 * IP specific hardware or software state changes that are necessary to soft
4377 * reset the IP.
4378 * Returns 0 on success, negative error code on failure.
4379 */
06ec9070 4380static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4381{
4382 int i, r = 0;
4383
4384 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4385 if (!adev->ip_blocks[i].status.valid)
35d782fe 4386 continue;
a1255107
AD
4387 if (adev->ip_blocks[i].status.hang &&
4388 adev->ip_blocks[i].version->funcs->soft_reset) {
4389 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4390 if (r)
4391 return r;
4392 }
4393 }
4394
4395 return 0;
4396}
4397
e3ecdffa
AD
4398/**
4399 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4400 *
4401 * @adev: amdgpu_device pointer
4402 *
4403 * The list of all the hardware IPs that make up the asic is walked and the
4404 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4405 * handles any IP specific hardware or software state changes that are
4406 * necessary after the IP has been soft reset.
4407 * Returns 0 on success, negative error code on failure.
4408 */
06ec9070 4409static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4410{
4411 int i, r = 0;
4412
4413 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4414 if (!adev->ip_blocks[i].status.valid)
35d782fe 4415 continue;
a1255107
AD
4416 if (adev->ip_blocks[i].status.hang &&
4417 adev->ip_blocks[i].version->funcs->post_soft_reset)
4418 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4419 if (r)
4420 return r;
4421 }
4422
4423 return 0;
4424}
4425
e3ecdffa 4426/**
c33adbc7 4427 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4428 *
4429 * @adev: amdgpu_device pointer
4430 *
4431 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4432 * restore things like GPUVM page tables after a GPU reset where
4433 * the contents of VRAM might be lost.
403009bf
CK
4434 *
4435 * Returns:
4436 * 0 on success, negative error code on failure.
e3ecdffa 4437 */
c33adbc7 4438static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4439{
c41d1cf6 4440 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4441 struct amdgpu_bo *shadow;
e18aaea7 4442 struct amdgpu_bo_vm *vmbo;
403009bf 4443 long r = 1, tmo;
c41d1cf6
ML
4444
4445 if (amdgpu_sriov_runtime(adev))
b045d3af 4446 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4447 else
4448 tmo = msecs_to_jiffies(100);
4449
aac89168 4450 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4451 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4452 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4453 /* If vm is compute context or adev is APU, shadow will be NULL */
4454 if (!vmbo->shadow)
4455 continue;
4456 shadow = vmbo->shadow;
4457
403009bf 4458 /* No need to recover an evicted BO */
d3116756
CK
4459 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4460 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4461 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4462 continue;
4463
4464 r = amdgpu_bo_restore_shadow(shadow, &next);
4465 if (r)
4466 break;
4467
c41d1cf6 4468 if (fence) {
1712fb1a 4469 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4470 dma_fence_put(fence);
4471 fence = next;
1712fb1a 4472 if (tmo == 0) {
4473 r = -ETIMEDOUT;
c41d1cf6 4474 break;
1712fb1a 4475 } else if (tmo < 0) {
4476 r = tmo;
4477 break;
4478 }
403009bf
CK
4479 } else {
4480 fence = next;
c41d1cf6 4481 }
c41d1cf6
ML
4482 }
4483 mutex_unlock(&adev->shadow_list_lock);
4484
403009bf
CK
4485 if (fence)
4486 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4487 dma_fence_put(fence);
4488
1712fb1a 4489 if (r < 0 || tmo <= 0) {
aac89168 4490 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4491 return -EIO;
4492 }
c41d1cf6 4493
aac89168 4494 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4495 return 0;
c41d1cf6
ML
4496}
4497
a90ad3c2 4498
e3ecdffa 4499/**
06ec9070 4500 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4501 *
982a820b 4502 * @adev: amdgpu_device pointer
87e3f136 4503 * @from_hypervisor: request from hypervisor
5740682e
ML
4504 *
4505 * do VF FLR and reinitialize Asic
3f48c681 4506 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4507 */
4508static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4509 bool from_hypervisor)
5740682e
ML
4510{
4511 int r;
a5f67c93 4512 struct amdgpu_hive_info *hive = NULL;
7258fa31 4513 int retry_limit = 0;
5740682e 4514
7258fa31 4515retry:
c004d44e 4516 amdgpu_amdkfd_pre_reset(adev);
428890a3 4517
5740682e
ML
4518 if (from_hypervisor)
4519 r = amdgpu_virt_request_full_gpu(adev, true);
4520 else
4521 r = amdgpu_virt_reset_gpu(adev);
4522 if (r)
4523 return r;
f734b213 4524 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4525
83f24a8f
HC
4526 /* some sw clean up VF needs to do before recover */
4527 amdgpu_virt_post_reset(adev);
4528
a90ad3c2 4529 /* Resume IP prior to SMC */
06ec9070 4530 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4531 if (r)
4532 goto error;
a90ad3c2 4533
c9ffa427 4534 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4535
7a3e0bb2
RZ
4536 r = amdgpu_device_fw_loading(adev);
4537 if (r)
4538 return r;
4539
a90ad3c2 4540 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4541 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4542 if (r)
4543 goto error;
a90ad3c2 4544
a5f67c93
ZL
4545 hive = amdgpu_get_xgmi_hive(adev);
4546 /* Update PSP FW topology after reset */
4547 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4548 r = amdgpu_xgmi_update_topology(hive, adev);
4549
4550 if (hive)
4551 amdgpu_put_xgmi_hive(hive);
4552
4553 if (!r) {
a5f67c93 4554 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4555
c004d44e 4556 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4557 }
a90ad3c2 4558
abc34253 4559error:
c41d1cf6 4560 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4561 amdgpu_inc_vram_lost(adev);
c33adbc7 4562 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4563 }
437f3e0b 4564 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4565
7258fa31
SK
4566 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4567 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4568 retry_limit++;
4569 goto retry;
4570 } else
4571 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4572 }
4573
a90ad3c2
ML
4574 return r;
4575}
4576
9a1cddd6 4577/**
4578 * amdgpu_device_has_job_running - check if there is any job in mirror list
4579 *
982a820b 4580 * @adev: amdgpu_device pointer
9a1cddd6 4581 *
4582 * check if there is any job in mirror list
4583 */
4584bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4585{
4586 int i;
4587 struct drm_sched_job *job;
4588
4589 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4590 struct amdgpu_ring *ring = adev->rings[i];
4591
4592 if (!ring || !ring->sched.thread)
4593 continue;
4594
4595 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4596 job = list_first_entry_or_null(&ring->sched.pending_list,
4597 struct drm_sched_job, list);
9a1cddd6 4598 spin_unlock(&ring->sched.job_list_lock);
4599 if (job)
4600 return true;
4601 }
4602 return false;
4603}
4604
12938fad
CK
4605/**
4606 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4607 *
982a820b 4608 * @adev: amdgpu_device pointer
12938fad
CK
4609 *
4610 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4611 * a hung GPU.
4612 */
4613bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4614{
12938fad 4615
3ba7b418
AG
4616 if (amdgpu_gpu_recovery == 0)
4617 goto disabled;
4618
1a11a65d
YC
4619 /* Skip soft reset check in fatal error mode */
4620 if (!amdgpu_ras_is_poison_mode_supported(adev))
4621 return true;
4622
3ba7b418
AG
4623 if (amdgpu_sriov_vf(adev))
4624 return true;
4625
4626 if (amdgpu_gpu_recovery == -1) {
4627 switch (adev->asic_type) {
b3523c45
AD
4628#ifdef CONFIG_DRM_AMDGPU_SI
4629 case CHIP_VERDE:
4630 case CHIP_TAHITI:
4631 case CHIP_PITCAIRN:
4632 case CHIP_OLAND:
4633 case CHIP_HAINAN:
4634#endif
4635#ifdef CONFIG_DRM_AMDGPU_CIK
4636 case CHIP_KAVERI:
4637 case CHIP_KABINI:
4638 case CHIP_MULLINS:
4639#endif
4640 case CHIP_CARRIZO:
4641 case CHIP_STONEY:
4642 case CHIP_CYAN_SKILLFISH:
3ba7b418 4643 goto disabled;
b3523c45
AD
4644 default:
4645 break;
3ba7b418 4646 }
12938fad
CK
4647 }
4648
4649 return true;
3ba7b418
AG
4650
4651disabled:
aac89168 4652 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4653 return false;
12938fad
CK
4654}
4655
5c03e584
FX
4656int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4657{
47fc644f
SS
4658 u32 i;
4659 int ret = 0;
5c03e584 4660
47fc644f 4661 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4662
47fc644f 4663 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4664
47fc644f
SS
4665 /* disable BM */
4666 pci_clear_master(adev->pdev);
5c03e584 4667
47fc644f 4668 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4669
47fc644f
SS
4670 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4671 dev_info(adev->dev, "GPU smu mode1 reset\n");
4672 ret = amdgpu_dpm_mode1_reset(adev);
4673 } else {
4674 dev_info(adev->dev, "GPU psp mode1 reset\n");
4675 ret = psp_gpu_reset(adev);
4676 }
5c03e584 4677
47fc644f 4678 if (ret)
7d442437 4679 goto mode1_reset_failed;
5c03e584 4680
47fc644f 4681 amdgpu_device_load_pci_state(adev->pdev);
7656168a
LL
4682 ret = amdgpu_psp_wait_for_bootloader(adev);
4683 if (ret)
7d442437 4684 goto mode1_reset_failed;
5c03e584 4685
47fc644f
SS
4686 /* wait for asic to come out of reset */
4687 for (i = 0; i < adev->usec_timeout; i++) {
4688 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4689
47fc644f
SS
4690 if (memsize != 0xffffffff)
4691 break;
4692 udelay(1);
4693 }
5c03e584 4694
7d442437
HZ
4695 if (i >= adev->usec_timeout) {
4696 ret = -ETIMEDOUT;
4697 goto mode1_reset_failed;
4698 }
4699
47fc644f 4700 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
7656168a 4701
7d442437
HZ
4702 return 0;
4703
4704mode1_reset_failed:
4705 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4706 return ret;
5c03e584 4707}
5c6dd71e 4708
e3c1b071 4709int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4710 struct amdgpu_reset_context *reset_context)
26bc5340 4711{
5c1e6fa4 4712 int i, r = 0;
04442bf7
LL
4713 struct amdgpu_job *job = NULL;
4714 bool need_full_reset =
4715 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4716
4717 if (reset_context->reset_req_dev == adev)
4718 job = reset_context->job;
71182665 4719
b602ca5f
TZ
4720 if (amdgpu_sriov_vf(adev)) {
4721 /* stop the data exchange thread */
4722 amdgpu_virt_fini_data_exchange(adev);
4723 }
4724
9e225fb9
AG
4725 amdgpu_fence_driver_isr_toggle(adev, true);
4726
71182665 4727 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4728 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4729 struct amdgpu_ring *ring = adev->rings[i];
4730
51687759 4731 if (!ring || !ring->sched.thread)
0875dc9e 4732 continue;
5740682e 4733
b8920e1e
SS
4734 /* Clear job fence from fence drv to avoid force_completion
4735 * leave NULL and vm flush fence in fence drv
4736 */
5c1e6fa4 4737 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4738
2f9d4084
ML
4739 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4740 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4741 }
d38ceaf9 4742
9e225fb9
AG
4743 amdgpu_fence_driver_isr_toggle(adev, false);
4744
ff99849b 4745 if (job && job->vm)
222b5f04
AG
4746 drm_sched_increase_karma(&job->base);
4747
04442bf7 4748 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4749 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4750 if (r == -EOPNOTSUPP)
404b277b
LL
4751 r = 0;
4752 else
04442bf7
LL
4753 return r;
4754
1d721ed6 4755 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4756 if (!amdgpu_sriov_vf(adev)) {
4757
4758 if (!need_full_reset)
4759 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4760
360cd081
LG
4761 if (!need_full_reset && amdgpu_gpu_recovery &&
4762 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4763 amdgpu_device_ip_pre_soft_reset(adev);
4764 r = amdgpu_device_ip_soft_reset(adev);
4765 amdgpu_device_ip_post_soft_reset(adev);
4766 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4767 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4768 need_full_reset = true;
4769 }
4770 }
4771
4772 if (need_full_reset)
4773 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4774 if (need_full_reset)
4775 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4776 else
4777 clear_bit(AMDGPU_NEED_FULL_RESET,
4778 &reset_context->flags);
26bc5340
AG
4779 }
4780
4781 return r;
4782}
4783
15fd09a0
SA
4784static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4785{
15fd09a0
SA
4786 int i;
4787
38a15ad9 4788 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4789
4790 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4791 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4792 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4793 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4794 }
4795
4796 return 0;
4797}
4798
3d8785f6
SA
4799#ifdef CONFIG_DEV_COREDUMP
4800static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4801 size_t count, void *data, size_t datalen)
4802{
4803 struct drm_printer p;
4804 struct amdgpu_device *adev = data;
4805 struct drm_print_iterator iter;
4806 int i;
4807
4808 iter.data = buffer;
4809 iter.offset = 0;
4810 iter.start = offset;
4811 iter.remain = count;
4812
4813 p = drm_coredump_printer(&iter);
4814
4815 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4816 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4817 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4818 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4819 if (adev->reset_task_info.pid)
4820 drm_printf(&p, "process_name: %s PID: %d\n",
4821 adev->reset_task_info.process_name,
4822 adev->reset_task_info.pid);
4823
4824 if (adev->reset_vram_lost)
4825 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4826 if (adev->num_regs) {
4827 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4828
4829 for (i = 0; i < adev->num_regs; i++)
4830 drm_printf(&p, "0x%08x: 0x%08x\n",
4831 adev->reset_dump_reg_list[i],
4832 adev->reset_dump_reg_value[i]);
4833 }
4834
4835 return count - iter.remain;
4836}
4837
4838static void amdgpu_devcoredump_free(void *data)
4839{
4840}
4841
4842static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4843{
4844 struct drm_device *dev = adev_to_drm(adev);
4845
4846 ktime_get_ts64(&adev->reset_time);
6d1b3455 4847 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
3d8785f6
SA
4848 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4849}
4850#endif
4851
04442bf7
LL
4852int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4853 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4854{
4855 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4856 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4857 int r = 0;
f5c7e779 4858 bool gpu_reset_for_dev_remove = 0;
26bc5340 4859
04442bf7
LL
4860 /* Try reset handler method first */
4861 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4862 reset_list);
15fd09a0 4863 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4864
4865 reset_context->reset_device_list = device_list_handle;
04442bf7 4866 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 4867 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4868 if (r == -EOPNOTSUPP)
404b277b
LL
4869 r = 0;
4870 else
04442bf7
LL
4871 return r;
4872
4873 /* Reset handler not implemented, use the default method */
4874 need_full_reset =
4875 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4876 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4877
f5c7e779
YC
4878 gpu_reset_for_dev_remove =
4879 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4880 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4881
26bc5340 4882 /*
655ce9cb 4883 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4884 * to allow proper links negotiation in FW (within 1 sec)
4885 */
7ac71382 4886 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4887 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4888 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4889 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4890 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4891 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4892 r = -EALREADY;
4893 } else
4894 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4895
041a62bc 4896 if (r) {
aac89168 4897 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4898 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4899 break;
ce316fa5
LM
4900 }
4901 }
4902
041a62bc
AG
4903 /* For XGMI wait for all resets to complete before proceed */
4904 if (!r) {
655ce9cb 4905 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4906 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4907 flush_work(&tmp_adev->xgmi_reset_work);
4908 r = tmp_adev->asic_reset_res;
4909 if (r)
4910 break;
ce316fa5
LM
4911 }
4912 }
4913 }
ce316fa5 4914 }
26bc5340 4915
43c4d576 4916 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4917 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4918 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4919 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4920 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4921 }
4922
00eaa571 4923 amdgpu_ras_intr_cleared();
43c4d576 4924 }
00eaa571 4925
f5c7e779
YC
4926 /* Since the mode1 reset affects base ip blocks, the
4927 * phase1 ip blocks need to be resumed. Otherwise there
4928 * will be a BIOS signature error and the psp bootloader
4929 * can't load kdb on the next amdgpu install.
4930 */
4931 if (gpu_reset_for_dev_remove) {
4932 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4933 amdgpu_device_ip_resume_phase1(tmp_adev);
4934
4935 goto end;
4936 }
4937
655ce9cb 4938 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4939 if (need_full_reset) {
4940 /* post card */
e3c1b071 4941 r = amdgpu_device_asic_init(tmp_adev);
4942 if (r) {
aac89168 4943 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4944 } else {
26bc5340 4945 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 4946
26bc5340
AG
4947 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4948 if (r)
4949 goto out;
4950
4951 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4952#ifdef CONFIG_DEV_COREDUMP
4953 tmp_adev->reset_vram_lost = vram_lost;
4954 memset(&tmp_adev->reset_task_info, 0,
4955 sizeof(tmp_adev->reset_task_info));
4956 if (reset_context->job && reset_context->job->vm)
4957 tmp_adev->reset_task_info =
4958 reset_context->job->vm->task_info;
4959 amdgpu_reset_capture_coredumpm(tmp_adev);
4960#endif
26bc5340 4961 if (vram_lost) {
77e7f829 4962 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4963 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4964 }
4965
26bc5340
AG
4966 r = amdgpu_device_fw_loading(tmp_adev);
4967 if (r)
4968 return r;
4969
4970 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4971 if (r)
4972 goto out;
4973
4974 if (vram_lost)
4975 amdgpu_device_fill_reset_magic(tmp_adev);
4976
fdafb359
EQ
4977 /*
4978 * Add this ASIC as tracked as reset was already
4979 * complete successfully.
4980 */
4981 amdgpu_register_gpu_instance(tmp_adev);
4982
04442bf7
LL
4983 if (!reset_context->hive &&
4984 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4985 amdgpu_xgmi_add_device(tmp_adev);
4986
7c04ca50 4987 r = amdgpu_device_ip_late_init(tmp_adev);
4988 if (r)
4989 goto out;
4990
087451f3 4991 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4992
e8fbaf03
GC
4993 /*
4994 * The GPU enters bad state once faulty pages
4995 * by ECC has reached the threshold, and ras
4996 * recovery is scheduled next. So add one check
4997 * here to break recovery if it indeed exceeds
4998 * bad page threshold, and remind user to
4999 * retire this GPU or setting one bigger
5000 * bad_page_threshold value to fix this once
5001 * probing driver again.
5002 */
11003c68 5003 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5004 /* must succeed. */
5005 amdgpu_ras_resume(tmp_adev);
5006 } else {
5007 r = -EINVAL;
5008 goto out;
5009 }
e79a04d5 5010
26bc5340 5011 /* Update PSP FW topology after reset */
04442bf7
LL
5012 if (reset_context->hive &&
5013 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5014 r = amdgpu_xgmi_update_topology(
5015 reset_context->hive, tmp_adev);
26bc5340
AG
5016 }
5017 }
5018
26bc5340
AG
5019out:
5020 if (!r) {
5021 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5022 r = amdgpu_ib_ring_tests(tmp_adev);
5023 if (r) {
5024 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5025 need_full_reset = true;
5026 r = -EAGAIN;
5027 goto end;
5028 }
5029 }
5030
5031 if (!r)
5032 r = amdgpu_device_recover_vram(tmp_adev);
5033 else
5034 tmp_adev->asic_reset_res = r;
5035 }
5036
5037end:
04442bf7
LL
5038 if (need_full_reset)
5039 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5040 else
5041 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5042 return r;
5043}
5044
e923be99 5045static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5046{
5740682e 5047
a3a09142
AD
5048 switch (amdgpu_asic_reset_method(adev)) {
5049 case AMD_RESET_METHOD_MODE1:
5050 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5051 break;
5052 case AMD_RESET_METHOD_MODE2:
5053 adev->mp1_state = PP_MP1_STATE_RESET;
5054 break;
5055 default:
5056 adev->mp1_state = PP_MP1_STATE_NONE;
5057 break;
5058 }
26bc5340 5059}
d38ceaf9 5060
e923be99 5061static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5062{
89041940 5063 amdgpu_vf_error_trans_all(adev);
a3a09142 5064 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5065}
5066
3f12acc8
EQ
5067static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5068{
5069 struct pci_dev *p = NULL;
5070
5071 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5072 adev->pdev->bus->number, 1);
5073 if (p) {
5074 pm_runtime_enable(&(p->dev));
5075 pm_runtime_resume(&(p->dev));
5076 }
b85e285e
YY
5077
5078 pci_dev_put(p);
3f12acc8
EQ
5079}
5080
5081static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5082{
5083 enum amd_reset_method reset_method;
5084 struct pci_dev *p = NULL;
5085 u64 expires;
5086
5087 /*
5088 * For now, only BACO and mode1 reset are confirmed
5089 * to suffer the audio issue without proper suspended.
5090 */
5091 reset_method = amdgpu_asic_reset_method(adev);
5092 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5093 (reset_method != AMD_RESET_METHOD_MODE1))
5094 return -EINVAL;
5095
5096 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5097 adev->pdev->bus->number, 1);
5098 if (!p)
5099 return -ENODEV;
5100
5101 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5102 if (!expires)
5103 /*
5104 * If we cannot get the audio device autosuspend delay,
5105 * a fixed 4S interval will be used. Considering 3S is
5106 * the audio controller default autosuspend delay setting.
5107 * 4S used here is guaranteed to cover that.
5108 */
54b7feb9 5109 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5110
5111 while (!pm_runtime_status_suspended(&(p->dev))) {
5112 if (!pm_runtime_suspend(&(p->dev)))
5113 break;
5114
5115 if (expires < ktime_get_mono_fast_ns()) {
5116 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5117 pci_dev_put(p);
3f12acc8
EQ
5118 /* TODO: abort the succeeding gpu reset? */
5119 return -ETIMEDOUT;
5120 }
5121 }
5122
5123 pm_runtime_disable(&(p->dev));
5124
b85e285e 5125 pci_dev_put(p);
3f12acc8
EQ
5126 return 0;
5127}
5128
d193b12b 5129static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5130{
5131 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5132
5133#if defined(CONFIG_DEBUG_FS)
5134 if (!amdgpu_sriov_vf(adev))
5135 cancel_work(&adev->reset_work);
5136#endif
5137
5138 if (adev->kfd.dev)
5139 cancel_work(&adev->kfd.reset_work);
5140
5141 if (amdgpu_sriov_vf(adev))
5142 cancel_work(&adev->virt.flr_work);
5143
5144 if (con && adev->ras_enabled)
5145 cancel_work(&con->recovery_work);
5146
5147}
5148
26bc5340 5149/**
6e9c65f7 5150 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5151 *
982a820b 5152 * @adev: amdgpu_device pointer
26bc5340 5153 * @job: which job trigger hang
80bd2de1 5154 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5155 *
5156 * Attempt to reset the GPU if it has hung (all asics).
5157 * Attempt to do soft-reset or full-reset and reinitialize Asic
5158 * Returns 0 for success or an error on failure.
5159 */
5160
cf727044 5161int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5162 struct amdgpu_job *job,
5163 struct amdgpu_reset_context *reset_context)
26bc5340 5164{
1d721ed6 5165 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5166 bool job_signaled = false;
26bc5340 5167 struct amdgpu_hive_info *hive = NULL;
26bc5340 5168 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5169 int i, r = 0;
bb5c7235 5170 bool need_emergency_restart = false;
3f12acc8 5171 bool audio_suspended = false;
f5c7e779
YC
5172 bool gpu_reset_for_dev_remove = false;
5173
5174 gpu_reset_for_dev_remove =
5175 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5176 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5177
6e3cd2a9 5178 /*
bb5c7235
WS
5179 * Special case: RAS triggered and full reset isn't supported
5180 */
5181 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5182
d5ea093e
AG
5183 /*
5184 * Flush RAM to disk so that after reboot
5185 * the user can read log and see why the system rebooted.
5186 */
bb5c7235 5187 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5188 DRM_WARN("Emergency reboot.");
5189
5190 ksys_sync_helper();
5191 emergency_restart();
5192 }
5193
b823821f 5194 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5195 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5196
175ac6ec
ZL
5197 if (!amdgpu_sriov_vf(adev))
5198 hive = amdgpu_get_xgmi_hive(adev);
681260df 5199 if (hive)
53b3f8f4 5200 mutex_lock(&hive->hive_lock);
26bc5340 5201
f1549c09
LG
5202 reset_context->job = job;
5203 reset_context->hive = hive;
9e94d22c
EQ
5204 /*
5205 * Build list of devices to reset.
5206 * In case we are in XGMI hive mode, resort the device list
5207 * to put adev in the 1st position.
5208 */
5209 INIT_LIST_HEAD(&device_list);
175ac6ec 5210 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5211 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5212 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5213 if (gpu_reset_for_dev_remove && adev->shutdown)
5214 tmp_adev->shutdown = true;
5215 }
655ce9cb 5216 if (!list_is_first(&adev->reset_list, &device_list))
5217 list_rotate_to_front(&adev->reset_list, &device_list);
5218 device_list_handle = &device_list;
26bc5340 5219 } else {
655ce9cb 5220 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5221 device_list_handle = &device_list;
5222 }
5223
e923be99
AG
5224 /* We need to lock reset domain only once both for XGMI and single device */
5225 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5226 reset_list);
3675c2f2 5227 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5228
1d721ed6 5229 /* block all schedulers and reset given job's ring */
655ce9cb 5230 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5231
e923be99 5232 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5233
3f12acc8
EQ
5234 /*
5235 * Try to put the audio codec into suspend state
5236 * before gpu reset started.
5237 *
5238 * Due to the power domain of the graphics device
5239 * is shared with AZ power domain. Without this,
5240 * we may change the audio hardware from behind
5241 * the audio driver's back. That will trigger
5242 * some audio codec errors.
5243 */
5244 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5245 audio_suspended = true;
5246
9e94d22c
EQ
5247 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5248
52fb44cf
EQ
5249 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5250
c004d44e 5251 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5252 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5253
12ffa55d
AG
5254 /*
5255 * Mark these ASICs to be reseted as untracked first
5256 * And add them back after reset completed
5257 */
5258 amdgpu_unregister_gpu_instance(tmp_adev);
5259
163d4cd2 5260 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5261
f1c1314b 5262 /* disable ras on ALL IPs */
bb5c7235 5263 if (!need_emergency_restart &&
b823821f 5264 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5265 amdgpu_ras_suspend(tmp_adev);
5266
1d721ed6
AG
5267 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5268 struct amdgpu_ring *ring = tmp_adev->rings[i];
5269
5270 if (!ring || !ring->sched.thread)
5271 continue;
5272
0b2d2c2e 5273 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5274
bb5c7235 5275 if (need_emergency_restart)
7c6e68c7 5276 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5277 }
8f8c80f4 5278 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5279 }
5280
bb5c7235 5281 if (need_emergency_restart)
7c6e68c7
AG
5282 goto skip_sched_resume;
5283
1d721ed6
AG
5284 /*
5285 * Must check guilty signal here since after this point all old
5286 * HW fences are force signaled.
5287 *
5288 * job->base holds a reference to parent fence
5289 */
f6a3f660 5290 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5291 job_signaled = true;
1d721ed6
AG
5292 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5293 goto skip_hw_reset;
5294 }
5295
26bc5340 5296retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5297 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5298 if (gpu_reset_for_dev_remove) {
5299 /* Workaroud for ASICs need to disable SMC first */
5300 amdgpu_device_smu_fini_early(tmp_adev);
5301 }
f1549c09 5302 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5303 /*TODO Should we stop ?*/
5304 if (r) {
aac89168 5305 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5306 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5307 tmp_adev->asic_reset_res = r;
5308 }
247c7b0d
AG
5309
5310 /*
5311 * Drop all pending non scheduler resets. Scheduler resets
5312 * were already dropped during drm_sched_stop
5313 */
d193b12b 5314 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5315 }
5316
5317 /* Actual ASIC resets if needed.*/
4f30d920 5318 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5319 if (amdgpu_sriov_vf(adev)) {
5320 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5321 if (r)
5322 adev->asic_reset_res = r;
950d6425 5323
28606c4e
YC
5324 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5325 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5326 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5327 amdgpu_ras_resume(adev);
26bc5340 5328 } else {
f1549c09 5329 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5330 if (r && r == -EAGAIN)
26bc5340 5331 goto retry;
f5c7e779
YC
5332
5333 if (!r && gpu_reset_for_dev_remove)
5334 goto recover_end;
26bc5340
AG
5335 }
5336
1d721ed6
AG
5337skip_hw_reset:
5338
26bc5340 5339 /* Post ASIC reset for all devs .*/
655ce9cb 5340 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5341
1d721ed6
AG
5342 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5343 struct amdgpu_ring *ring = tmp_adev->rings[i];
5344
5345 if (!ring || !ring->sched.thread)
5346 continue;
5347
6868a2c4 5348 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5349 }
5350
693073a0 5351 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5352 amdgpu_mes_self_test(tmp_adev);
5353
b8920e1e 5354 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5355 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5356
7258fa31
SK
5357 if (tmp_adev->asic_reset_res)
5358 r = tmp_adev->asic_reset_res;
5359
1d721ed6 5360 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5361
5362 if (r) {
5363 /* bad news, how to tell it to userspace ? */
12ffa55d 5364 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5365 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5366 } else {
12ffa55d 5367 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5368 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5369 DRM_WARN("smart shift update failed\n");
26bc5340 5370 }
7c6e68c7 5371 }
26bc5340 5372
7c6e68c7 5373skip_sched_resume:
655ce9cb 5374 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5375 /* unlock kfd: SRIOV would do it separately */
c004d44e 5376 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5377 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5378
5379 /* kfd_post_reset will do nothing if kfd device is not initialized,
5380 * need to bring up kfd here if it's not be initialized before
5381 */
5382 if (!adev->kfd.init_complete)
5383 amdgpu_amdkfd_device_init(adev);
5384
3f12acc8
EQ
5385 if (audio_suspended)
5386 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5387
5388 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5389
5390 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5391 }
5392
f5c7e779 5393recover_end:
e923be99
AG
5394 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5395 reset_list);
5396 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5397
9e94d22c 5398 if (hive) {
9e94d22c 5399 mutex_unlock(&hive->hive_lock);
d95e8e97 5400 amdgpu_put_xgmi_hive(hive);
9e94d22c 5401 }
26bc5340 5402
f287a3c5 5403 if (r)
26bc5340 5404 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5405
5406 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5407 return r;
5408}
5409
e3ecdffa
AD
5410/**
5411 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5412 *
5413 * @adev: amdgpu_device pointer
5414 *
5415 * Fetchs and stores in the driver the PCIE capabilities (gen speed
5416 * and lanes) of the slot the device is in. Handles APUs and
5417 * virtualized environments where PCIE config space may not be available.
5418 */
5494d864 5419static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5420{
5d9a6330 5421 struct pci_dev *pdev;
c5313457
HK
5422 enum pci_bus_speed speed_cap, platform_speed_cap;
5423 enum pcie_link_width platform_link_width;
d0dd7f0c 5424
cd474ba0
AD
5425 if (amdgpu_pcie_gen_cap)
5426 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5427
cd474ba0
AD
5428 if (amdgpu_pcie_lane_cap)
5429 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5430
cd474ba0 5431 /* covers APUs as well */
04e85958 5432 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5433 if (adev->pm.pcie_gen_mask == 0)
5434 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5435 if (adev->pm.pcie_mlw_mask == 0)
5436 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5437 return;
cd474ba0 5438 }
d0dd7f0c 5439
c5313457
HK
5440 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5441 return;
5442
dbaa922b
AD
5443 pcie_bandwidth_available(adev->pdev, NULL,
5444 &platform_speed_cap, &platform_link_width);
c5313457 5445
cd474ba0 5446 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5447 /* asic caps */
5448 pdev = adev->pdev;
5449 speed_cap = pcie_get_speed_cap(pdev);
5450 if (speed_cap == PCI_SPEED_UNKNOWN) {
5451 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5452 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5453 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5454 } else {
2b3a1f51
FX
5455 if (speed_cap == PCIE_SPEED_32_0GT)
5456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5459 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5460 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5461 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5462 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5466 else if (speed_cap == PCIE_SPEED_8_0GT)
5467 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5470 else if (speed_cap == PCIE_SPEED_5_0GT)
5471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5473 else
5474 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5475 }
5476 /* platform caps */
c5313457 5477 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5478 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5479 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5480 } else {
2b3a1f51
FX
5481 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5482 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5485 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5486 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5487 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5488 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5489 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5490 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5492 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5493 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5495 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5496 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5497 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5498 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5499 else
5500 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5501
cd474ba0
AD
5502 }
5503 }
5504 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5505 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5506 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5507 } else {
c5313457 5508 switch (platform_link_width) {
5d9a6330 5509 case PCIE_LNK_X32:
cd474ba0
AD
5510 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5511 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5517 break;
5d9a6330 5518 case PCIE_LNK_X16:
cd474ba0
AD
5519 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5525 break;
5d9a6330 5526 case PCIE_LNK_X12:
cd474ba0
AD
5527 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5532 break;
5d9a6330 5533 case PCIE_LNK_X8:
cd474ba0
AD
5534 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5538 break;
5d9a6330 5539 case PCIE_LNK_X4:
cd474ba0
AD
5540 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5543 break;
5d9a6330 5544 case PCIE_LNK_X2:
cd474ba0
AD
5545 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5547 break;
5d9a6330 5548 case PCIE_LNK_X1:
cd474ba0
AD
5549 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5550 break;
5551 default:
5552 break;
5553 }
d0dd7f0c
AD
5554 }
5555 }
5556}
d38ceaf9 5557
08a2fd23
RE
5558/**
5559 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5560 *
5561 * @adev: amdgpu_device pointer
5562 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5563 *
5564 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5565 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5566 * @peer_adev.
5567 */
5568bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5569 struct amdgpu_device *peer_adev)
5570{
5571#ifdef CONFIG_HSA_AMD_P2P
5572 uint64_t address_mask = peer_adev->dev->dma_mask ?
5573 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5574 resource_size_t aper_limit =
5575 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5576 bool p2p_access =
5577 !adev->gmc.xgmi.connected_to_cpu &&
5578 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5579
5580 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5581 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5582 !(adev->gmc.aper_base & address_mask ||
5583 aper_limit & address_mask));
5584#else
5585 return false;
5586#endif
5587}
5588
361dbd01
AD
5589int amdgpu_device_baco_enter(struct drm_device *dev)
5590{
1348969a 5591 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5592 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5593
6ab68650 5594 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5595 return -ENOTSUPP;
5596
8ab0d6f0 5597 if (ras && adev->ras_enabled &&
acdae216 5598 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5599 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5600
9530273e 5601 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5602}
5603
5604int amdgpu_device_baco_exit(struct drm_device *dev)
5605{
1348969a 5606 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5607 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5608 int ret = 0;
361dbd01 5609
6ab68650 5610 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5611 return -ENOTSUPP;
5612
9530273e
EQ
5613 ret = amdgpu_dpm_baco_exit(adev);
5614 if (ret)
5615 return ret;
7a22677b 5616
8ab0d6f0 5617 if (ras && adev->ras_enabled &&
acdae216 5618 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5619 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5620
1bece222
CL
5621 if (amdgpu_passthrough(adev) &&
5622 adev->nbio.funcs->clear_doorbell_interrupt)
5623 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5624
7a22677b 5625 return 0;
361dbd01 5626}
c9a6b82f
AG
5627
5628/**
5629 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5630 * @pdev: PCI device struct
5631 * @state: PCI channel state
5632 *
5633 * Description: Called when a PCI error is detected.
5634 *
5635 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5636 */
5637pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5638{
5639 struct drm_device *dev = pci_get_drvdata(pdev);
5640 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5641 int i;
c9a6b82f
AG
5642
5643 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5644
6894305c
AG
5645 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5646 DRM_WARN("No support for XGMI hive yet...");
5647 return PCI_ERS_RESULT_DISCONNECT;
5648 }
5649
e17e27f9
GC
5650 adev->pci_channel_state = state;
5651
c9a6b82f
AG
5652 switch (state) {
5653 case pci_channel_io_normal:
5654 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5655 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5656 case pci_channel_io_frozen:
5657 /*
d0fb18b5 5658 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5659 * to GPU during PCI error recovery
5660 */
3675c2f2 5661 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5662 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5663
5664 /*
5665 * Block any work scheduling as we do for regular GPU reset
5666 * for the duration of the recovery
5667 */
5668 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5669 struct amdgpu_ring *ring = adev->rings[i];
5670
5671 if (!ring || !ring->sched.thread)
5672 continue;
5673
5674 drm_sched_stop(&ring->sched, NULL);
5675 }
8f8c80f4 5676 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5677 return PCI_ERS_RESULT_NEED_RESET;
5678 case pci_channel_io_perm_failure:
5679 /* Permanent error, prepare for device removal */
5680 return PCI_ERS_RESULT_DISCONNECT;
5681 }
5682
5683 return PCI_ERS_RESULT_NEED_RESET;
5684}
5685
5686/**
5687 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5688 * @pdev: pointer to PCI device
5689 */
5690pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5691{
5692
5693 DRM_INFO("PCI error: mmio enabled callback!!\n");
5694
5695 /* TODO - dump whatever for debugging purposes */
5696
5697 /* This called only if amdgpu_pci_error_detected returns
5698 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5699 * works, no need to reset slot.
5700 */
5701
5702 return PCI_ERS_RESULT_RECOVERED;
5703}
5704
5705/**
5706 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5707 * @pdev: PCI device struct
5708 *
5709 * Description: This routine is called by the pci error recovery
5710 * code after the PCI slot has been reset, just before we
5711 * should resume normal operations.
5712 */
5713pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5714{
5715 struct drm_device *dev = pci_get_drvdata(pdev);
5716 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5717 int r, i;
04442bf7 5718 struct amdgpu_reset_context reset_context;
362c7b91 5719 u32 memsize;
7ac71382 5720 struct list_head device_list;
c9a6b82f
AG
5721
5722 DRM_INFO("PCI error: slot reset callback!!\n");
5723
04442bf7
LL
5724 memset(&reset_context, 0, sizeof(reset_context));
5725
7ac71382 5726 INIT_LIST_HEAD(&device_list);
655ce9cb 5727 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5728
362c7b91
AG
5729 /* wait for asic to come out of reset */
5730 msleep(500);
5731
7ac71382 5732 /* Restore PCI confspace */
c1dd4aa6 5733 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5734
362c7b91
AG
5735 /* confirm ASIC came out of reset */
5736 for (i = 0; i < adev->usec_timeout; i++) {
5737 memsize = amdgpu_asic_get_config_memsize(adev);
5738
5739 if (memsize != 0xffffffff)
5740 break;
5741 udelay(1);
5742 }
5743 if (memsize == 0xffffffff) {
5744 r = -ETIME;
5745 goto out;
5746 }
5747
04442bf7
LL
5748 reset_context.method = AMD_RESET_METHOD_NONE;
5749 reset_context.reset_req_dev = adev;
5750 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5751 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5752
7afefb81 5753 adev->no_hw_access = true;
04442bf7 5754 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5755 adev->no_hw_access = false;
c9a6b82f
AG
5756 if (r)
5757 goto out;
5758
04442bf7 5759 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5760
5761out:
c9a6b82f 5762 if (!r) {
c1dd4aa6
AG
5763 if (amdgpu_device_cache_pci_state(adev->pdev))
5764 pci_restore_state(adev->pdev);
5765
c9a6b82f
AG
5766 DRM_INFO("PCIe error recovery succeeded\n");
5767 } else {
5768 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5769 amdgpu_device_unset_mp1_state(adev);
5770 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5771 }
5772
5773 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5774}
5775
5776/**
5777 * amdgpu_pci_resume() - resume normal ops after PCI reset
5778 * @pdev: pointer to PCI device
5779 *
5780 * Called when the error recovery driver tells us that its
505199a3 5781 * OK to resume normal operation.
c9a6b82f
AG
5782 */
5783void amdgpu_pci_resume(struct pci_dev *pdev)
5784{
5785 struct drm_device *dev = pci_get_drvdata(pdev);
5786 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5787 int i;
c9a6b82f 5788
c9a6b82f
AG
5789
5790 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5791
e17e27f9
GC
5792 /* Only continue execution for the case of pci_channel_io_frozen */
5793 if (adev->pci_channel_state != pci_channel_io_frozen)
5794 return;
5795
acd89fca
AG
5796 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5797 struct amdgpu_ring *ring = adev->rings[i];
5798
5799 if (!ring || !ring->sched.thread)
5800 continue;
5801
acd89fca
AG
5802 drm_sched_start(&ring->sched, true);
5803 }
5804
e923be99
AG
5805 amdgpu_device_unset_mp1_state(adev);
5806 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5807}
c1dd4aa6
AG
5808
5809bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5810{
5811 struct drm_device *dev = pci_get_drvdata(pdev);
5812 struct amdgpu_device *adev = drm_to_adev(dev);
5813 int r;
5814
5815 r = pci_save_state(pdev);
5816 if (!r) {
5817 kfree(adev->pci_state);
5818
5819 adev->pci_state = pci_store_saved_state(pdev);
5820
5821 if (!adev->pci_state) {
5822 DRM_ERROR("Failed to store PCI saved state");
5823 return false;
5824 }
5825 } else {
5826 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5827 return false;
5828 }
5829
5830 return true;
5831}
5832
5833bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5834{
5835 struct drm_device *dev = pci_get_drvdata(pdev);
5836 struct amdgpu_device *adev = drm_to_adev(dev);
5837 int r;
5838
5839 if (!adev->pci_state)
5840 return false;
5841
5842 r = pci_load_saved_state(pdev, adev->pci_state);
5843
5844 if (!r) {
5845 pci_restore_state(pdev);
5846 } else {
5847 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5848 return false;
5849 }
5850
5851 return true;
5852}
5853
810085dd
EH
5854void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5855 struct amdgpu_ring *ring)
5856{
5857#ifdef CONFIG_X86_64
b818a5d3 5858 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5859 return;
5860#endif
5861 if (adev->gmc.xgmi.connected_to_cpu)
5862 return;
5863
5864 if (ring && ring->funcs->emit_hdp_flush)
5865 amdgpu_ring_emit_hdp_flush(ring);
5866 else
5867 amdgpu_asic_flush_hdp(adev, ring);
5868}
c1dd4aa6 5869
810085dd
EH
5870void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5871 struct amdgpu_ring *ring)
5872{
5873#ifdef CONFIG_X86_64
b818a5d3 5874 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5875 return;
5876#endif
5877 if (adev->gmc.xgmi.connected_to_cpu)
5878 return;
c1dd4aa6 5879
810085dd
EH
5880 amdgpu_asic_invalidate_hdp(adev, ring);
5881}
34f3a4a9 5882
89a7a870
AG
5883int amdgpu_in_reset(struct amdgpu_device *adev)
5884{
5885 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5886}
5887
34f3a4a9
LY
5888/**
5889 * amdgpu_device_halt() - bring hardware to some kind of halt state
5890 *
5891 * @adev: amdgpu_device pointer
5892 *
5893 * Bring hardware to some kind of halt state so that no one can touch it
5894 * any more. It will help to maintain error context when error occurred.
5895 * Compare to a simple hang, the system will keep stable at least for SSH
5896 * access. Then it should be trivial to inspect the hardware state and
5897 * see what's going on. Implemented as following:
5898 *
5899 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5900 * clears all CPU mappings to device, disallows remappings through page faults
5901 * 2. amdgpu_irq_disable_all() disables all interrupts
5902 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5903 * 4. set adev->no_hw_access to avoid potential crashes after setp 5
5904 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5905 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5906 * flush any in flight DMA operations
5907 */
5908void amdgpu_device_halt(struct amdgpu_device *adev)
5909{
5910 struct pci_dev *pdev = adev->pdev;
e0f943b4 5911 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 5912
2c1c7ba4 5913 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
5914 drm_dev_unplug(ddev);
5915
5916 amdgpu_irq_disable_all(adev);
5917
5918 amdgpu_fence_driver_hw_fini(adev);
5919
5920 adev->no_hw_access = true;
5921
5922 amdgpu_device_unmap_mmio(adev);
5923
5924 pci_disable_device(pdev);
5925 pci_wait_for_pending_transaction(pdev);
5926}
86700a40
XD
5927
5928u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5929 u32 reg)
5930{
5931 unsigned long flags, address, data;
5932 u32 r;
5933
5934 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5935 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5936
5937 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5938 WREG32(address, reg * 4);
5939 (void)RREG32(address);
5940 r = RREG32(data);
5941 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5942 return r;
5943}
5944
5945void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5946 u32 reg, u32 v)
5947{
5948 unsigned long flags, address, data;
5949
5950 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5951 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5952
5953 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5954 WREG32(address, reg * 4);
5955 (void)RREG32(address);
5956 WREG32(data, v);
5957 (void)RREG32(data);
5958 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5959}
68ce8b24
CK
5960
5961/**
5962 * amdgpu_device_switch_gang - switch to a new gang
5963 * @adev: amdgpu_device pointer
5964 * @gang: the gang to switch to
5965 *
5966 * Try to switch to a new gang.
5967 * Returns: NULL if we switched to the new gang or a reference to the current
5968 * gang leader.
5969 */
5970struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5971 struct dma_fence *gang)
5972{
5973 struct dma_fence *old = NULL;
5974
5975 do {
5976 dma_fence_put(old);
5977 rcu_read_lock();
5978 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5979 rcu_read_unlock();
5980
5981 if (old == gang)
5982 break;
5983
5984 if (!dma_fence_is_signaled(old))
5985 return old;
5986
5987 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5988 old, gang) != old);
5989
5990 dma_fence_put(old);
5991 return NULL;
5992}
220c8cc8
AD
5993
5994bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5995{
5996 switch (adev->asic_type) {
5997#ifdef CONFIG_DRM_AMDGPU_SI
5998 case CHIP_HAINAN:
5999#endif
6000 case CHIP_TOPAZ:
6001 /* chips with no display hardware */
6002 return false;
6003#ifdef CONFIG_DRM_AMDGPU_SI
6004 case CHIP_TAHITI:
6005 case CHIP_PITCAIRN:
6006 case CHIP_VERDE:
6007 case CHIP_OLAND:
6008#endif
6009#ifdef CONFIG_DRM_AMDGPU_CIK
6010 case CHIP_BONAIRE:
6011 case CHIP_HAWAII:
6012 case CHIP_KAVERI:
6013 case CHIP_KABINI:
6014 case CHIP_MULLINS:
6015#endif
6016 case CHIP_TONGA:
6017 case CHIP_FIJI:
6018 case CHIP_POLARIS10:
6019 case CHIP_POLARIS11:
6020 case CHIP_POLARIS12:
6021 case CHIP_VEGAM:
6022 case CHIP_CARRIZO:
6023 case CHIP_STONEY:
6024 /* chips with display hardware */
6025 return true;
6026 default:
6027 /* IP discovery */
6028 if (!adev->ip_versions[DCE_HWIP][0] ||
6029 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6030 return false;
6031 return true;
6032 }
6033}
81283fee
JZ
6034
6035uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6036 uint32_t inst, uint32_t reg_addr, char reg_name[],
6037 uint32_t expected_value, uint32_t mask)
6038{
6039 uint32_t ret = 0;
6040 uint32_t old_ = 0;
6041 uint32_t tmp_ = RREG32(reg_addr);
6042 uint32_t loop = adev->usec_timeout;
6043
6044 while ((tmp_ & (mask)) != (expected_value)) {
6045 if (old_ != tmp_) {
6046 loop = adev->usec_timeout;
6047 old_ = tmp_;
6048 } else
6049 udelay(1);
6050 tmp_ = RREG32(reg_addr);
6051 loop--;
6052 if (!loop) {
6053 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6054 inst, reg_name, (uint32_t)expected_value,
6055 (uint32_t)(tmp_ & (mask)));
6056 ret = -ETIMEDOUT;
6057 break;
6058 }
6059 }
6060 return ret;
6061}