1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
ea1d2a38
TZ
28
29#include <linux/aperture.h>
b1ddf548 30#include <linux/power_supply.h>
0875dc9e 31#include <linux/kthread.h>
fdf2f6c5 32#include <linux/module.h>
d38ceaf9
AD
33#include <linux/console.h>
34#include <linux/slab.h>
4a74c38c 35#include <linux/iommu.h>
901e2be2 36#include <linux/pci.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
4562236b 40#include <drm/drm_atomic_helper.h>
4cf50bae 41#include <drm/drm_client_event.h>
973ad627 42#include <drm/drm_crtc_helper.h>
fcd70cd3 43#include <drm/drm_probe_helper.h>
d38ceaf9 44#include <drm/amdgpu_drm.h>
7b1c6263 45#include <linux/device.h>
d38ceaf9
AD
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
85150626 77#include "amdgpu_virt.h"
9022f01b 78#include "amdgpu_dev_coredump.h"
5183411b 79
d5ea093e 80#include <linux/suspend.h>
c6a6e2db 81#include <drm/task_barrier.h>
3f12acc8 82#include <linux/pm_runtime.h>
d5ea093e 83
f89f8c6b
AG
84#include <drm/drm_drv.h>
85
3ad5dcfe
KHF
86#if IS_ENABLED(CONFIG_X86)
87#include <asm/intel-family.h>
c770ef19 88#include <asm/cpu_device_id.h>
3ad5dcfe
KHF
89#endif
90
e2a75f88 91MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 92MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 93MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 94MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 95MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 96MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 97MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 98
2dc80b00 99#define AMDGPU_RESUME_MS 2000
7258fa31
SK
100#define AMDGPU_MAX_RETRY_LIMIT 2
101#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
ad390542
HZ
102#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
103#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
104#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
2dc80b00 105
7e0aa706 106#define AMDGPU_VBIOS_SKIP (1U << 0)
6e8ca38e 107#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
7e0aa706 108
b7cdb41e
ML
109static const struct drm_driver amdgpu_kms_driver;
110
050091ab 111const char *amdgpu_asic_name[] = {
da69c161
KW
112 "TAHITI",
113 "PITCAIRN",
114 "VERDE",
115 "OLAND",
116 "HAINAN",
d38ceaf9
AD
117 "BONAIRE",
118 "KAVERI",
119 "KABINI",
120 "HAWAII",
121 "MULLINS",
122 "TOPAZ",
123 "TONGA",
48299f95 124 "FIJI",
d38ceaf9 125 "CARRIZO",
139f4917 126 "STONEY",
2cc0c0b5
FC
127 "POLARIS10",
128 "POLARIS11",
c4642a47 129 "POLARIS12",
48ff108d 130 "VEGAM",
d4196f01 131 "VEGA10",
8fab806a 132 "VEGA12",
956fcddc 133 "VEGA20",
2ca8a5d2 134 "RAVEN",
d6c3b24e 135 "ARCTURUS",
1eee4228 136 "RENOIR",
d46b417a 137 "ALDEBARAN",
852a6626 138 "NAVI10",
d0f56dc2 139 "CYAN_SKILLFISH",
87dbad02 140 "NAVI14",
9802f5d7 141 "NAVI12",
ccaf72d3 142 "SIENNA_CICHLID",
ddd8fbe7 143 "NAVY_FLOUNDER",
4f1e9a76 144 "VANGOGH",
a2468e04 145 "DIMGREY_CAVEFISH",
6f169591 146 "BEIGE_GOBY",
ee9236b7 147 "YELLOW_CARP",
3ae695d6 148 "IP DISCOVERY",
d38ceaf9
AD
149 "LAST",
150};
151
ee2003d5 152#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
14f2fe34
LL
153/*
154 * Default init level where all blocks are expected to be initialized. This is
155 * the level of initialization expected by default and also after a full reset
156 * of the device.
157 */
158struct amdgpu_init_level amdgpu_init_default = {
159 .level = AMDGPU_INIT_LEVEL_DEFAULT,
160 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
161};
162
a86e0c0e
LL
163struct amdgpu_init_level amdgpu_init_recovery = {
164 .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
165 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
166};
167
14f2fe34
LL
168/*
169 * Minimal blocks needed to be initialized before an XGMI hive can be reset. This
170 * is used for cases like reset on initialization where the entire hive needs to
171 * be reset before first use.
172 */
173struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
174 .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
175 .hwini_ip_block_mask =
176 BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
631af731
LL
177 BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
178 BIT(AMD_IP_BLOCK_TYPE_PSP)
14f2fe34
LL
179};
180
181static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
182 enum amd_ip_block_type block)
183{
184 return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
185}
186
187void amdgpu_set_init_level(struct amdgpu_device *adev,
188 enum amdgpu_init_lvl_id lvl)
189{
190 switch (lvl) {
191 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
192 adev->init_lvl = &amdgpu_init_minimal_xgmi;
193 break;
a86e0c0e
LL
194 case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
195 adev->init_lvl = &amdgpu_init_recovery;
196 break;
14f2fe34
LL
197 case AMDGPU_INIT_LEVEL_DEFAULT:
198 fallthrough;
199 default:
200 adev->init_lvl = &amdgpu_init_default;
201 break;
202 }
203}
204
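/*
 * Illustrative sketch (not part of the driver): a caller that must reset an
 * entire XGMI hive before first use would typically drop to the minimal init
 * level around the reset and restore the default level afterwards:
 *
 *	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
 *	... reset the hive ...
 *	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
 */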
ab66c832 205static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
2965e635
ML
206static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
207 void *data);
ab66c832 208
dcea6e65
KR
209/**
210 * DOC: pcie_replay_count
211 *
212 * The amdgpu driver provides a sysfs API for reporting the total number
a567db80 213 * of PCIe replays (NAKs).
dcea6e65 214 * The file pcie_replay_count is used for this and returns the total
a567db80 215 * number of replays as a sum of the NAKs generated and NAKs received.
dcea6e65
KR
216 */
217
218static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
219 struct device_attribute *attr, char *buf)
220{
221 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 222 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
223 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
224
36000c7a 225 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
226}
227
b8920e1e 228static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
229 amdgpu_device_get_pcie_replay_count, NULL);
230
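/*
 * Illustrative usage (assumption, not taken from this file): user space
 * normally reads the attribute through sysfs; the exact path depends on the
 * card index, e.g.:
 *
 *	$ cat /sys/class/drm/card0/device/pcie_replay_count
 *	0
 */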
9c05636c
VS
231static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
232{
233 int ret = 0;
234
196aefea 235 if (amdgpu_nbio_is_replay_cnt_supported(adev))
9c05636c
VS
236 ret = sysfs_create_file(&adev->dev->kobj,
237 &dev_attr_pcie_replay_count.attr);
238
239 return ret;
240}
241
242static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
243{
196aefea 244 if (amdgpu_nbio_is_replay_cnt_supported(adev))
9c05636c
VS
245 sysfs_remove_file(&adev->dev->kobj,
246 &dev_attr_pcie_replay_count.attr);
247}
248
af39e6f4 249static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
2d0f5001 250 const struct bin_attribute *attr, char *buf,
af39e6f4
LL
251 loff_t ppos, size_t count)
252{
253 struct device *dev = kobj_to_dev(kobj);
254 struct drm_device *ddev = dev_get_drvdata(dev);
255 struct amdgpu_device *adev = drm_to_adev(ddev);
256 ssize_t bytes_read;
257
258 switch (ppos) {
259 case AMDGPU_SYS_REG_STATE_XGMI:
260 bytes_read = amdgpu_asic_get_reg_state(
261 adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
262 break;
263 case AMDGPU_SYS_REG_STATE_WAFL:
264 bytes_read = amdgpu_asic_get_reg_state(
265 adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
266 break;
267 case AMDGPU_SYS_REG_STATE_PCIE:
268 bytes_read = amdgpu_asic_get_reg_state(
269 adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
270 break;
271 case AMDGPU_SYS_REG_STATE_USR:
272 bytes_read = amdgpu_asic_get_reg_state(
273 adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
274 break;
275 case AMDGPU_SYS_REG_STATE_USR_1:
276 bytes_read = amdgpu_asic_get_reg_state(
277 adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
278 break;
279 default:
280 return -EINVAL;
281 }
282
283 return bytes_read;
284}
285
2d0f5001
TW
286static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
287 AMDGPU_SYS_REG_STATE_END);
af39e6f4
LL
288
289int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
290{
291 int ret;
292
293 if (!amdgpu_asic_get_reg_state_supported(adev))
294 return 0;
295
296 ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
297
298 return ret;
299}
300
301void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
302{
303 if (!amdgpu_asic_get_reg_state_supported(adev))
304 return;
305 sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306}
307
e095026f
SK
308int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
309{
310 int r;
311
312 if (ip_block->version->funcs->suspend) {
313 r = ip_block->version->funcs->suspend(ip_block);
314 if (r) {
315 dev_err(ip_block->adev->dev,
316 "suspend of IP block <%s> failed %d\n",
317 ip_block->version->funcs->name, r);
318 return r;
319 }
320 }
321
322 ip_block->status.hw = false;
323 return 0;
324}
325
502d7630
SK
326int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
327{
328 int r;
329
330 if (ip_block->version->funcs->resume) {
331 r = ip_block->version->funcs->resume(ip_block);
332 if (r) {
333 dev_err(ip_block->adev->dev,
334 "resume of IP block <%s> failed %d\n",
335 ip_block->version->funcs->name, r);
336 return r;
337 }
338 }
339
340 ip_block->status.hw = true;
341 return 0;
342}
343
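/*
 * Illustrative sketch (not part of the driver): callers normally walk
 * adev->ip_blocks and suspend them in reverse init order (and resume them in
 * forward order), e.g.:
 *
 *	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 *		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
 *		if (r)
 *			return r;
 *	}
 */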
4798db85
LL
344/**
345 * DOC: board_info
346 *
347 * The amdgpu driver provides a sysfs API for giving board related information.
348 * It provides the form factor information in the format
349 *
350 * type : form factor
351 *
352 * Possible form factor values
353 *
354 * - "cem" - PCIE CEM card
355 * - "oam" - Open Compute Accelerator Module
356 * - "unknown" - Not known
357 *
358 */
359
76da73f0
LL
360static ssize_t amdgpu_device_get_board_info(struct device *dev,
361 struct device_attribute *attr,
362 char *buf)
363{
364 struct drm_device *ddev = dev_get_drvdata(dev);
365 struct amdgpu_device *adev = drm_to_adev(ddev);
366 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
367 const char *pkg;
368
369 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
370 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
371
372 switch (pkg_type) {
373 case AMDGPU_PKG_TYPE_CEM:
374 pkg = "cem";
375 break;
376 case AMDGPU_PKG_TYPE_OAM:
377 pkg = "oam";
378 break;
379 default:
380 pkg = "unknown";
381 break;
382 }
383
384 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
385}
386
387static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
388
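/*
 * Example output (illustrative): reading board_info on an OAM package yields
 * "type : oam", a CEM plug-in card yields "type : cem", and anything else
 * reports "type : unknown", matching the sysfs_emit() format above.
 */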
389static struct attribute *amdgpu_board_attrs[] = {
390 &dev_attr_board_info.attr,
391 NULL,
392};
393
394static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
395 struct attribute *attr, int n)
396{
397 struct device *dev = kobj_to_dev(kobj);
398 struct drm_device *ddev = dev_get_drvdata(dev);
399 struct amdgpu_device *adev = drm_to_adev(ddev);
400
401 if (adev->flags & AMD_IS_APU)
402 return 0;
403
404 return attr->mode;
405}
406
407static const struct attribute_group amdgpu_board_attrs_group = {
408 .attrs = amdgpu_board_attrs,
409 .is_visible = amdgpu_board_attrs_is_visible
410};
411
5494d864
AD
412static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
413
fd496ca8 414/**
b98c6299 415 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8 416 *
127ed492 417 * @adev: amdgpu device pointer
fd496ca8 418 *
b98c6299 419 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
420 * otherwise return false.
421 */
127ed492 422bool amdgpu_device_supports_px(struct amdgpu_device *adev)
fd496ca8 423{
b98c6299 424 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
425 return true;
426 return false;
427}
428
e3ecdffa 429/**
0330b848 430 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa 431 *
127ed492 432 * @adev: amdgpu device pointer
e3ecdffa 433 *
b98c6299 434 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
435 * otherwise return false.
436 */
127ed492 437bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
d38ceaf9 438{
1ad5bdc2
ML
439 if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
440 return false;
441
b98c6299
AD
442 if (adev->has_pr3 ||
443 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
444 return true;
445 return false;
446}
447
a69cba42
AD
448/**
449 * amdgpu_device_supports_baco - Does the device support BACO
450 *
127ed492 451 * @adev: amdgpu device pointer
a69cba42 452 *
b2207dc6 453 * Return:
a567db80
RD
454 * 1 if the device supports BACO;
455 * 3 if the device supports MACO (only works if BACO is supported)
b2207dc6 456 * otherwise return 0.
a69cba42 457 */
127ed492 458int amdgpu_device_supports_baco(struct amdgpu_device *adev)
a69cba42 459{
a69cba42
AD
460 return amdgpu_asic_supports_baco(adev);
461}
462
13478532
MJ
463void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
464{
13478532
MJ
465 int bamaco_support;
466
13478532 467 adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
127ed492 468 bamaco_support = amdgpu_device_supports_baco(adev);
13478532
MJ
469
470 switch (amdgpu_runtime_pm) {
471 case 2:
472 if (bamaco_support & MACO_SUPPORT) {
473 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
474 dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
475 } else if (bamaco_support == BACO_SUPPORT) {
476 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
477 dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
478 }
479 break;
480 case 1:
481 if (bamaco_support & BACO_SUPPORT) {
482 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
483 dev_info(adev->dev, "Forcing BACO for runtime pm\n");
484 }
485 break;
486 case -1:
487 case -2:
127ed492
LL
488 if (amdgpu_device_supports_px(adev)) {
489 /* enable PX as runtime mode */
13478532
MJ
490 adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
491 dev_info(adev->dev, "Using ATPX for runtime pm\n");
127ed492
LL
492 } else if (amdgpu_device_supports_boco(adev)) {
493 /* enable boco as runtime mode */
13478532
MJ
494 adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
495 dev_info(adev->dev, "Using BOCO for runtime pm\n");
496 } else {
497 if (!bamaco_support)
498 goto no_runtime_pm;
499
500 switch (adev->asic_type) {
501 case CHIP_VEGA20:
502 case CHIP_ARCTURUS:
503 /* BACO is not supported on vega20 and arcturus */
504 break;
505 case CHIP_VEGA10:
506 /* enable BACO as runpm mode if noretry=0 */
e90bd6d8 507 if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
13478532
MJ
508 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
509 break;
510 default:
511 /* enable BACO as runpm mode on CI+ */
e90bd6d8
AD
512 if (!amdgpu_passthrough(adev))
513 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
13478532
MJ
514 break;
515 }
516
517 if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
518 if (bamaco_support & MACO_SUPPORT) {
519 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
520 dev_info(adev->dev, "Using BAMACO for runtime pm\n");
521 } else {
522 dev_info(adev->dev, "Using BACO for runtime pm\n");
523 }
524 }
525 }
526 break;
527 case 0:
528 dev_info(adev->dev, "runtime pm is manually disabled\n");
529 break;
530 default:
531 break;
532 }
533
534no_runtime_pm:
535 if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
536 dev_info(adev->dev, "Runtime PM not available\n");
537}
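/*
 * Illustrative note: the switch above is driven by amdgpu_runtime_pm, which
 * (as an assumption about code outside this file) is exposed as the
 * amdgpu.runpm module option: 2 forces BAMACO (with BACO fallback), 1 forces
 * BACO, 0 disables runtime PM, and -1/-2 auto-detect PX/BOCO/BACO. After the
 * call, the chosen mode can be inspected, e.g.:
 *
 *	amdgpu_device_detect_runtime_pm_mode(adev);
 *	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
 *		dev_dbg(adev->dev, "runtime PM stays disabled\n");
 */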
3fa8f89d
S
538/**
539 * amdgpu_device_supports_smart_shift - Is the device dGPU with
540 * smart shift support
541 *
127ed492 542 * @adev: amdgpu device pointer
3fa8f89d
S
543 *
544 * Returns true if the device is a dGPU with Smart Shift support,
545 * otherwise returns false.
546 */
127ed492 547bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
3fa8f89d 548{
127ed492 549 return (amdgpu_device_supports_boco(adev) &&
3fa8f89d
S
550 amdgpu_acpi_is_power_shift_control_supported());
551}
552
6e3cd2a9
MCC
553/*
554 * VRAM access helper functions
555 */
556
e35e2b11 557/**
048af66b 558 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
559 *
560 * @adev: amdgpu_device pointer
561 * @pos: offset of the buffer in vram
562 * @buf: virtual address of the buffer in system memory
563 * @size: read/write size; the buffer pointed to by @buf must hold at least @size bytes
564 * @write: true - write to vram, otherwise - read from vram
565 */
048af66b
KW
566void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
567 void *buf, size_t size, bool write)
e35e2b11 568{
e35e2b11 569 unsigned long flags;
048af66b
KW
570 uint32_t hi = ~0, tmp = 0;
571 uint32_t *data = buf;
ce05ac56 572 uint64_t last;
f89f8c6b 573 int idx;
ce05ac56 574
c58a863b 575 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 576 return;
9d11eb0d 577
048af66b
KW
578 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
579
580 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
581 for (last = pos + size; pos < last; pos += 4) {
582 tmp = pos >> 31;
583
584 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
585 if (tmp != hi) {
586 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
587 hi = tmp;
588 }
589 if (write)
590 WREG32_NO_KIQ(mmMM_DATA, *data++);
591 else
592 *data++ = RREG32_NO_KIQ(mmMM_DATA);
593 }
594
595 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
596 drm_dev_exit(idx);
597}
598
599/**
a567db80 600 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
601 *
602 * @adev: amdgpu_device pointer
603 * @pos: offset of the buffer in vram
604 * @buf: virtual address of the buffer in system memory
605 * @size: read/write size; the buffer pointed to by @buf must hold at least @size bytes
606 * @write: true - write to vram, otherwise - read from vram
607 *
608 * The return value is the number of bytes transferred.
609 */
610size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
611 void *buf, size_t size, bool write)
612{
9d11eb0d 613#ifdef CONFIG_64BIT
048af66b
KW
614 void __iomem *addr;
615 size_t count = 0;
616 uint64_t last;
617
618 if (!adev->mman.aper_base_kaddr)
619 return 0;
620
9d11eb0d
CK
621 last = min(pos + size, adev->gmc.visible_vram_size);
622 if (last > pos) {
048af66b
KW
623 addr = adev->mman.aper_base_kaddr + pos;
624 count = last - pos;
9d11eb0d
CK
625
626 if (write) {
627 memcpy_toio(addr, buf, count);
4c452b5c
SS
628 /* Make sure HDP write cache flush happens without any reordering
629 * after the system memory contents are sent over PCIe device
630 */
9d11eb0d 631 mb();
810085dd 632 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 633 } else {
810085dd 634 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
635 /* Make sure HDP read cache is invalidated before issuing a read
636 * to the PCIe device
637 */
9d11eb0d
CK
638 mb();
639 memcpy_fromio(buf, addr, count);
640 }
641
9d11eb0d 642 }
048af66b
KW
643
644 return count;
645#else
646 return 0;
9d11eb0d 647#endif
048af66b 648}
9d11eb0d 649
048af66b
KW
650/**
651 * amdgpu_device_vram_access - read/write a buffer in vram
652 *
653 * @adev: amdgpu_device pointer
654 * @pos: offset of the buffer in vram
655 * @buf: virtual address of the buffer in system memory
656 * @size: read/write size; the buffer pointed to by @buf must hold at least @size bytes
657 * @write: true - write to vram, otherwise - read from vram
658 */
659void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
660 void *buf, size_t size, bool write)
661{
662 size_t count;
e35e2b11 663
048af66b
KW
664 /* try using the vram aperture to access vram first */
665 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
666 size -= count;
667 if (size) {
668 /* use MM to access the rest of vram */
669 pos += count;
670 buf += count;
671 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
672 }
673}
674
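/*
 * Illustrative sketch (not part of the driver): reading one dword from VRAM
 * through the helper above; "offset" is an assumed, valid VRAM offset. The
 * helper uses the CPU-visible aperture when possible and falls back to
 * MM_INDEX/MM_DATA for the rest:
 *
 *	u32 val;
 *
 *	amdgpu_device_vram_access(adev, offset, &val, sizeof(val), false);
 */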
d38ceaf9 675/*
f7ee1874 676 * register access helper functions.
d38ceaf9 677 */
56b53c0b
DL
678
679/* Check if hw access should be skipped because of hotplug or device error */
680bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
681{
7afefb81 682 if (adev->no_hw_access)
56b53c0b
DL
683 return true;
684
685#ifdef CONFIG_LOCKDEP
686 /*
687 * This is a bit complicated to understand, so worth a comment. What we assert
688 * here is that the GPU reset is not running on another thread in parallel.
689 *
690 * For this we trylock the read side of the reset semaphore, if that succeeds
a567db80 691 * we know that the reset is not running in parallel.
56b53c0b
DL
692 *
693 * If the trylock fails we assert that we are either already holding the read
694 * side of the lock or are the reset thread itself and hold the write side of
695 * the lock.
696 */
697 if (in_task()) {
d0fb18b5
AG
698 if (down_read_trylock(&adev->reset_domain->sem))
699 up_read(&adev->reset_domain->sem);
56b53c0b 700 else
d0fb18b5 701 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
702 }
703#endif
704 return false;
705}
706
e3ecdffa 707/**
f7ee1874 708 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
709 *
710 * @adev: amdgpu_device pointer
711 * @reg: dword aligned register offset
712 * @acc_flags: access flags which require special behavior
713 *
714 * Returns the 32 bit value from the offset specified.
715 */
f7ee1874
HZ
716uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
717 uint32_t reg, uint32_t acc_flags)
d38ceaf9 718{
f4b373f4
TSD
719 uint32_t ret;
720
56b53c0b 721 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
722 return 0;
723
f7ee1874
HZ
724 if ((reg * 4) < adev->rmmio_size) {
725 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
726 amdgpu_sriov_runtime(adev) &&
d0fb18b5 727 down_read_trylock(&adev->reset_domain->sem)) {
85150626 728 ret = amdgpu_kiq_rreg(adev, reg, 0);
d0fb18b5 729 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
730 } else {
731 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
732 }
733 } else {
734 ret = adev->pcie_rreg(adev, reg * 4);
81202807 735 }
bc992ba5 736
f7ee1874 737 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 738
f4b373f4 739 return ret;
d38ceaf9
AD
740}
741
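/*
 * Illustrative note: most code does not call amdgpu_device_rreg()/
 * amdgpu_device_wreg() directly but uses the register macros, which (as an
 * assumption about headers outside this file) expand roughly to:
 *
 *	tmp = RREG32(reg);	-> amdgpu_device_rreg(adev, reg, 0)
 *	WREG32(reg, tmp);	-> amdgpu_device_wreg(adev, reg, tmp, 0)
 */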
421a2a30
ML
742/*
743 * MMIO register read with bytes helper functions
744 * @offset:bytes offset from MMIO start
b8920e1e 745 */
421a2a30 746
e3ecdffa
AD
747/**
748 * amdgpu_mm_rreg8 - read a memory mapped IO register
749 *
750 * @adev: amdgpu_device pointer
751 * @offset: byte aligned register offset
752 *
753 * Returns the 8 bit value from the offset specified.
754 */
7cbbc745
AG
755uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
756{
56b53c0b 757 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
758 return 0;
759
421a2a30
ML
760 if (offset < adev->rmmio_size)
761 return (readb(adev->rmmio + offset));
762 BUG();
763}
764
85150626
VL
765
766/**
767 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
768 *
769 * @adev: amdgpu_device pointer
770 * @reg: dword aligned register offset
771 * @acc_flags: access flags which require special behavior
772 * @xcc_id: xcc accelerated compute core id
773 *
774 * Returns the 32 bit value from the offset specified.
775 */
776uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
777 uint32_t reg, uint32_t acc_flags,
778 uint32_t xcc_id)
779{
780 uint32_t ret, rlcg_flag;
781
782 if (amdgpu_device_skip_hw_access(adev))
783 return 0;
784
785 if ((reg * 4) < adev->rmmio_size) {
786 if (amdgpu_sriov_vf(adev) &&
787 !amdgpu_sriov_runtime(adev) &&
788 adev->gfx.rlc.rlcg_reg_access_supported &&
789 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
790 GC_HWIP, false,
791 &rlcg_flag)) {
e21e0b78 792 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
85150626
VL
793 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
794 amdgpu_sriov_runtime(adev) &&
795 down_read_trylock(&adev->reset_domain->sem)) {
796 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
797 up_read(&adev->reset_domain->sem);
798 } else {
799 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
800 }
801 } else {
802 ret = adev->pcie_rreg(adev, reg * 4);
803 }
804
805 return ret;
806}
807
421a2a30
ML
808/*
809 * MMIO register write with bytes helper functions
810 * @offset:bytes offset from MMIO start
811 * @value: the value want to be written to the register
b8920e1e
SS
812 */
813
e3ecdffa
AD
814/**
815 * amdgpu_mm_wreg8 - write a memory mapped IO register
816 *
817 * @adev: amdgpu_device pointer
818 * @offset: byte aligned register offset
819 * @value: 8 bit value to write
820 *
821 * Writes the value specified to the offset specified.
822 */
7cbbc745
AG
823void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
824{
56b53c0b 825 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
826 return;
827
421a2a30
ML
828 if (offset < adev->rmmio_size)
829 writeb(value, adev->rmmio + offset);
830 else
831 BUG();
832}
833
e3ecdffa 834/**
f7ee1874 835 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
836 *
837 * @adev: amdgpu_device pointer
838 * @reg: dword aligned register offset
839 * @v: 32 bit value to write to the register
840 * @acc_flags: access flags which require special behavior
841 *
842 * Writes the value specified to the offset specified.
843 */
f7ee1874
HZ
844void amdgpu_device_wreg(struct amdgpu_device *adev,
845 uint32_t reg, uint32_t v,
846 uint32_t acc_flags)
d38ceaf9 847{
56b53c0b 848 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
849 return;
850
f7ee1874
HZ
851 if ((reg * 4) < adev->rmmio_size) {
852 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
853 amdgpu_sriov_runtime(adev) &&
d0fb18b5 854 down_read_trylock(&adev->reset_domain->sem)) {
85150626 855 amdgpu_kiq_wreg(adev, reg, v, 0);
d0fb18b5 856 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
857 } else {
858 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
859 }
860 } else {
861 adev->pcie_wreg(adev, reg * 4, v);
81202807 862 }
bc992ba5 863
f7ee1874 864 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 865}
d38ceaf9 866
03f2abb0 867/**
4cc9f86f 868 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 869 *
71579346
RB
870 * @adev: amdgpu_device pointer
871 * @reg: mmio/rlc register
872 * @v: value to write
8057a9d6 873 * @xcc_id: xcc accelerated compute core id
71579346
RB
874 *
875 * this function is invoked only for the debugfs register access
03f2abb0 876 */
f7ee1874 877void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
878 uint32_t reg, uint32_t v,
879 uint32_t xcc_id)
2e0cc4d4 880{
56b53c0b 881 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
882 return;
883
2e0cc4d4 884 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
885 adev->gfx.rlc.funcs &&
886 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 887 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 888 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
889 } else if ((reg * 4) >= adev->rmmio_size) {
890 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
891 } else {
892 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 893 }
d38ceaf9
AD
894}
895
85150626
VL
896/**
897 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
898 *
899 * @adev: amdgpu_device pointer
900 * @reg: dword aligned register offset
901 * @v: 32 bit value to write to the register
902 * @acc_flags: access flags which require special behavior
903 * @xcc_id: xcc accelerated compute core id
904 *
905 * Writes the value specified to the offset specified.
906 */
907void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
908 uint32_t reg, uint32_t v,
909 uint32_t acc_flags, uint32_t xcc_id)
910{
911 uint32_t rlcg_flag;
912
913 if (amdgpu_device_skip_hw_access(adev))
914 return;
915
916 if ((reg * 4) < adev->rmmio_size) {
917 if (amdgpu_sriov_vf(adev) &&
918 !amdgpu_sriov_runtime(adev) &&
919 adev->gfx.rlc.rlcg_reg_access_supported &&
920 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
921 GC_HWIP, true,
922 &rlcg_flag)) {
e21e0b78 923 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
85150626
VL
924 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
925 amdgpu_sriov_runtime(adev) &&
926 down_read_trylock(&adev->reset_domain->sem)) {
927 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
928 up_read(&adev->reset_domain->sem);
929 } else {
930 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
931 }
932 } else {
933 adev->pcie_wreg(adev, reg * 4, v);
934 }
935}
936
1bba3683
HZ
937/**
938 * amdgpu_device_indirect_rreg - read an indirect register
939 *
940 * @adev: amdgpu_device pointer
22f453fb 941 * @reg_addr: indirect register address to read from
1bba3683
HZ
942 *
943 * Returns the value of indirect register @reg_addr
944 */
945u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
946 u32 reg_addr)
947{
65ba96e9 948 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
949 void __iomem *pcie_index_offset;
950 void __iomem *pcie_data_offset;
65ba96e9
HZ
951 u32 r;
952
953 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
954 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
955
956 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
957 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
958 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
959
960 writel(reg_addr, pcie_index_offset);
961 readl(pcie_index_offset);
962 r = readl(pcie_data_offset);
963 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
964
965 return r;
966}
967
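/*
 * Illustrative note (assumption about code outside this file): the SoC
 * specific files typically wire these helpers into the indirect register
 * callbacks, e.g.:
 *
 *	adev->pcie_rreg = &amdgpu_device_indirect_rreg;
 *	adev->pcie_wreg = &amdgpu_device_indirect_wreg;
 */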
0c552ed3
LM
968u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
969 u64 reg_addr)
970{
971 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
972 u32 r;
973 void __iomem *pcie_index_offset;
974 void __iomem *pcie_index_hi_offset;
975 void __iomem *pcie_data_offset;
976
ad390542
HZ
977 if (unlikely(!adev->nbio.funcs)) {
978 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
979 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
980 } else {
981 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
982 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
983 }
984
985 if (reg_addr >> 32) {
986 if (unlikely(!adev->nbio.funcs))
987 pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
988 else
989 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
990 } else {
0c552ed3 991 pcie_index_hi = 0;
ad390542 992 }
0c552ed3
LM
993
994 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
995 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
996 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
997 if (pcie_index_hi != 0)
998 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
999 pcie_index_hi * 4;
1000
1001 writel(reg_addr, pcie_index_offset);
1002 readl(pcie_index_offset);
1003 if (pcie_index_hi != 0) {
1004 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1005 readl(pcie_index_hi_offset);
1006 }
1007 r = readl(pcie_data_offset);
1008
1009 /* clear the high bits */
1010 if (pcie_index_hi != 0) {
1011 writel(0, pcie_index_hi_offset);
1012 readl(pcie_index_hi_offset);
1013 }
1014
1015 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1016
1017 return r;
1018}
1019
1bba3683
HZ
1020/**
1021 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
1022 *
1023 * @adev: amdgpu_device pointer
22f453fb 1024 * @reg_addr: indirect register address to read from
1bba3683
HZ
1025 *
1026 * Returns the value of indirect register @reg_addr
1027 */
1028u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
1029 u32 reg_addr)
1030{
65ba96e9 1031 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
1032 void __iomem *pcie_index_offset;
1033 void __iomem *pcie_data_offset;
65ba96e9
HZ
1034 u64 r;
1035
1036 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1037 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
1038
1039 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1040 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1041 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1042
1043 /* read low 32 bits */
1044 writel(reg_addr, pcie_index_offset);
1045 readl(pcie_index_offset);
1046 r = readl(pcie_data_offset);
1047 /* read high 32 bits */
1048 writel(reg_addr + 4, pcie_index_offset);
1049 readl(pcie_index_offset);
1050 r |= ((u64)readl(pcie_data_offset) << 32);
1051 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1052
1053 return r;
1054}
1055
a76b2870
CL
1056u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1057 u64 reg_addr)
1058{
1059 unsigned long flags, pcie_index, pcie_data;
1060 unsigned long pcie_index_hi = 0;
1061 void __iomem *pcie_index_offset;
1062 void __iomem *pcie_index_hi_offset;
1063 void __iomem *pcie_data_offset;
1064 u64 r;
1065
1066 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1067 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1068 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1069 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1070
1071 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1072 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1073 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1074 if (pcie_index_hi != 0)
1075 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1076 pcie_index_hi * 4;
1077
1078 /* read low 32 bits */
1079 writel(reg_addr, pcie_index_offset);
1080 readl(pcie_index_offset);
1081 if (pcie_index_hi != 0) {
1082 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1083 readl(pcie_index_hi_offset);
1084 }
1085 r = readl(pcie_data_offset);
1086 /* read high 32 bits */
1087 writel(reg_addr + 4, pcie_index_offset);
1088 readl(pcie_index_offset);
1089 if (pcie_index_hi != 0) {
1090 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1091 readl(pcie_index_hi_offset);
1092 }
1093 r |= ((u64)readl(pcie_data_offset) << 32);
1094
1095 /* clear the high bits */
1096 if (pcie_index_hi != 0) {
1097 writel(0, pcie_index_hi_offset);
1098 readl(pcie_index_hi_offset);
1099 }
1100
1101 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1102
1103 return r;
1104}
1105
1bba3683
HZ
1106/**
1107 * amdgpu_device_indirect_wreg - write an indirect register address
1108 *
1109 * @adev: amdgpu_device pointer
1bba3683
HZ
1110 * @reg_addr: indirect register offset
1111 * @reg_data: indirect register data
1112 *
1113 */
1114void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
1115 u32 reg_addr, u32 reg_data)
1116{
65ba96e9 1117 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
1118 void __iomem *pcie_index_offset;
1119 void __iomem *pcie_data_offset;
1120
65ba96e9
HZ
1121 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1122 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1123
1bba3683
HZ
1124 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1125 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1126 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1127
1128 writel(reg_addr, pcie_index_offset);
1129 readl(pcie_index_offset);
1130 writel(reg_data, pcie_data_offset);
1131 readl(pcie_data_offset);
1132 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1133}
1134
0c552ed3
LM
1135void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1136 u64 reg_addr, u32 reg_data)
1137{
1138 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1139 void __iomem *pcie_index_offset;
1140 void __iomem *pcie_index_hi_offset;
1141 void __iomem *pcie_data_offset;
1142
1143 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1144 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 1145 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
1146 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1147 else
1148 pcie_index_hi = 0;
1149
1150 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1151 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1152 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1153 if (pcie_index_hi != 0)
1154 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1155 pcie_index_hi * 4;
1156
1157 writel(reg_addr, pcie_index_offset);
1158 readl(pcie_index_offset);
1159 if (pcie_index_hi != 0) {
1160 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1161 readl(pcie_index_hi_offset);
1162 }
1163 writel(reg_data, pcie_data_offset);
1164 readl(pcie_data_offset);
1165
1166 /* clear the high bits */
1167 if (pcie_index_hi != 0) {
1168 writel(0, pcie_index_hi_offset);
1169 readl(pcie_index_hi_offset);
1170 }
1171
1172 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1173}
1174
1bba3683
HZ
1175/**
1176 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1177 *
1178 * @adev: amdgpu_device pointer
1bba3683
HZ
1179 * @reg_addr: indirect register offset
1180 * @reg_data: indirect register data
1181 *
1182 */
1183void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
1184 u32 reg_addr, u64 reg_data)
1185{
65ba96e9 1186 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
1187 void __iomem *pcie_index_offset;
1188 void __iomem *pcie_data_offset;
1189
65ba96e9
HZ
1190 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1191 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1192
1bba3683
HZ
1193 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1194 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1195 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1196
1197 /* write low 32 bits */
1198 writel(reg_addr, pcie_index_offset);
1199 readl(pcie_index_offset);
1200 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1201 readl(pcie_data_offset);
1202 /* write high 32 bits */
1203 writel(reg_addr + 4, pcie_index_offset);
1204 readl(pcie_index_offset);
1205 writel((u32)(reg_data >> 32), pcie_data_offset);
1206 readl(pcie_data_offset);
1207 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1208}
1209
a76b2870
CL
1210void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1211 u64 reg_addr, u64 reg_data)
1212{
1213 unsigned long flags, pcie_index, pcie_data;
1214 unsigned long pcie_index_hi = 0;
1215 void __iomem *pcie_index_offset;
1216 void __iomem *pcie_index_hi_offset;
1217 void __iomem *pcie_data_offset;
1218
1219 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1220 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1221 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1222 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1223
1224 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1225 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1226 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1227 if (pcie_index_hi != 0)
1228 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1229 pcie_index_hi * 4;
1230
1231 /* write low 32 bits */
1232 writel(reg_addr, pcie_index_offset);
1233 readl(pcie_index_offset);
1234 if (pcie_index_hi != 0) {
1235 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1236 readl(pcie_index_hi_offset);
1237 }
1238 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1239 readl(pcie_data_offset);
1240 /* write high 32 bits */
1241 writel(reg_addr + 4, pcie_index_offset);
1242 readl(pcie_index_offset);
1243 if (pcie_index_hi != 0) {
1244 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1245 readl(pcie_index_hi_offset);
1246 }
1247 writel((u32)(reg_data >> 32), pcie_data_offset);
1248 readl(pcie_data_offset);
1249
1250 /* clear the high bits */
1251 if (pcie_index_hi != 0) {
1252 writel(0, pcie_index_hi_offset);
1253 readl(pcie_index_hi_offset);
1254 }
1255
1256 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1257}
1258
dabc114e
HZ
1259/**
1260 * amdgpu_device_get_rev_id - query device rev_id
1261 *
1262 * @adev: amdgpu_device pointer
1263 *
1264 * Return device rev_id
1265 */
1266u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1267{
1268 return adev->nbio.funcs->get_rev_id(adev);
1269}
1270
d38ceaf9
AD
1271/**
1272 * amdgpu_invalid_rreg - dummy reg read function
1273 *
982a820b 1274 * @adev: amdgpu_device pointer
d38ceaf9
AD
1275 * @reg: offset of register
1276 *
1277 * Dummy register read function. Used for register blocks
1278 * that certain asics don't have (all asics).
1279 * Returns the value in the register.
1280 */
1281static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1282{
a3e510fd 1283 dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
d38ceaf9
AD
1284 BUG();
1285 return 0;
1286}
1287
0c552ed3
LM
1288static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1289{
a3e510fd 1290 dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
0c552ed3
LM
1291 BUG();
1292 return 0;
1293}
1294
d38ceaf9
AD
1295/**
1296 * amdgpu_invalid_wreg - dummy reg write function
1297 *
982a820b 1298 * @adev: amdgpu_device pointer
d38ceaf9
AD
1299 * @reg: offset of register
1300 * @v: value to write to the register
1301 *
1302 * Dummy register write function. Used for register blocks
1303 * that certain asics don't have (all asics).
1304 */
1305static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1306{
a3e510fd
LL
1307 dev_err(adev->dev,
1308 "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
1309 v);
d38ceaf9
AD
1310 BUG();
1311}
1312
0c552ed3
LM
1313static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1314{
a3e510fd
LL
1315 dev_err(adev->dev,
1316 "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
1317 v);
0c552ed3
LM
1318 BUG();
1319}
1320
4fa1c6a6
TZ
1321/**
1322 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1323 *
982a820b 1324 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1325 * @reg: offset of register
1326 *
1327 * Dummy register read function. Used for register blocks
1328 * that certain asics don't have (all asics).
1329 * Returns the value in the register.
1330 */
1331static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1332{
a3e510fd
LL
1333 dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
1334 reg);
4fa1c6a6
TZ
1335 BUG();
1336 return 0;
1337}
1338
a76b2870
CL
1339static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1340{
a3e510fd 1341 dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
a76b2870
CL
1342 BUG();
1343 return 0;
1344}
1345
4fa1c6a6
TZ
1346/**
1347 * amdgpu_invalid_wreg64 - dummy reg write function
1348 *
982a820b 1349 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1350 * @reg: offset of register
1351 * @v: value to write to the register
1352 *
1353 * Dummy register write function. Used for register blocks
1354 * that certain asics don't have (all asics).
1355 */
1356static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1357{
a3e510fd
LL
1358 dev_err(adev->dev,
1359 "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1360 reg, v);
4fa1c6a6
TZ
1361 BUG();
1362}
1363
a76b2870
CL
1364static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1365{
a3e510fd
LL
1366 dev_err(adev->dev,
1367 "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1368 reg, v);
a76b2870
CL
1369 BUG();
1370}
1371
d38ceaf9
AD
1372/**
1373 * amdgpu_block_invalid_rreg - dummy reg read function
1374 *
982a820b 1375 * @adev: amdgpu_device pointer
d38ceaf9
AD
1376 * @block: offset of instance
1377 * @reg: offset of register
1378 *
1379 * Dummy register read function. Used for register blocks
1380 * that certain asics don't have (all asics).
1381 * Returns the value in the register.
1382 */
1383static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1384 uint32_t block, uint32_t reg)
1385{
a3e510fd
LL
1386 dev_err(adev->dev,
1387 "Invalid callback to read register 0x%04X in block 0x%04X\n",
1388 reg, block);
d38ceaf9
AD
1389 BUG();
1390 return 0;
1391}
1392
1393/**
1394 * amdgpu_block_invalid_wreg - dummy reg write function
1395 *
982a820b 1396 * @adev: amdgpu_device pointer
d38ceaf9
AD
1397 * @block: offset of instance
1398 * @reg: offset of register
1399 * @v: value to write to the register
1400 *
1401 * Dummy register write function. Used for register blocks
1402 * that certain asics don't have (all asics).
1403 */
1404static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1405 uint32_t block,
1406 uint32_t reg, uint32_t v)
1407{
a3e510fd
LL
1408 dev_err(adev->dev,
1409 "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1410 reg, block, v);
d38ceaf9
AD
1411 BUG();
1412}
1413
6e8ca38e
LL
1414static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
1415{
1416 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1417 return AMDGPU_VBIOS_SKIP;
1418
cc0e91a7
LL
1419 if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
1420 return AMDGPU_VBIOS_OPTIONAL;
1421
6e8ca38e
LL
1422 return 0;
1423}
1424
4d2997ab
AD
1425/**
1426 * amdgpu_device_asic_init - Wrapper for atom asic_init
1427 *
982a820b 1428 * @adev: amdgpu_device pointer
4d2997ab
AD
1429 *
1430 * Does any asic specific work and then calls atom asic init.
1431 */
1432static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1433{
6e8ca38e
LL
1434 uint32_t flags;
1435 bool optional;
7656168a
LL
1436 int ret;
1437
4d2997ab 1438 amdgpu_asic_pre_asic_init(adev);
6e8ca38e
LL
1439 flags = amdgpu_device_get_vbios_flags(adev);
1440 optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
4d2997ab 1441
4e8303cf 1442 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5f571c61 1443 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
0b58a55a 1444 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4e8303cf 1445 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
7656168a 1446 amdgpu_psp_wait_for_bootloader(adev);
6e8ca38e
LL
1447 if (optional && !adev->bios)
1448 return 0;
1449
7656168a
LL
1450 ret = amdgpu_atomfirmware_asic_init(adev, true);
1451 return ret;
1452 } else {
6e8ca38e
LL
1453 if (optional && !adev->bios)
1454 return 0;
1455
85d1bcc6 1456 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
7656168a
LL
1457 }
1458
1459 return 0;
4d2997ab
AD
1460}
1461
e3ecdffa 1462/**
7ccfd79f 1463 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1464 *
982a820b 1465 * @adev: amdgpu_device pointer
e3ecdffa
AD
1466 *
1467 * Allocates a scratch page of VRAM for use by various things in the
1468 * driver.
1469 */
7ccfd79f 1470static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1471{
7ccfd79f
CK
1472 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1473 AMDGPU_GEM_DOMAIN_VRAM |
1474 AMDGPU_GEM_DOMAIN_GTT,
1475 &adev->mem_scratch.robj,
1476 &adev->mem_scratch.gpu_addr,
1477 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1478}
1479
e3ecdffa 1480/**
7ccfd79f 1481 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1482 *
982a820b 1483 * @adev: amdgpu_device pointer
e3ecdffa
AD
1484 *
1485 * Frees the VRAM scratch page.
1486 */
7ccfd79f 1487static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1488{
7ccfd79f 1489 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1490}
1491
1492/**
9c3f2b54 1493 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1494 *
1495 * @adev: amdgpu_device pointer
1496 * @registers: pointer to the register array
1497 * @array_size: size of the register array
1498 *
b8920e1e 1499 * Programs an array of registers with and/or masks.
d38ceaf9
AD
1500 * This is a helper for setting golden registers.
1501 */
9c3f2b54
AD
1502void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1503 const u32 *registers,
1504 const u32 array_size)
d38ceaf9
AD
1505{
1506 u32 tmp, reg, and_mask, or_mask;
1507 int i;
1508
1509 if (array_size % 3)
1510 return;
1511
47fc644f 1512 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1513 reg = registers[i + 0];
1514 and_mask = registers[i + 1];
1515 or_mask = registers[i + 2];
1516
1517 if (and_mask == 0xffffffff) {
1518 tmp = or_mask;
1519 } else {
1520 tmp = RREG32(reg);
1521 tmp &= ~and_mask;
e0d07657
HZ
1522 if (adev->family >= AMDGPU_FAMILY_AI)
1523 tmp |= (or_mask & and_mask);
1524 else
1525 tmp |= or_mask;
d38ceaf9
AD
1526 }
1527 WREG32(reg, tmp);
1528 }
1529}
1530
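/*
 * Illustrative sketch (not part of the driver): golden register lists are
 * flat {offset, and_mask, or_mask} triplets; an and_mask of all ones writes
 * the value directly, anything else is a read-modify-write. The values below
 * are made up for the example:
 *
 *	static const u32 example_golden_regs[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00001100,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */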
e3ecdffa
AD
1531/**
1532 * amdgpu_device_pci_config_reset - reset the GPU
1533 *
1534 * @adev: amdgpu_device pointer
1535 *
1536 * Resets the GPU using the pci config reset sequence.
1537 * Only applicable to asics prior to vega10.
1538 */
8111c387 1539void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1540{
1541 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1542}
1543
af484df8
AD
1544/**
1545 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1546 *
1547 * @adev: amdgpu_device pointer
1548 *
1549 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1550 */
1551int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1552{
1553 return pci_reset_function(adev->pdev);
1554}
1555
d38ceaf9 1556/*
06ec9070 1557 * amdgpu_device_wb_*()
455a7bc2 1558 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1559 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1560 */
1561
1562/**
06ec9070 1563 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1564 *
1565 * @adev: amdgpu_device pointer
1566 *
1567 * Disables Writeback and frees the Writeback memory (all asics).
1568 * Used at driver shutdown.
1569 */
06ec9070 1570static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1571{
1572 if (adev->wb.wb_obj) {
a76ed485
AD
1573 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1574 &adev->wb.gpu_addr,
1575 (void **)&adev->wb.wb);
d38ceaf9
AD
1576 adev->wb.wb_obj = NULL;
1577 }
1578}
1579
1580/**
03f2abb0 1581 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1582 *
1583 * @adev: amdgpu_device pointer
1584 *
455a7bc2 1585 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1586 * Used at driver startup.
1588 * Returns 0 on success or a negative error code on failure.
1588 */
06ec9070 1589static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1590{
1591 int r;
1592
1593 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1594 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1595 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1596 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1597 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1598 (void **)&adev->wb.wb);
d38ceaf9
AD
1599 if (r) {
1600 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1601 return r;
1602 }
d38ceaf9
AD
1603
1604 adev->wb.num_wb = AMDGPU_MAX_WB;
1605 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1606
1607 /* clear wb memory */
73469585 1608 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1609 }
1610
1611 return 0;
1612}
1613
1614/**
131b4b36 1615 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1616 *
1617 * @adev: amdgpu_device pointer
1618 * @wb: wb index
1619 *
1620 * Allocate a wb slot for use by the driver (all asics).
1621 * Returns 0 on success or -EINVAL on failure.
1622 */
131b4b36 1623int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9 1624{
497d7cee 1625 unsigned long flags, offset;
d38ceaf9 1626
497d7cee
AD
1627 spin_lock_irqsave(&adev->wb.lock, flags);
1628 offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
97407b63 1629 if (offset < adev->wb.num_wb) {
7014285a 1630 __set_bit(offset, adev->wb.used);
497d7cee 1631 spin_unlock_irqrestore(&adev->wb.lock, flags);
63ae07ca 1632 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1633 return 0;
1634 } else {
497d7cee 1635 spin_unlock_irqrestore(&adev->wb.lock, flags);
0915fdbc
ML
1636 return -EINVAL;
1637 }
1638}
1639
d38ceaf9 1640/**
131b4b36 1641 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1642 *
1643 * @adev: amdgpu_device pointer
1644 * @wb: wb index
1645 *
1646 * Free a wb slot allocated for use by the driver (all asics)
1647 */
131b4b36 1648void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1649{
497d7cee
AD
1650 unsigned long flags;
1651
73469585 1652 wb >>= 3;
497d7cee 1653 spin_lock_irqsave(&adev->wb.lock, flags);
d38ceaf9 1654 if (wb < adev->wb.num_wb)
73469585 1655 __clear_bit(wb, adev->wb.used);
497d7cee 1656 spin_unlock_irqrestore(&adev->wb.lock, flags);
d38ceaf9
AD
1657}
1658
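/*
 * Illustrative sketch (not part of the driver): a ring or IP block would
 * typically reserve a writeback slot, derive CPU/GPU addresses from the
 * returned dword index, and release it when done:
 *
 *	u32 wb;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	cpu_ptr  = &adev->wb.wb[wb];
 *	gpu_addr = adev->wb.gpu_addr + wb * 4;
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */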
d6895ad3
CK
1659/**
1660 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1661 *
1662 * @adev: amdgpu_device pointer
1663 *
1664 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1665 * to fail, but if any of the BARs is not accessible after the resize we abort
1666 * driver loading by returning -ENODEV.
1667 */
1668int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1669{
453f617a 1670 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1671 struct pci_bus *root;
1672 struct resource *res;
b8920e1e 1673 unsigned int i;
d6895ad3
CK
1674 u16 cmd;
1675 int r;
1676
822130b5
AB
1677 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1678 return 0;
1679
0c03b912 1680 /* Bypass for VF */
1681 if (amdgpu_sriov_vf(adev))
1682 return 0;
1683
48b733d9
AD
1684 if (!amdgpu_rebar)
1685 return 0;
1686
5235053f
AD
1687 /* resizing on Dell G5 SE platforms causes problems with runtime pm */
1688 if ((amdgpu_runtime_pm != 0) &&
1689 adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1690 adev->pdev->device == 0x731f &&
1691 adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1692 return 0;
1693
e372baeb
MJ
1694 /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1695 if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
a3e510fd
LL
1696 dev_warn(
1697 adev->dev,
1698 "System can't access extended configuration space, please check!!\n");
e372baeb 1699
b7221f2b
AD
1700 /* skip if the bios has already enabled large BAR */
1701 if (adev->gmc.real_vram_size &&
1702 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1703 return 0;
1704
31b8adab
CK
1705 /* Check if the root BUS has 64bit memory resources */
1706 root = adev->pdev->bus;
1707 while (root->parent)
1708 root = root->parent;
1709
1710 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1711 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1712 res->start > 0x100000000ull)
1713 break;
1714 }
1715
1716 /* Trying to resize is pointless without a root hub window above 4GB */
1717 if (!res)
1718 return 0;
1719
453f617a
ND
1720 /* Limit the BAR size to what is available */
1721 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1722 rbar_size);
1723
d6895ad3
CK
1724 /* Disable memory decoding while we change the BAR addresses and size */
1725 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1726 pci_write_config_word(adev->pdev, PCI_COMMAND,
1727 cmd & ~PCI_COMMAND_MEMORY);
1728
1729 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1730 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1731 if (adev->asic_type >= CHIP_BONAIRE)
1732 pci_release_resource(adev->pdev, 2);
1733
1734 pci_release_resource(adev->pdev, 0);
1735
1736 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1737 if (r == -ENOSPC)
a3e510fd
LL
1738 dev_info(adev->dev,
1739 "Not enough PCI address space for a large BAR.");
d6895ad3 1740 else if (r && r != -ENOTSUPP)
a3e510fd 1741 dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
d6895ad3
CK
1742
1743 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1744
1745 /* When the doorbell or fb BAR isn't available we have no chance of
1746 * using the device.
1747 */
43c064db 1748 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1749 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1750 return -ENODEV;
1751
1752 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1753
1754 return 0;
1755}
a05502e5 1756
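/*
 * Illustrative sketch (hypothetical helper, not part of the driver): how the
 * BAR0 resize request above is derived.  pci_rebar_bytes_to_size() converts
 * the VRAM size to the resizable-BAR encoding (size = 2^(20 + enc) bytes) and
 * pci_rebar_get_possible_sizes() returns a bitmask of encodings the device
 * supports, so the request is clamped to the largest supported size.
 */
static int example_pick_rbar_size(struct pci_dev *pdev, u64 vram_size)
{
	int want = pci_rebar_bytes_to_size(vram_size);
	u32 possible = pci_rebar_get_possible_sizes(pdev, 0);

	if (!possible)
		return -ENOTSUPP;

	/* fls() - 1 is the highest set bit, i.e. the largest supported size */
	return min(want, fls(possible) - 1);
}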
d38ceaf9
AD
1757/*
1758 * GPU helpers function.
1759 */
1760/**
39c640c0 1761 * amdgpu_device_need_post - check if the hw needs post or not
d38ceaf9
AD
1762 *
1763 * @adev: amdgpu_device pointer
1764 *
c836fec5
JQ
1765 * Check if the asic has been initialized (all asics) at driver startup,
1766 * or whether a post is needed because a hw reset was performed.
1767 * Returns true if post is needed, false if not.
d38ceaf9 1768 */
39c640c0 1769bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9 1770{
7e0aa706 1771 uint32_t reg, flags;
d38ceaf9 1772
bec86378
ML
1773 if (amdgpu_sriov_vf(adev))
1774 return false;
1775
7e0aa706
LL
1776 flags = amdgpu_device_get_vbios_flags(adev);
1777 if (flags & AMDGPU_VBIOS_SKIP)
9535a86a 1778 return false;
6e8ca38e 1779 if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
9535a86a
SZ
1780 return false;
1781
bec86378 1782 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1783 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1784 * reboot some old SMC firmware still needs the driver to do a vPost,
1785 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1786 * this flaw, so we force a vPost only for SMC versions below 22.15
bec86378
ML
1787 */
1788 if (adev->asic_type == CHIP_FIJI) {
1789 int err;
1790 uint32_t fw_ver;
b8920e1e 1791
bec86378 1792 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
a567db80 1793 /* force vPost if error occurred */
bec86378
ML
1794 if (err)
1795 return true;
1796
1797 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
8a44fdd3 1798 release_firmware(adev->pm.fw);
1da2c326
ML
1799 if (fw_ver < 0x00160e00)
1800 return true;
bec86378 1801 }
bec86378 1802 }
91fe77eb 1803
e3c1b071 1804 /* Don't post if we need to reset whole hive on init */
5839d27d 1805 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
e3c1b071 1806 return false;
1807
91fe77eb 1808 if (adev->has_hw_reset) {
1809 adev->has_hw_reset = false;
1810 return true;
1811 }
1812
1813 /* bios scratch used on CIK+ */
1814 if (adev->asic_type >= CHIP_BONAIRE)
1815 return amdgpu_atombios_scratch_need_asic_init(adev);
1816
1817 /* check MEM_SIZE for older asics */
1818 reg = amdgpu_asic_get_config_memsize(adev);
1819
1820 if ((reg != 0) && (reg != 0xffffffff))
1821 return false;
1822
1823 return true;
bec86378
ML
1824}
1825
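/*
 * Illustrative sketch (editor's addition, loosely following the init path):
 * the result of amdgpu_device_need_post() decides whether the driver has to
 * run the vBIOS/ATOM ASIC init tables before touching the hardware.  The
 * helper name below is hypothetical.
 */
static int example_maybe_post(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;	/* already posted, e.g. by the SBIOS at boot */

	/* amdgpu_device_asic_init() runs the ATOM ASIC init tables */
	return amdgpu_device_asic_init(adev);
}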
5d1eb4c4 1826/*
bb0f8429
ML
1827 * Check whether seamless boot is supported.
1828 *
7f4ce7b5
ML
1829 * So far we only support seamless boot on DCE 3.0 or later.
1830 * If users report that it works on older ASICs as well, we may
1831 * loosen this.
bb0f8429
ML
1832 */
1833bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1834{
5dc270d3
ML
1835 switch (amdgpu_seamless) {
1836 case -1:
1837 break;
1838 case 1:
1839 return true;
1840 case 0:
1841 return false;
1842 default:
a3e510fd
LL
1843 dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1844 amdgpu_seamless);
5dc270d3
ML
1845 return false;
1846 }
1847
3657a1d5
ML
1848 if (!(adev->flags & AMD_IS_APU))
1849 return false;
1850
5dc270d3
ML
1851 if (adev->mman.keep_stolen_vga_memory)
1852 return false;
1853
ed342a2e 1854 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1855}
1856
5d1eb4c4 1857/*
2757a848
ML
1858 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1859 * don't support dynamic speed switching. Until we have confirmation from Intel
1860 * that a specific host supports it, it's safer that we keep it disabled for all.
5d1eb4c4
ML
1861 *
1862 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1863 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1864 */
d9b3a066 1865static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
5d1eb4c4
ML
1866{
1867#if IS_ENABLED(CONFIG_X86)
1868 struct cpuinfo_x86 *c = &cpu_data(0);
1869
d9b3a066
ML
1870 /* eGPUs change speeds based on USB4 fabric conditions */
1871 if (dev_is_removable(adev->dev))
1872 return true;
1873
5d1eb4c4
ML
1874 if (c->x86_vendor == X86_VENDOR_INTEL)
1875 return false;
1876#endif
1877 return true;
1878}
1879
c770ef19
KF
1880static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1881{
1882#if IS_ENABLED(CONFIG_X86)
1883 struct cpuinfo_x86 *c = &cpu_data(0);
1884
1885 if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
1886 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
1887 return false;
1888
1889 if (c->x86 == 6 &&
1890 adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1891 switch (c->x86_model) {
1892 case VFM_MODEL(INTEL_ALDERLAKE):
1893 case VFM_MODEL(INTEL_ALDERLAKE_L):
1894 case VFM_MODEL(INTEL_RAPTORLAKE):
1895 case VFM_MODEL(INTEL_RAPTORLAKE_P):
1896 case VFM_MODEL(INTEL_RAPTORLAKE_S):
1897 return true;
1898 default:
1899 return false;
1900 }
1901 } else {
1902 return false;
1903 }
1904#else
1905 return false;
1906#endif
1907}
1908
0ab5d711
ML
1909/**
1910 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1911 *
1912 * @adev: amdgpu_device pointer
1913 *
1914 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1915 * be set for this device.
1916 *
1917 * Returns true if it should be used or false if not.
1918 */
1919bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1920{
1921 switch (amdgpu_aspm) {
1922 case -1:
1923 break;
1924 case 0:
1925 return false;
1926 case 1:
1927 return true;
1928 default:
1929 return false;
1930 }
1a6513de
ML
1931 if (adev->flags & AMD_IS_APU)
1932 return false;
c770ef19 1933 if (amdgpu_device_aspm_support_quirk(adev))
2757a848 1934 return false;
0ab5d711
ML
1935 return pcie_aspm_enabled(adev->pdev);
1936}
1937
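/*
 * Illustrative usage sketch (hypothetical, modeled on the per-ASIC SOC code):
 * callers that program ASPM are expected to bail out early when
 * amdgpu_device_should_use_aspm() returns false.
 */
static void example_program_aspm(struct amdgpu_device *adev)
{
	if (!amdgpu_device_should_use_aspm(adev))
		return;

	/* ... write the ASPM-related registers for this ASIC ... */
}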
d38ceaf9
AD
1938/* if we get transitioned to only one device, take VGA back */
1939/**
06ec9070 1940 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1941 *
bf44e8ce 1942 * @pdev: PCI device pointer
d38ceaf9
AD
1943 * @state: enable/disable vga decode
1944 *
1945 * Enable/disable vga decode (all asics).
1946 * Returns VGA resource flags.
1947 */
bf44e8ce
CH
1948static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1949 bool state)
d38ceaf9 1950{
bf44e8ce 1951 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1952
d38ceaf9
AD
1953 amdgpu_asic_set_vga_state(adev, state);
1954 if (state)
1955 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1956 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1957 else
1958 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1959}
1960
e3ecdffa
AD
1961/**
1962 * amdgpu_device_check_block_size - validate the vm block size
1963 *
1964 * @adev: amdgpu_device pointer
1965 *
1966 * Validates the vm block size specified via module parameter.
1967 * The vm block size defines the number of bits in the page table versus the
1968 * page directory; a page is 4KB, so we have a 12-bit offset, a minimum of 9
1969 * bits in the page table, and the remaining bits in the page directory.
1970 */
06ec9070 1971static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1972{
1973 /* defines the number of bits in the page table versus the page directory;
1974 * a page is 4KB, so we have a 12-bit offset, a minimum of 9 bits in the
b8920e1e
SS
1975 * page table, and the remaining bits in the page directory
1976 */
bab4fee7
JZ
1977 if (amdgpu_vm_block_size == -1)
1978 return;
a1adf8be 1979
bab4fee7 1980 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1981 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1982 amdgpu_vm_block_size);
97489129 1983 amdgpu_vm_block_size = -1;
a1adf8be 1984 }
a1adf8be
CZ
1985}
1986
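/*
 * Worked example for the block-size math above (editor's sketch, not driver
 * code): a 4 KiB page gives 12 offset bits, so a page-table block of
 * amdgpu_vm_block_size bits covers 2^(12 + block_size) bytes of VA space,
 * e.g. block_size = 9 (the minimum) -> 2 MiB per page-table block.
 */
static u64 example_vm_block_coverage(unsigned int block_size)
{
	/* block_size = 9 -> 1ULL << 21 = 2 MiB */
	return 1ULL << (12 + block_size);
}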
e3ecdffa
AD
1987/**
1988 * amdgpu_device_check_vm_size - validate the vm size
1989 *
1990 * @adev: amdgpu_device pointer
1991 *
1992 * Validates the vm size in GB specified via module parameter.
1993 * The VM size is the size of the GPU virtual memory space in GB.
1994 */
06ec9070 1995static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1996{
64dab074
AD
1997 /* no need to check the default value */
1998 if (amdgpu_vm_size == -1)
1999 return;
2000
83ca145d
ZJ
2001 if (amdgpu_vm_size < 1) {
2002 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
2003 amdgpu_vm_size);
f3368128 2004 amdgpu_vm_size = -1;
83ca145d 2005 }
83ca145d
ZJ
2006}
2007
7951e376
RZ
2008static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
2009{
2010 struct sysinfo si;
a9d4fe2f 2011 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
2012 uint64_t total_memory;
2013 uint64_t dram_size_seven_GB = 0x1B8000000;
2014 uint64_t dram_size_three_GB = 0xB8000000;
2015
2016 if (amdgpu_smu_memory_pool_size == 0)
2017 return;
2018
2019 if (!is_os_64) {
a3e510fd 2020 dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
7951e376
RZ
2021 goto def_value;
2022 }
2023 si_meminfo(&si);
2024 total_memory = (uint64_t)si.totalram * si.mem_unit;
2025
2026 if ((amdgpu_smu_memory_pool_size == 1) ||
2027 (amdgpu_smu_memory_pool_size == 2)) {
2028 if (total_memory < dram_size_three_GB)
2029 goto def_value1;
2030 } else if ((amdgpu_smu_memory_pool_size == 4) ||
2031 (amdgpu_smu_memory_pool_size == 8)) {
2032 if (total_memory < dram_size_seven_GB)
2033 goto def_value1;
2034 } else {
a3e510fd 2035 dev_warn(adev->dev, "Smu memory pool size not supported\n");
7951e376
RZ
2036 goto def_value;
2037 }
2038 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
2039
2040 return;
2041
2042def_value1:
a3e510fd 2043 dev_warn(adev->dev, "Not enough system memory\n");
7951e376
RZ
2044def_value:
2045 adev->pm.smu_prv_buffer_size = 0;
2046}
2047
9f6a7857
HR
2048static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
2049{
2050 if (!(adev->flags & AMD_IS_APU) ||
2051 adev->asic_type < CHIP_RAVEN)
2052 return 0;
2053
2054 switch (adev->asic_type) {
2055 case CHIP_RAVEN:
2056 if (adev->pdev->device == 0x15dd)
2057 adev->apu_flags |= AMD_APU_IS_RAVEN;
2058 if (adev->pdev->device == 0x15d8)
2059 adev->apu_flags |= AMD_APU_IS_PICASSO;
2060 break;
2061 case CHIP_RENOIR:
2062 if ((adev->pdev->device == 0x1636) ||
2063 (adev->pdev->device == 0x164c))
2064 adev->apu_flags |= AMD_APU_IS_RENOIR;
2065 else
2066 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
2067 break;
2068 case CHIP_VANGOGH:
2069 adev->apu_flags |= AMD_APU_IS_VANGOGH;
2070 break;
2071 case CHIP_YELLOW_CARP:
2072 break;
d0f56dc2 2073 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
2074 if ((adev->pdev->device == 0x13FE) ||
2075 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
2076 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
2077 break;
9f6a7857 2078 default:
4eaf21b7 2079 break;
9f6a7857
HR
2080 }
2081
2082 return 0;
2083}
2084
d38ceaf9 2085/**
06ec9070 2086 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
2087 *
2088 * @adev: amdgpu_device pointer
2089 *
2090 * Validates certain module parameters and updates
2091 * the associated values used by the driver (all asics).
2092 */
912dfc84 2093static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 2094{
96595204
SS
2095 int i;
2096
5b011235
CZ
2097 if (amdgpu_sched_jobs < 4) {
2098 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2099 amdgpu_sched_jobs);
2100 amdgpu_sched_jobs = 4;
47fc644f 2101 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
2102 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2103 amdgpu_sched_jobs);
2104 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2105 }
d38ceaf9 2106
83e74db6 2107 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
2108 /* gart size must be greater or equal to 32M */
2109 dev_warn(adev->dev, "gart size (%d) too small\n",
2110 amdgpu_gart_size);
83e74db6 2111 amdgpu_gart_size = -1;
d38ceaf9
AD
2112 }
2113
36d38372 2114 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 2115 /* gtt size must be greater or equal to 32M */
36d38372
CK
2116 dev_warn(adev->dev, "gtt size (%d) too small\n",
2117 amdgpu_gtt_size);
2118 amdgpu_gtt_size = -1;
d38ceaf9
AD
2119 }
2120
d07f14be
RH
2121 /* valid range is between 4 and 9 inclusive */
2122 if (amdgpu_vm_fragment_size != -1 &&
2123 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
2124 dev_warn(adev->dev, "valid range is between 4 and 9\n");
2125 amdgpu_vm_fragment_size = -1;
2126 }
2127
5d5bd5e3
KW
2128 if (amdgpu_sched_hw_submission < 2) {
2129 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2130 amdgpu_sched_hw_submission);
2131 amdgpu_sched_hw_submission = 2;
2132 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2133 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2134 amdgpu_sched_hw_submission);
2135 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2136 }
2137
2656fd23
AG
2138 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2139 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2140 amdgpu_reset_method = -1;
2141 }
2142
7951e376
RZ
2143 amdgpu_device_check_smu_prv_buffer_size(adev);
2144
06ec9070 2145 amdgpu_device_check_vm_size(adev);
d38ceaf9 2146
06ec9070 2147 amdgpu_device_check_block_size(adev);
6a7f76e7 2148
19aede77 2149 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 2150
2e0454b7
AD
2151 for (i = 0; i < MAX_XCP; i++) {
2152 switch (amdgpu_enforce_isolation) {
2153 case -1:
2154 case 0:
2155 default:
2156 /* disable */
2157 adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
2158 break;
2159 case 1:
2160 /* enable */
2161 adev->enforce_isolation[i] =
2162 AMDGPU_ENFORCE_ISOLATION_ENABLE;
2163 break;
2164 case 2:
2165 /* enable legacy mode */
2166 adev->enforce_isolation[i] =
2167 AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
2168 break;
68071eb0
SS
2169 case 3:
2170 /* enable only process isolation without submitting cleaner shader */
2171 adev->enforce_isolation[i] =
2172 AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
2173 break;
2e0454b7
AD
2174 }
2175 }
96595204 2176
e3c00faa 2177 return 0;
d38ceaf9
AD
2178}
2179
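/*
 * Illustrative sketch of the parameter sanitizing above (hypothetical helper):
 * out-of-range scheduler job counts are clamped to the minimum and
 * non-power-of-two values are rounded up, mirroring the handling of
 * amdgpu_sched_jobs and amdgpu_sched_hw_submission.
 */
static int example_sanitize_sched_jobs(int jobs)
{
	if (jobs < 4)
		return 4;				/* enforce the minimum */
	if (!is_power_of_2(jobs))
		return roundup_pow_of_two(jobs);	/* e.g. 24 -> 32 */
	return jobs;
}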
2180/**
2181 * amdgpu_switcheroo_set_state - set switcheroo state
2182 *
2183 * @pdev: pci dev pointer
1694467b 2184 * @state: vga_switcheroo state
d38ceaf9 2185 *
12024b17 2186 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
2187 * the asic before or after it is powered up using ACPI methods.
2188 */
8aba21b7
LT
2189static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2190 enum vga_switcheroo_state state)
d38ceaf9
AD
2191{
2192 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 2193 int r;
d38ceaf9 2194
127ed492
LL
2195 if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
2196 state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
2197 return;
2198
2199 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 2200 pr_info("switched on\n");
d38ceaf9
AD
2201 /* don't suspend or resume card normally */
2202 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2203
8f66090b
TZ
2204 pci_set_power_state(pdev, PCI_D0);
2205 amdgpu_device_load_pci_state(pdev);
2206 r = pci_enable_device(pdev);
de185019 2207 if (r)
a3e510fd
LL
2208 dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
2209 r);
de185019 2210 amdgpu_device_resume(dev, true);
d38ceaf9 2211
d38ceaf9 2212 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 2213 } else {
a3e510fd 2214 dev_info(&pdev->dev, "switched off\n");
d38ceaf9 2215 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 2216 amdgpu_device_prepare(dev);
de185019 2217 amdgpu_device_suspend(dev, true);
8f66090b 2218 amdgpu_device_cache_pci_state(pdev);
de185019 2219 /* Shut down the device */
8f66090b
TZ
2220 pci_disable_device(pdev);
2221 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
2222 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2223 }
2224}
2225
2226/**
2227 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2228 *
2229 * @pdev: pci dev pointer
2230 *
2231 * Callback for the switcheroo driver. Check if the switcheroo
2232 * state can be changed.
2233 * Returns true if the state can be changed, false if not.
2234 */
2235static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2236{
2237 struct drm_device *dev = pci_get_drvdata(pdev);
2238
b8920e1e 2239 /*
d38ceaf9
AD
2240 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2241 * locking inversion with the driver load path. And the access here is
2242 * completely racy anyway. So don't bother with locking for now.
2243 */
7e13ad89 2244 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
2245}
2246
2247static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2248 .set_gpu_state = amdgpu_switcheroo_set_state,
2249 .reprobe = NULL,
2250 .can_switch = amdgpu_switcheroo_can_switch,
2251};
2252
e3ecdffa
AD
2253/**
2254 * amdgpu_device_ip_set_clockgating_state - set the CG state
2255 *
87e3f136 2256 * @dev: amdgpu_device pointer
e3ecdffa
AD
2257 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2258 * @state: clockgating state (gate or ungate)
2259 *
2260 * Sets the requested clockgating state for all instances of
2261 * the hardware IP specified.
2262 * Returns the error code from the last instance.
2263 */
43fa561f 2264int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
2265 enum amd_ip_block_type block_type,
2266 enum amd_clockgating_state state)
d38ceaf9 2267{
43fa561f 2268 struct amdgpu_device *adev = dev;
d38ceaf9
AD
2269 int i, r = 0;
2270
2271 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2272 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2273 continue;
c722865a
RZ
2274 if (adev->ip_blocks[i].version->type != block_type)
2275 continue;
2276 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2277 continue;
2278 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
f2ba8c3d 2279 &adev->ip_blocks[i], state);
c722865a 2280 if (r)
a3e510fd
LL
2281 dev_err(adev->dev,
2282 "set_clockgating_state of IP block <%s> failed %d\n",
2283 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
2284 }
2285 return r;
2286}
2287
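/*
 * Illustrative usage sketch (editor's addition): power-management code gates
 * or ungates clocks for one IP type like this; the call above iterates over
 * every instance of the given block type.  The wrapper name is hypothetical.
 */
static void example_gate_gfx_clocks(struct amdgpu_device *adev, bool gate)
{
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
					       gate ? AMD_CG_STATE_GATE :
						      AMD_CG_STATE_UNGATE);
}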
e3ecdffa
AD
2288/**
2289 * amdgpu_device_ip_set_powergating_state - set the PG state
2290 *
87e3f136 2291 * @dev: amdgpu_device pointer
e3ecdffa
AD
2292 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2293 * @state: powergating state (gate or ungate)
2294 *
2295 * Sets the requested powergating state for all instances of
2296 * the hardware IP specified.
2297 * Returns the error code from the last instance.
2298 */
43fa561f 2299int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
2300 enum amd_ip_block_type block_type,
2301 enum amd_powergating_state state)
d38ceaf9 2302{
43fa561f 2303 struct amdgpu_device *adev = dev;
d38ceaf9
AD
2304 int i, r = 0;
2305
2306 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2307 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2308 continue;
c722865a
RZ
2309 if (adev->ip_blocks[i].version->type != block_type)
2310 continue;
2311 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2312 continue;
2313 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
80d80511 2314 &adev->ip_blocks[i], state);
c722865a 2315 if (r)
a3e510fd
LL
2316 dev_err(adev->dev,
2317 "set_powergating_state of IP block <%s> failed %d\n",
2318 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
2319 }
2320 return r;
2321}
2322
e3ecdffa
AD
2323/**
2324 * amdgpu_device_ip_get_clockgating_state - get the CG state
2325 *
2326 * @adev: amdgpu_device pointer
2327 * @flags: clockgating feature flags
2328 *
2329 * Walks the list of IPs on the device and updates the clockgating
2330 * flags for each IP.
2331 * Updates @flags with the feature flags for each hardware IP where
2332 * clockgating is enabled.
2333 */
2990a1fc 2334void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 2335 u64 *flags)
6cb2d4e4
HR
2336{
2337 int i;
2338
2339 for (i = 0; i < adev->num_ip_blocks; i++) {
2340 if (!adev->ip_blocks[i].status.valid)
2341 continue;
2342 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
3521276a
SK
2343 adev->ip_blocks[i].version->funcs->get_clockgating_state(
2344 &adev->ip_blocks[i], flags);
6cb2d4e4
HR
2345 }
2346}
2347
e3ecdffa
AD
2348/**
2349 * amdgpu_device_ip_wait_for_idle - wait for idle
2350 *
2351 * @adev: amdgpu_device pointer
2352 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2353 *
2354 * Waits for the requested hardware IP to be idle.
2355 * Returns 0 for success or a negative error code on failure.
2356 */
2990a1fc
AD
2357int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2358 enum amd_ip_block_type block_type)
5dbbb60b
AD
2359{
2360 int i, r;
2361
2362 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2363 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2364 continue;
a1255107 2365 if (adev->ip_blocks[i].version->type == block_type) {
780002b6
SK
2366 if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2367 r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2368 &adev->ip_blocks[i]);
2369 if (r)
2370 return r;
2371 }
5dbbb60b
AD
2372 break;
2373 }
2374 }
2375 return 0;
2376
2377}
2378
e3ecdffa 2379/**
dc443aa4 2380 * amdgpu_device_ip_is_valid - is the hardware IP enabled
e3ecdffa
AD
2381 *
2382 * @adev: amdgpu_device pointer
2383 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2384 *
dc443aa4
AK
2385 * Check if the hardware IP is enabled or not.
2386 * Returns true if the IP is enabled, false if not.
e3ecdffa 2387 */
dc443aa4
AK
2388bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2389 enum amd_ip_block_type block_type)
5dbbb60b
AD
2390{
2391 int i;
2392
2393 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2394 if (adev->ip_blocks[i].version->type == block_type)
dc443aa4 2395 return adev->ip_blocks[i].status.valid;
5dbbb60b 2396 }
dc443aa4 2397 return false;
5dbbb60b
AD
2398
2399}
2400
e3ecdffa
AD
2401/**
2402 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2403 *
2404 * @adev: amdgpu_device pointer
87e3f136 2405 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
2406 *
2407 * Returns a pointer to the hardware IP block structure
2408 * if it exists for the asic, otherwise NULL.
2409 */
2990a1fc
AD
2410struct amdgpu_ip_block *
2411amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2412 enum amd_ip_block_type type)
d38ceaf9
AD
2413{
2414 int i;
2415
2416 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 2417 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
2418 return &adev->ip_blocks[i];
2419
2420 return NULL;
2421}
2422
2423/**
2990a1fc 2424 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
2425 *
2426 * @adev: amdgpu_device pointer
5fc3aeeb 2427 * @type: enum amd_ip_block_type
d38ceaf9
AD
2428 * @major: major version
2429 * @minor: minor version
2430 *
2431 * return 0 if equal or greater
2432 * return 1 if smaller or the ip_block doesn't exist
2433 */
2990a1fc
AD
2434int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2435 enum amd_ip_block_type type,
2436 u32 major, u32 minor)
d38ceaf9 2437{
2990a1fc 2438 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 2439
a1255107
AD
2440 if (ip_block && ((ip_block->version->major > major) ||
2441 ((ip_block->version->major == major) &&
2442 (ip_block->version->minor >= minor))))
d38ceaf9
AD
2443 return 0;
2444
2445 return 1;
2446}
2447
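/*
 * Illustrative sketch (hypothetical): combining the two lookups above to check
 * whether a given IP block is present and recent enough before using an
 * optional feature.  The block type and version numbers are examples only.
 */
static bool example_has_min_gmc_version(struct amdgpu_device *adev,
					u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip;

	ip = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
	if (!ip)
		return false;

	/* _cmp() returns 0 when the IP block is at least major.minor */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
						  major, minor) == 0;
}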
a1255107 2448/**
2990a1fc 2449 * amdgpu_device_ip_block_add
a1255107
AD
2450 *
2451 * @adev: amdgpu_device pointer
2452 * @ip_block_version: pointer to the IP to add
2453 *
2454 * Adds the IP block driver information to the collection of IPs
2455 * on the asic.
2456 */
2990a1fc
AD
2457int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2458 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2459{
2460 if (!ip_block_version)
2461 return -EINVAL;
2462
7bd939d0
LG
2463 switch (ip_block_version->type) {
2464 case AMD_IP_BLOCK_TYPE_VCN:
2465 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2466 return 0;
2467 break;
2468 case AMD_IP_BLOCK_TYPE_JPEG:
2469 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2470 return 0;
2471 break;
2472 default:
2473 break;
2474 }
2475
a0db1ea0
LL
2476 dev_info(adev->dev, "detected ip block number %d <%s>\n",
2477 adev->num_ip_blocks, ip_block_version->funcs->name);
a0bae357 2478
37b99322
SK
2479 adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2480
a1255107
AD
2481 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2482
2483 return 0;
2484}
2485
e3ecdffa
AD
2486/**
2487 * amdgpu_device_enable_virtual_display - enable virtual display feature
2488 *
2489 * @adev: amdgpu_device pointer
2490 *
2491 * Enables the virtual display feature if the user has enabled it via
2492 * the module parameter virtual_display. This feature provides a virtual
2493 * display hardware on headless boards or in virtualized environments.
2494 * This function parses and validates the configuration string specified by
a567db80 2495 * the user and configures the virtual display configuration (number of
e3ecdffa
AD
2496 * virtual connectors, crtcs, etc.) specified.
2497 */
483ef985 2498static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2499{
2500 adev->enable_virtual_display = false;
2501
2502 if (amdgpu_virtual_display) {
8f66090b 2503 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2504 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2505
2506 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2507 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2508 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2509 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2510 if (!strcmp("all", pciaddname)
2511 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2512 long num_crtc;
2513 int res = -1;
2514
9accf2fd 2515 adev->enable_virtual_display = true;
0f66356d
ED
2516
2517 if (pciaddname_tmp)
2518 res = kstrtol(pciaddname_tmp, 10,
2519 &num_crtc);
2520
2521 if (!res) {
2522 if (num_crtc < 1)
2523 num_crtc = 1;
2524 if (num_crtc > 6)
2525 num_crtc = 6;
2526 adev->mode_info.num_crtc = num_crtc;
2527 } else {
2528 adev->mode_info.num_crtc = 1;
2529 }
9accf2fd
ED
2530 break;
2531 }
2532 }
2533
a3e510fd
LL
2534 dev_info(
2535 adev->dev,
2536 "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2537 amdgpu_virtual_display, pci_address_name,
2538 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2539
2540 kfree(pciaddstr);
2541 }
2542}
2543
25263da3
AD
2544void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2545{
2546 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2547 adev->mode_info.num_crtc = 1;
2548 adev->enable_virtual_display = true;
a3e510fd
LL
2549 dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
2550 adev->enable_virtual_display,
2551 adev->mode_info.num_crtc);
25263da3
AD
2552 }
2553}
2554
e3ecdffa
AD
2555/**
2556 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2557 *
2558 * @adev: amdgpu_device pointer
2559 *
2560 * Parses the asic configuration parameters specified in the gpu info
a567db80 2561 * firmware and makes them available to the driver for use in configuring
e3ecdffa
AD
2562 * the asic.
2563 * Returns 0 on success, -EINVAL on failure.
2564 */
e2a75f88
AD
2565static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2566{
e2a75f88 2567 const char *chip_name;
e2a75f88
AD
2568 int err;
2569 const struct gpu_info_firmware_header_v1_0 *hdr;
2570
ab4fe3e1
HR
2571 adev->firmware.gpu_info_fw = NULL;
2572
fb915c87
AD
2573 if (adev->mman.discovery_bin)
2574 return 0;
258620d0 2575
e2a75f88 2576 switch (adev->asic_type) {
e2a75f88
AD
2577 default:
2578 return 0;
2579 case CHIP_VEGA10:
2580 chip_name = "vega10";
2581 break;
3f76dced
AD
2582 case CHIP_VEGA12:
2583 chip_name = "vega12";
2584 break;
2d2e5e7e 2585 case CHIP_RAVEN:
54f78a76 2586 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2587 chip_name = "raven2";
54f78a76 2588 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2589 chip_name = "picasso";
54c4d17e
FX
2590 else
2591 chip_name = "raven";
2d2e5e7e 2592 break;
65e60f6e
LM
2593 case CHIP_ARCTURUS:
2594 chip_name = "arcturus";
2595 break;
42b325e5
XY
2596 case CHIP_NAVI12:
2597 chip_name = "navi12";
2598 break;
e2a75f88
AD
2599 }
2600
a777c9d7 2601 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
ea5d4934 2602 AMDGPU_UCODE_OPTIONAL,
a777c9d7 2603 "amdgpu/%s_gpu_info.bin", chip_name);
e2a75f88
AD
2604 if (err) {
2605 dev_err(adev->dev,
a777c9d7
YW
2606 "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2607 chip_name);
e2a75f88
AD
2608 goto out;
2609 }
2610
ab4fe3e1 2611 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2612 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2613
2614 switch (hdr->version_major) {
2615 case 1:
2616 {
2617 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2618 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2619 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2620
cc375d8c 2621 /*
a567db80 2622 * Should be dropped when DAL no longer needs it.
cc375d8c
TY
2623 */
2624 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2625 goto parse_soc_bounding_box;
2626
b5ab16bf
AD
2627 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2628 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2629 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2630 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2631 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2632 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2633 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2634 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2635 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2636 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2637 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2638 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2639 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2640 adev->gfx.cu_info.max_waves_per_simd =
2641 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2642 adev->gfx.cu_info.max_scratch_slots_per_cu =
2643 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2644 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2645 if (hdr->version_minor >= 1) {
35c2e910
HZ
2646 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2647 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2648 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2649 adev->gfx.config.num_sc_per_sh =
2650 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2651 adev->gfx.config.num_packer_per_sc =
2652 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2653 }
ec51d3fa
XY
2654
2655parse_soc_bounding_box:
ec51d3fa
XY
2656 /*
2657 * soc bounding box info is not integrated in the discovery table,
258620d0 2658 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2659 */
48321c3d
HW
2660 if (hdr->version_minor == 2) {
2661 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2662 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2663 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2664 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2665 }
e2a75f88
AD
2666 break;
2667 }
2668 default:
2669 dev_err(adev->dev,
2670 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2671 err = -EINVAL;
2672 goto out;
2673 }
2674out:
e2a75f88
AD
2675 return err;
2676}
2677
e3ecdffa
AD
2678/**
2679 * amdgpu_device_ip_early_init - run early init for hardware IPs
2680 *
2681 * @adev: amdgpu_device pointer
2682 *
2683 * Early initialization pass for hardware IPs. The hardware IPs that make
2684 * up each asic are discovered and each IP's early_init callback is run. This
2685 * is the first stage in initializing the asic.
2686 * Returns 0 on success, negative error code on failure.
2687 */
06ec9070 2688static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2689{
3b37e272 2690 struct amdgpu_ip_block *ip_block;
901e2be2 2691 struct pci_dev *parent;
7e0aa706
LL
2692 bool total, skip_bios;
2693 uint32_t bios_flags;
aaa36a97 2694 int i, r;
d38ceaf9 2695
483ef985 2696 amdgpu_device_enable_virtual_display(adev);
a6be7570 2697
00a979f3 2698 if (amdgpu_sriov_vf(adev)) {
00a979f3 2699 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2700 if (r)
2701 return r;
00a979f3
WS
2702 }
2703
d38ceaf9 2704 switch (adev->asic_type) {
33f34802
KW
2705#ifdef CONFIG_DRM_AMDGPU_SI
2706 case CHIP_VERDE:
2707 case CHIP_TAHITI:
2708 case CHIP_PITCAIRN:
2709 case CHIP_OLAND:
2710 case CHIP_HAINAN:
295d0daf 2711 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2712 r = si_set_ip_blocks(adev);
2713 if (r)
2714 return r;
2715 break;
2716#endif
a2e73f56
AD
2717#ifdef CONFIG_DRM_AMDGPU_CIK
2718 case CHIP_BONAIRE:
2719 case CHIP_HAWAII:
2720 case CHIP_KAVERI:
2721 case CHIP_KABINI:
2722 case CHIP_MULLINS:
e1ad2d53 2723 if (adev->flags & AMD_IS_APU)
a2e73f56 2724 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2725 else
2726 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2727
2728 r = cik_set_ip_blocks(adev);
2729 if (r)
2730 return r;
2731 break;
2732#endif
da87c30b
AD
2733 case CHIP_TOPAZ:
2734 case CHIP_TONGA:
2735 case CHIP_FIJI:
2736 case CHIP_POLARIS10:
2737 case CHIP_POLARIS11:
2738 case CHIP_POLARIS12:
2739 case CHIP_VEGAM:
2740 case CHIP_CARRIZO:
2741 case CHIP_STONEY:
2742 if (adev->flags & AMD_IS_APU)
2743 adev->family = AMDGPU_FAMILY_CZ;
2744 else
2745 adev->family = AMDGPU_FAMILY_VI;
2746
2747 r = vi_set_ip_blocks(adev);
2748 if (r)
2749 return r;
2750 break;
d38ceaf9 2751 default:
63352b7f
AD
2752 r = amdgpu_discovery_set_ip_blocks(adev);
2753 if (r)
2754 return r;
2755 break;
d38ceaf9
AD
2756 }
2757
daafa303
AM
2758 /* Check for IP version 9.4.3 with A0 hardware */
2759 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2760 !amdgpu_device_get_rev_id(adev)) {
2761 dev_err(adev->dev, "Unsupported A0 hardware\n");
2762 return -ENODEV; /* device unsupported - no device error */
2763 }
2764
901e2be2
AD
2765 if (amdgpu_has_atpx() &&
2766 (amdgpu_is_atpx_hybrid() ||
2767 amdgpu_has_atpx_dgpu_power_cntl()) &&
2768 ((adev->flags & AMD_IS_APU) == 0) &&
7b1c6263 2769 !dev_is_removable(&adev->pdev->dev))
901e2be2
AD
2770 adev->flags |= AMD_IS_PX;
2771
85ac2021 2772 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2773 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2774 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2775 }
901e2be2 2776
3b94fb10 2777 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2778 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2779 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2780 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2781 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
d9b3a066 2782 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
fbf1035b 2783 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2784
855a2a02
SZ
2785 adev->virt.is_xgmi_node_migrate_enabled = false;
2786 if (amdgpu_sriov_vf(adev)) {
2787 adev->virt.is_xgmi_node_migrate_enabled =
2788 amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2789 }
2790
ced69502 2791 total = true;
d38ceaf9 2792 for (i = 0; i < adev->num_ip_blocks; i++) {
146b085e
SK
2793 ip_block = &adev->ip_blocks[i];
2794
d38ceaf9 2795 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
a3e510fd
LL
2796 dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2797 adev->ip_blocks[i].version->funcs->name);
a1255107 2798 adev->ip_blocks[i].status.valid = false;
146b085e
SK
2799 } else if (ip_block->version->funcs->early_init) {
2800 r = ip_block->version->funcs->early_init(ip_block);
2801 if (r == -ENOENT) {
2802 adev->ip_blocks[i].status.valid = false;
2803 } else if (r) {
a3e510fd
LL
2804 dev_err(adev->dev,
2805 "early_init of IP block <%s> failed %d\n",
2806 adev->ip_blocks[i].version->funcs->name,
2807 r);
146b085e 2808 total = false;
974e6b64 2809 } else {
a1255107 2810 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2811 }
146b085e
SK
2812 } else {
2813 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2814 }
21a249ca
AD
2815 /* get the vbios after the asic_funcs are set up */
2816 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2817 r = amdgpu_device_parse_gpu_info_fw(adev);
2818 if (r)
2819 return r;
2820
7e0aa706
LL
2821 bios_flags = amdgpu_device_get_vbios_flags(adev);
2822 skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
21a249ca 2823 /* Read BIOS */
7e0aa706 2824 if (!skip_bios) {
6e8ca38e
LL
2825 bool optional =
2826 !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2827 if (!amdgpu_get_bios(adev) && !optional)
9535a86a 2828 return -EINVAL;
21a249ca 2829
6e8ca38e
LL
2830 if (optional && !adev->bios)
2831 dev_info(
2832 adev->dev,
2833 "VBIOS image optional, proceeding without VBIOS image");
2834
2835 if (adev->bios) {
2836 r = amdgpu_atombios_init(adev);
2837 if (r) {
2838 dev_err(adev->dev,
2839 "amdgpu_atombios_init failed\n");
2840 amdgpu_vf_error_put(
2841 adev,
2842 AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2843 0, 0);
2844 return r;
2845 }
9535a86a 2846 }
21a249ca 2847 }
77eabc6f
PJZ
2848
2849 /*get pf2vf msg info at it's earliest time*/
2850 if (amdgpu_sriov_vf(adev))
2851 amdgpu_virt_init_data_exchange(adev);
2852
21a249ca 2853 }
d38ceaf9 2854 }
ced69502
ML
2855 if (!total)
2856 return -ENODEV;
d38ceaf9 2857
35750679
LL
2858 if (adev->gmc.xgmi.supported)
2859 amdgpu_xgmi_early_init(adev);
2860
3b37e272
YZ
2861 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2862 if (ip_block->status.valid != false)
2863 amdgpu_amdkfd_device_probe(adev);
2864
395d1fb9
NH
2865 adev->cg_flags &= amdgpu_cg_mask;
2866 adev->pg_flags &= amdgpu_pg_mask;
2867
d38ceaf9
AD
2868 return 0;
2869}
2870
0a4f2520
RZ
2871static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2872{
2873 int i, r;
2874
2875 for (i = 0; i < adev->num_ip_blocks; i++) {
2876 if (!adev->ip_blocks[i].status.sw)
2877 continue;
2878 if (adev->ip_blocks[i].status.hw)
2879 continue;
14f2fe34
LL
2880 if (!amdgpu_ip_member_of_hwini(
2881 adev, adev->ip_blocks[i].version->type))
2882 continue;
0a4f2520 2883 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2884 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520 2885 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
58608034 2886 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
0a4f2520 2887 if (r) {
a3e510fd
LL
2888 dev_err(adev->dev,
2889 "hw_init of IP block <%s> failed %d\n",
2890 adev->ip_blocks[i].version->funcs->name,
2891 r);
0a4f2520
RZ
2892 return r;
2893 }
2894 adev->ip_blocks[i].status.hw = true;
2895 }
2896 }
2897
2898 return 0;
2899}
2900
2901static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2902{
2903 int i, r;
2904
2905 for (i = 0; i < adev->num_ip_blocks; i++) {
2906 if (!adev->ip_blocks[i].status.sw)
2907 continue;
2908 if (adev->ip_blocks[i].status.hw)
2909 continue;
14f2fe34
LL
2910 if (!amdgpu_ip_member_of_hwini(
2911 adev, adev->ip_blocks[i].version->type))
2912 continue;
58608034 2913 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
0a4f2520 2914 if (r) {
a3e510fd
LL
2915 dev_err(adev->dev,
2916 "hw_init of IP block <%s> failed %d\n",
2917 adev->ip_blocks[i].version->funcs->name, r);
0a4f2520
RZ
2918 return r;
2919 }
2920 adev->ip_blocks[i].status.hw = true;
2921 }
2922
2923 return 0;
2924}
2925
7a3e0bb2
RZ
2926static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2927{
2928 int r = 0;
2929 int i;
80f41f84 2930 uint32_t smu_version;
7a3e0bb2
RZ
2931
2932 if (adev->asic_type >= CHIP_VEGA10) {
2933 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2934 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2935 continue;
2936
14f2fe34
LL
2937 if (!amdgpu_ip_member_of_hwini(adev,
2938 AMD_IP_BLOCK_TYPE_PSP))
2939 break;
2940
e3c1b071 2941 if (!adev->ip_blocks[i].status.sw)
2942 continue;
2943
482f0e53
ML
2944 /* no need to do the fw loading again if already done*/
2945 if (adev->ip_blocks[i].status.hw == true)
2946 break;
2947
53b3f8f4 2948 if (amdgpu_in_reset(adev) || adev->in_suspend) {
502d7630
SK
2949 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2950 if (r)
482f0e53 2951 return r;
482f0e53 2952 } else {
58608034 2953 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
482f0e53 2954 if (r) {
a3e510fd
LL
2955 dev_err(adev->dev,
2956 "hw_init of IP block <%s> failed %d\n",
2957 adev->ip_blocks[i]
2958 .version->funcs->name,
2959 r);
482f0e53 2960 return r;
7a3e0bb2 2961 }
502d7630 2962 adev->ip_blocks[i].status.hw = true;
7a3e0bb2 2963 }
482f0e53 2964 break;
7a3e0bb2
RZ
2965 }
2966 }
482f0e53 2967
8973d9ec
ED
2968 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2969 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2970
80f41f84 2971 return r;
7a3e0bb2
RZ
2972}
2973
5fd8518d
AG
2974static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2975{
796a9f55
PS
2976 struct drm_sched_init_args args = {
2977 .ops = &amdgpu_sched_ops,
2978 .num_rqs = DRM_SCHED_PRIORITY_COUNT,
2979 .timeout_wq = adev->reset_domain->wq,
2980 .dev = adev->dev,
2981 };
5fd8518d
AG
2982 long timeout;
2983 int r, i;
2984
2985 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2986 struct amdgpu_ring *ring = adev->rings[i];
2987
2988 /* No need to setup the GPU scheduler for rings that don't need it */
2989 if (!ring || ring->no_scheduler)
2990 continue;
2991
2992 switch (ring->funcs->type) {
2993 case AMDGPU_RING_TYPE_GFX:
2994 timeout = adev->gfx_timeout;
2995 break;
2996 case AMDGPU_RING_TYPE_COMPUTE:
2997 timeout = adev->compute_timeout;
2998 break;
2999 case AMDGPU_RING_TYPE_SDMA:
3000 timeout = adev->sdma_timeout;
3001 break;
3002 default:
3003 timeout = adev->video_timeout;
3004 break;
3005 }
3006
796a9f55
PS
3007 args.timeout = timeout;
3008 args.credit_limit = ring->num_hw_submission;
3009 args.score = ring->sched_score;
3010 args.name = ring->name;
3011
3012 r = drm_sched_init(&ring->sched, &args);
5fd8518d 3013 if (r) {
a3e510fd
LL
3014 dev_err(adev->dev,
3015 "Failed to create scheduler on ring %s.\n",
3016 ring->name);
5fd8518d
AG
3017 return r;
3018 }
037b98a2
AD
3019 r = amdgpu_uvd_entity_init(adev, ring);
3020 if (r) {
a3e510fd
LL
3021 dev_err(adev->dev,
3022 "Failed to create UVD scheduling entity on ring %s.\n",
3023 ring->name);
037b98a2
AD
3024 return r;
3025 }
3026 r = amdgpu_vce_entity_init(adev, ring);
3027 if (r) {
a3e510fd
LL
3028 dev_err(adev->dev,
3029 "Failed to create VCE scheduling entity on ring %s.\n",
3030 ring->name);
037b98a2
AD
3031 return r;
3032 }
5fd8518d
AG
3033 }
3034
4dbc17b4
HZ
3035 if (adev->xcp_mgr)
3036 amdgpu_xcp_update_partition_sched_list(adev);
d425c6f4 3037
5fd8518d
AG
3038 return 0;
3039}
3040
3041
e3ecdffa
AD
3042/**
3043 * amdgpu_device_ip_init - run init for hardware IPs
3044 *
3045 * @adev: amdgpu_device pointer
3046 *
3047 * Main initialization pass for hardware IPs. The list of all the hardware
3048 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
3049 * are run. sw_init initializes the software state associated with each IP
3050 * and hw_init initializes the hardware associated with each IP.
3051 * Returns 0 on success, negative error code on failure.
3052 */
06ec9070 3053static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9 3054{
631af731 3055 bool init_badpage;
d38ceaf9
AD
3056 int i, r;
3057
c030f2e4 3058 r = amdgpu_ras_init(adev);
3059 if (r)
3060 return r;
3061
d38ceaf9 3062 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3063 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3064 continue;
df6e463d
SK
3065 if (adev->ip_blocks[i].version->funcs->sw_init) {
3066 r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
3067 if (r) {
a3e510fd
LL
3068 dev_err(adev->dev,
3069 "sw_init of IP block <%s> failed %d\n",
3070 adev->ip_blocks[i].version->funcs->name,
3071 r);
df6e463d
SK
3072 goto init_failed;
3073 }
2c1a2784 3074 }
a1255107 3075 adev->ip_blocks[i].status.sw = true;
bfca0289 3076
14f2fe34
LL
3077 if (!amdgpu_ip_member_of_hwini(
3078 adev, adev->ip_blocks[i].version->type))
3079 continue;
3080
c1c39032
AD
3081 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
3082 /* need to do common hw init early so everything is set up for gmc */
58608034 3083 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
c1c39032 3084 if (r) {
a3e510fd
LL
3085 dev_err(adev->dev, "hw_init %d failed %d\n", i,
3086 r);
c1c39032
AD
3087 goto init_failed;
3088 }
3089 adev->ip_blocks[i].status.hw = true;
3090 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3091 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
3092 /* Try to reserve bad pages early */
3093 if (amdgpu_sriov_vf(adev))
3094 amdgpu_virt_exchange_data(adev);
3095
7ccfd79f 3096 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 3097 if (r) {
a3e510fd
LL
3098 dev_err(adev->dev,
3099 "amdgpu_mem_scratch_init failed %d\n",
3100 r);
72d3f592 3101 goto init_failed;
2c1a2784 3102 }
58608034 3103 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2c1a2784 3104 if (r) {
a3e510fd
LL
3105 dev_err(adev->dev, "hw_init %d failed %d\n", i,
3106 r);
72d3f592 3107 goto init_failed;
2c1a2784 3108 }
06ec9070 3109 r = amdgpu_device_wb_init(adev);
2c1a2784 3110 if (r) {
a3e510fd
LL
3111 dev_err(adev->dev,
3112 "amdgpu_device_wb_init failed %d\n", r);
72d3f592 3113 goto init_failed;
2c1a2784 3114 }
a1255107 3115 adev->ip_blocks[i].status.hw = true;
2493664f
ML
3116
3117 /* right after GMC hw init, we create CSA */
02ff519e 3118 if (adev->gfx.mcbp) {
1e256e27 3119 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
3120 AMDGPU_GEM_DOMAIN_VRAM |
3121 AMDGPU_GEM_DOMAIN_GTT,
3122 AMDGPU_CSA_SIZE);
2493664f 3123 if (r) {
a3e510fd
LL
3124 dev_err(adev->dev,
3125 "allocate CSA failed %d\n", r);
72d3f592 3126 goto init_failed;
2493664f
ML
3127 }
3128 }
c8031019
APS
3129
3130 r = amdgpu_seq64_init(adev);
3131 if (r) {
a3e510fd
LL
3132 dev_err(adev->dev, "allocate seq64 failed %d\n",
3133 r);
c8031019
APS
3134 goto init_failed;
3135 }
d38ceaf9
AD
3136 }
3137 }
3138
c9ffa427 3139 if (amdgpu_sriov_vf(adev))
22c16d25 3140 amdgpu_virt_init_data_exchange(adev);
c9ffa427 3141
533aed27
AG
3142 r = amdgpu_ib_pool_init(adev);
3143 if (r) {
3144 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
3145 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
3146 goto init_failed;
3147 }
3148
c8963ea4
RZ
3149 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
3150 if (r)
72d3f592 3151 goto init_failed;
0a4f2520
RZ
3152
3153 r = amdgpu_device_ip_hw_init_phase1(adev);
3154 if (r)
72d3f592 3155 goto init_failed;
0a4f2520 3156
7a3e0bb2
RZ
3157 r = amdgpu_device_fw_loading(adev);
3158 if (r)
72d3f592 3159 goto init_failed;
7a3e0bb2 3160
0a4f2520
RZ
3161 r = amdgpu_device_ip_hw_init_phase2(adev);
3162 if (r)
72d3f592 3163 goto init_failed;
d38ceaf9 3164
121a2bc6
AG
3165 /*
3166 * retired pages will be loaded from eeprom and reserved here,
3167 * it should be called after amdgpu_device_ip_hw_init_phase2 since
3168 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
3169 * for I2C communication which only true at this point.
b82e65a9
GC
3170 *
3171 * amdgpu_ras_recovery_init may fail, but the upper layers only care
3172 * about failures caused by a bad gpu situation and stop the amdgpu init
3173 * process accordingly. For other failure cases, it will still release
3174 * all the resources and print an error message, rather than returning a
3175 * negative value to the upper level.
121a2bc6
AG
3176 *
3177 * Note: theoretically, this should be called before all vram allocations
3179 * to protect retired pages from being reused by allocations
3179 */
631af731
LL
3180 init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3181 r = amdgpu_ras_recovery_init(adev, init_badpage);
b82e65a9
GC
3182 if (r)
3183 goto init_failed;
121a2bc6 3184
cfbb6b00
AG
3185 /**
3186 * In case of XGMI grab extra reference for reset domain for this device
3187 */
a4c63caf 3188 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 3189 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 3190 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
3191 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3192
dfd0287b
LH
3193 if (WARN_ON(!hive)) {
3194 r = -ENOENT;
3195 goto init_failed;
3196 }
3197
46c67660 3198 if (!hive->reset_domain ||
3199 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
3200 r = -ENOENT;
3201 amdgpu_put_xgmi_hive(hive);
3202 goto init_failed;
3203 }
3204
3205 /* Drop the early temporary reset domain we created for device */
3206 amdgpu_reset_put_reset_domain(adev->reset_domain);
3207 adev->reset_domain = hive->reset_domain;
9dfa4860 3208 amdgpu_put_xgmi_hive(hive);
cfbb6b00 3209 }
a4c63caf
AG
3210 }
3211 }
3212
5fd8518d
AG
3213 r = amdgpu_device_init_schedulers(adev);
3214 if (r)
3215 goto init_failed;
e3c1b071 3216
b7043800
AD
3217 if (adev->mman.buffer_funcs_ring->sched.ready)
3218 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3219
e3c1b071 3220 /* Don't init kfd if whole hive need to be reset during init */
5839d27d 3221 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
84b4dd3f 3222 kgd2kfd_init_zone_device(adev);
e3c1b071 3223 amdgpu_amdkfd_device_init(adev);
84b4dd3f 3224 }
c6332b97 3225
bd607166
KR
3226 amdgpu_fru_get_product_info(adev);
3227
a91d91b6
TY
3228 if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
3229 r = amdgpu_cper_init(adev);
92d5d2a0 3230
72d3f592 3231init_failed:
c6332b97 3232
72d3f592 3233 return r;
d38ceaf9
AD
3234}
3235
e3ecdffa
AD
3236/**
3237 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3238 *
3239 * @adev: amdgpu_device pointer
3240 *
3241 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
3242 * this function before a GPU reset. If the value is retained after a
a567db80 3243 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
e3ecdffa 3244 */
06ec9070 3245static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
3246{
3247 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3248}
3249
e3ecdffa
AD
3250/**
3251 * amdgpu_device_check_vram_lost - check if vram is valid
3252 *
3253 * @adev: amdgpu_device pointer
3254 *
3255 * Checks the reset magic value written to the gart pointer in VRAM.
3256 * The driver calls this after a GPU reset to see if the contents of
3257 * VRAM are lost or not.
3258 * Returns true if vram is lost, false if not.
3259 */
06ec9070 3260static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 3261{
dadce777
EQ
3262 if (memcmp(adev->gart.ptr, adev->reset_magic,
3263 AMDGPU_RESET_MAGIC_NUM))
3264 return true;
3265
53b3f8f4 3266 if (!amdgpu_in_reset(adev))
dadce777
EQ
3267 return false;
3268
3269 /*
3270 * For all ASICs with baco/mode1 reset, the VRAM is
3271 * always assumed to be lost.
3272 */
3273 switch (amdgpu_asic_reset_method(adev)) {
8ba904f5 3274 case AMD_RESET_METHOD_LINK:
dadce777
EQ
3275 case AMD_RESET_METHOD_BACO:
3276 case AMD_RESET_METHOD_MODE1:
3277 return true;
3278 default:
3279 return false;
3280 }
0c49e0b8
CZ
3281}
3282
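/*
 * Illustrative sketch (editor's addition, mirroring how the reset path uses
 * the two helpers above): the magic is written once after init and compared
 * after a reset to decide whether buffers kept in VRAM must be restored.
 * The helper name is hypothetical.
 */
static void example_after_reset(struct amdgpu_device *adev)
{
	if (amdgpu_device_check_vram_lost(adev)) {
		/* ... re-create / re-upload the lost VRAM contents ... */
		amdgpu_device_fill_reset_magic(adev);	/* re-arm the check */
	}
}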
e3ecdffa 3283/**
1112a46b 3284 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
3285 *
3286 * @adev: amdgpu_device pointer
b8b72130 3287 * @state: clockgating state (gate or ungate)
e3ecdffa 3288 *
e3ecdffa 3289 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
3290 * set_clockgating_state callbacks are run.
3291 * On late init, this pass enables clockgating for the hardware IPs.
3292 * On fini or suspend, it disables clockgating for the hardware IPs.
e3ecdffa
AD
3293 * Returns 0 on success, negative error code on failure.
3294 */
fdd34271 3295
5d89bb2d
LL
3296int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3297 enum amd_clockgating_state state)
d38ceaf9 3298{
1112a46b 3299 int i, j, r;
d38ceaf9 3300
4a2ba394
SL
3301 if (amdgpu_emu_mode == 1)
3302 return 0;
3303
1112a46b
RZ
3304 for (j = 0; j < adev->num_ip_blocks; j++) {
3305 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 3306 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 3307 continue;
47198eb7 3308 /* skip CG for GFX, SDMA on S0ix */
5d70a549 3309 if (adev->in_s0ix &&
47198eb7
AD
3310 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3311 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 3312 continue;
4a446d55 3313 /* skip CG for VCE/UVD, it's handled specially */
a1255107 3314 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 3315 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 3316 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 3317 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 3318 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 3319 /* enable clockgating to save power */
f2ba8c3d 3320 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
1112a46b 3321 state);
4a446d55 3322 if (r) {
a3e510fd
LL
3323 dev_err(adev->dev,
3324 "set_clockgating_state(gate) of IP block <%s> failed %d\n",
3325 adev->ip_blocks[i].version->funcs->name,
3326 r);
4a446d55
AD
3327 return r;
3328 }
b0b00ff1 3329 }
d38ceaf9 3330 }
06b18f61 3331
c9f96fd5
RZ
3332 return 0;
3333}
3334
5d89bb2d
LL
3335int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3336 enum amd_powergating_state state)
c9f96fd5 3337{
1112a46b 3338 int i, j, r;
06b18f61 3339
c9f96fd5
RZ
3340 if (amdgpu_emu_mode == 1)
3341 return 0;
3342
1112a46b
RZ
3343 for (j = 0; j < adev->num_ip_blocks; j++) {
3344 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 3345 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 3346 continue;
47198eb7 3347 /* skip PG for GFX, SDMA on S0ix */
5d70a549 3348 if (adev->in_s0ix &&
47198eb7
AD
3349 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3350 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 3351 continue;
c9f96fd5
RZ
3352 /* skip CG for VCE/UVD, it's handled specially */
3353 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3354 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3355 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 3356 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
3357 adev->ip_blocks[i].version->funcs->set_powergating_state) {
3358 /* enable powergating to save power */
80d80511 3359 r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
1112a46b 3360 state);
c9f96fd5 3361 if (r) {
a3e510fd
LL
3362 dev_err(adev->dev,
3363 "set_powergating_state(gate) of IP block <%s> failed %d\n",
3364 adev->ip_blocks[i].version->funcs->name,
3365 r);
c9f96fd5
RZ
3366 return r;
3367 }
3368 }
3369 }
2dc80b00
S
3370 return 0;
3371}
3372
beff74bc
AD
3373static int amdgpu_device_enable_mgpu_fan_boost(void)
3374{
3375 struct amdgpu_gpu_instance *gpu_ins;
3376 struct amdgpu_device *adev;
3377 int i, ret = 0;
3378
3379 mutex_lock(&mgpu_info.mutex);
3380
3381 /*
3382 * MGPU fan boost feature should be enabled
3383 * only when there are two or more dGPUs in
3384 * the system
3385 */
3386 if (mgpu_info.num_dgpu < 2)
3387 goto out;
3388
3389 for (i = 0; i < mgpu_info.num_dgpu; i++) {
3390 gpu_ins = &(mgpu_info.gpu_ins[i]);
3391 adev = gpu_ins->adev;
3392 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 3393 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
3394 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3395 if (ret)
3396 break;
3397
3398 gpu_ins->mgpu_fan_enabled = 1;
3399 }
3400 }
3401
3402out:
3403 mutex_unlock(&mgpu_info.mutex);
3404
3405 return ret;
3406}
3407
e3ecdffa
AD
3408/**
3409 * amdgpu_device_ip_late_init - run late init for hardware IPs
3410 *
3411 * @adev: amdgpu_device pointer
3412 *
3413 * Late initialization pass for hardware IPs. The list of all the hardware
3414 * IPs that make up the asic is walked and the late_init callbacks are run.
3415 * late_init covers any special initialization that an IP requires
 3416 * after all of the IPs have been initialized or something that needs to happen
3417 * late in the init process.
3418 * Returns 0 on success, negative error code on failure.
3419 */
06ec9070 3420static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 3421{
60599a03 3422 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
3423 int i = 0, r;
3424
3425 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 3426 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
3427 continue;
3428 if (adev->ip_blocks[i].version->funcs->late_init) {
3138ab2c 3429 r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2dc80b00 3430 if (r) {
a3e510fd
LL
3431 dev_err(adev->dev,
3432 "late_init of IP block <%s> failed %d\n",
3433 adev->ip_blocks[i].version->funcs->name,
3434 r);
2dc80b00
S
3435 return r;
3436 }
2dc80b00 3437 }
73f847db 3438 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
3439 }
3440
867e24ca 3441 r = amdgpu_ras_late_init(adev);
3442 if (r) {
a3e510fd 3443 dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
867e24ca 3444 return r;
3445 }
3446
e283f4fb 3447 if (!amdgpu_reset_in_recovery(adev))
78347b65 3448 amdgpu_ras_set_error_query_ready(adev, true);
a891d239 3449
1112a46b
RZ
3450 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3451 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 3452
06ec9070 3453 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 3454
beff74bc
AD
3455 r = amdgpu_device_enable_mgpu_fan_boost();
3456 if (r)
a3e510fd 3457 dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
beff74bc 3458
 4da8b639 3459 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
3460 if (amdgpu_passthrough(adev) &&
3461 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3462 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 3463 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
3464
3465 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3466 mutex_lock(&mgpu_info.mutex);
3467
3468 /*
 3469 * Reset the device p-state to low, as it was booted with high.
3470 *
3471 * This should be performed only after all devices from the same
3472 * hive get initialized.
3473 *
 3474 * However, the number of devices in the hive is not known in advance,
 3475 * as it is counted one by one during device initialization.
 3476 *
 3477 * So, wait until all XGMI interlinked devices are initialized.
 3478 * This may add some delay as those devices may come from
 3479 * different hives, but that should be OK.
3480 */
3481 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3482 for (i = 0; i < mgpu_info.num_gpu; i++) {
3483 gpu_instance = &(mgpu_info.gpu_ins[i]);
3484 if (gpu_instance->adev->flags & AMD_IS_APU)
3485 continue;
3486
d84a430d
JK
3487 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3488 AMDGPU_XGMI_PSTATE_MIN);
60599a03 3489 if (r) {
a3e510fd
LL
3490 dev_err(adev->dev,
3491 "pstate setting failed (%d).\n",
3492 r);
60599a03
EQ
3493 break;
3494 }
3495 }
3496 }
3497
3498 mutex_unlock(&mgpu_info.mutex);
3499 }
3500
d38ceaf9
AD
3501 return 0;
3502}
3503
dad01f93
SK
3504static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3505{
a3e510fd 3506 struct amdgpu_device *adev = ip_block->adev;
dad01f93
SK
3507 int r;
3508
dac64cb3 3509 if (!ip_block->version->funcs->hw_fini) {
a3e510fd
LL
3510 dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
3511 ip_block->version->funcs->name);
dad01f93
SK
3512 } else {
3513 r = ip_block->version->funcs->hw_fini(ip_block);
3514 /* XXX handle errors */
3515 if (r) {
a3e510fd
LL
3516 dev_dbg(adev->dev,
3517 "hw_fini of IP block <%s> failed %d\n",
3518 ip_block->version->funcs->name, r);
dad01f93
SK
3519 }
3520 }
3521
3522 ip_block->status.hw = false;
3523}
3524
613aa3ea
LY
3525/**
3526 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3527 *
3528 * @adev: amdgpu_device pointer
3529 *
 3530 * For ASICs that need to disable SMC first
3531 */
3532static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3533{
dad01f93 3534 int i;
613aa3ea 3535
4e8303cf 3536 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
3537 return;
3538
3539 for (i = 0; i < adev->num_ip_blocks; i++) {
3540 if (!adev->ip_blocks[i].status.hw)
3541 continue;
3542 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
dad01f93 3543 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
613aa3ea
LY
3544 break;
3545 }
3546 }
3547}
3548
e9669fb7 3549static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
3550{
3551 int i, r;
3552
e9669fb7
AG
3553 for (i = 0; i < adev->num_ip_blocks; i++) {
3554 if (!adev->ip_blocks[i].version->funcs->early_fini)
3555 continue;
5278a159 3556
90410d39 3557 r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
e9669fb7 3558 if (r) {
a3e510fd
LL
3559 dev_dbg(adev->dev,
3560 "early_fini of IP block <%s> failed %d\n",
3561 adev->ip_blocks[i].version->funcs->name, r);
e9669fb7
AG
3562 }
3563 }
c030f2e4 3564
05df1f01 3565 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
3566 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3567
54f7a24e 3568 amdgpu_amdkfd_suspend(adev, true);
c2c72221 3569 amdgpu_userq_suspend(adev);
7270e895 3570
a567db80 3571 /* Workaround for ASICs need to disable SMC first */
613aa3ea 3572 amdgpu_device_smu_fini_early(adev);
3e96dbfd 3573
d38ceaf9 3574 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3575 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 3576 continue;
8201a67a 3577
dad01f93 3578 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
d38ceaf9
AD
3579 }
3580
6effad8a
GC
3581 if (amdgpu_sriov_vf(adev)) {
3582 if (amdgpu_virt_release_full_gpu(adev, false))
a3e510fd
LL
3583 dev_err(adev->dev,
3584 "failed to release exclusive mode on fini\n");
6effad8a
GC
3585 }
3586
e9669fb7
AG
3587 return 0;
3588}
3589
3590/**
3591 * amdgpu_device_ip_fini - run fini for hardware IPs
3592 *
3593 * @adev: amdgpu_device pointer
3594 *
3595 * Main teardown pass for hardware IPs. The list of all the hardware
3596 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3597 * are run. hw_fini tears down the hardware associated with each IP
3598 * and sw_fini tears down any software state associated with each IP.
3599 * Returns 0 on success, negative error code on failure.
3600 */
3601static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3602{
3603 int i, r;
3604
663a8776 3605 amdgpu_cper_fini(adev);
92d5d2a0 3606
e9669fb7
AG
3607 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3608 amdgpu_virt_release_ras_err_handler_data(adev);
3609
e9669fb7
AG
3610 if (adev->gmc.xgmi.num_physical_nodes > 1)
3611 amdgpu_xgmi_remove_device(adev);
3612
c004d44e 3613 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3614
d38ceaf9 3615 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3616 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3617 continue;
c12aba3a
ML
3618
3619 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3620 amdgpu_ucode_free_bo(adev);
1e256e27 3621 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3622 amdgpu_device_wb_fini(adev);
7ccfd79f 3623 amdgpu_device_mem_scratch_fini(adev);
533aed27 3624 amdgpu_ib_pool_fini(adev);
c8031019 3625 amdgpu_seq64_fini(adev);
e7afa85a 3626 amdgpu_doorbell_fini(adev);
c12aba3a 3627 }
278b8fbf
SK
3628 if (adev->ip_blocks[i].version->funcs->sw_fini) {
3629 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3630 /* XXX handle errors */
3631 if (r) {
a3e510fd
LL
3632 dev_dbg(adev->dev,
3633 "sw_fini of IP block <%s> failed %d\n",
3634 adev->ip_blocks[i].version->funcs->name,
3635 r);
278b8fbf 3636 }
2c1a2784 3637 }
a1255107
AD
3638 adev->ip_blocks[i].status.sw = false;
3639 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3640 }
3641
a6dcfd9c 3642 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3643 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3644 continue;
a1255107 3645 if (adev->ip_blocks[i].version->funcs->late_fini)
47d827f9 3646 adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
a1255107 3647 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3648 }
3649
c030f2e4 3650 amdgpu_ras_fini(adev);
3651
d38ceaf9
AD
3652 return 0;
3653}
3654
e3ecdffa 3655/**
beff74bc 3656 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3657 *
1112a46b 3658 * @work: work_struct.
e3ecdffa 3659 */
beff74bc 3660static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3661{
3662 struct amdgpu_device *adev =
beff74bc 3663 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3664 int r;
3665
3666 r = amdgpu_ib_ring_tests(adev);
3667 if (r)
a3e510fd 3668 dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2dc80b00
S
3669}
3670
1e317b99
RZ
3671static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3672{
3673 struct amdgpu_device *adev =
3674 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3675
90a92662
MD
3676 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3677 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3678
ff69bba0 3679 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
90a92662 3680 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3681}
3682
e3ecdffa 3683/**
e7854a03 3684 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3685 *
3686 * @adev: amdgpu_device pointer
3687 *
3688 * Main suspend function for hardware IPs. The list of all the hardware
3689 * IPs that make up the asic is walked, clockgating is disabled and the
3690 * suspend callbacks are run. suspend puts the hardware and software state
3691 * in each IP into a state suitable for suspend.
3692 * Returns 0 on success, negative error code on failure.
3693 */
e7854a03
AD
3694static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3695{
3696 int i, r;
3697
50ec83f0
AD
3698 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3699 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3700
b31d6ada
EQ
3701 /*
3702 * Per PMFW team's suggestion, driver needs to handle gfxoff
3703 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3704 * scenario. Add the missing df cstate disablement here.
3705 */
3706 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3707 dev_warn(adev->dev, "Failed to disallow df cstate");
3708
e7854a03
AD
3709 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3710 if (!adev->ip_blocks[i].status.valid)
3711 continue;
2b9f7848 3712
e7854a03 3713 /* displays are handled separately */
2b9f7848
ND
3714 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3715 continue;
3716
3717 /* XXX handle errors */
e095026f
SK
3718 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3719 if (r)
2b9f7848 3720 return r;
e7854a03
AD
3721 }
3722
e7854a03
AD
3723 return 0;
3724}
3725
3726/**
3727 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3728 *
3729 * @adev: amdgpu_device pointer
3730 *
3731 * Main suspend function for hardware IPs. The list of all the hardware
3732 * IPs that make up the asic is walked, clockgating is disabled and the
3733 * suspend callbacks are run. suspend puts the hardware and software state
3734 * in each IP into a state suitable for suspend.
3735 * Returns 0 on success, negative error code on failure.
3736 */
3737static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3738{
3739 int i, r;
3740
557f42a2 3741 if (adev->in_s0ix)
bc143d8b 3742 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3743
d38ceaf9 3744 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3745 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3746 continue;
e7854a03
AD
3747 /* displays are handled in phase1 */
3748 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3749 continue;
bff77e86
LM
3750 /* PSP lost connection when err_event_athub occurs */
3751 if (amdgpu_ras_intr_triggered() &&
3752 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3753 adev->ip_blocks[i].status.hw = false;
3754 continue;
3755 }
e3c1b071 3756
3757 /* skip unnecessary suspend if we do not initialize them yet */
5839d27d
LL
3758 if (!amdgpu_ip_member_of_hwini(
3759 adev, adev->ip_blocks[i].version->type))
e3c1b071 3760 continue;
557f42a2 3761
35a54408
AD
3762 /* Since we skip suspend for S0i3, we need to cancel the delayed
3763 * idle work here as the suspend callback never gets called.
3764 */
3765 if (adev->in_s0ix &&
3766 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3767 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3768 cancel_delayed_work_sync(&adev->gfx.idle_work);
afa6646b 3769 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3770 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3771 * like at runtime. PSP is also part of the always on hardware
3772 * so no need to suspend it.
3773 */
557f42a2 3774 if (adev->in_s0ix &&
32ff160d 3775 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3776 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3777 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3778 continue;
3779
2a7798ea
AD
3780 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3781 if (adev->in_s0ix &&
4e8303cf
LL
3782 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3783 IP_VERSION(5, 0, 0)) &&
3784 (adev->ip_blocks[i].version->type ==
3785 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3786 continue;
3787
e11c7750
TH
 3788 /* swPSP provides the IMU and RLC FW binaries to TOS once during cold-boot.
 3789 * These are in TMR, hence they are expected to be reused by PSP-TOS to reload
 3790 * from this location and RLC Autoload automatically also gets loaded
 3791 * from here based on the PMFW -> PSP message during the re-init sequence.
 3792 * Therefore, psp suspend & resume should be skipped to avoid destroying
 3793 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3794 */
3795 if (amdgpu_in_reset(adev) &&
3796 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3797 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3798 continue;
3799
d38ceaf9 3800 /* XXX handle errors */
e095026f 3801 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
876923fb 3802 adev->ip_blocks[i].status.hw = false;
e095026f 3803
a3a09142 3804 /* handle putting the SMC in the appropriate state */
47fc644f 3805 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3806 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3807 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3808 if (r) {
a3e510fd
LL
3809 dev_err(adev->dev,
3810 "SMC failed to set mp1 state %d, %d\n",
3811 adev->mp1_state, r);
86b93fd6
JZ
3812 return r;
3813 }
a3a09142
AD
3814 }
3815 }
d38ceaf9
AD
3816 }
3817
3818 return 0;
3819}
3820
e7854a03
AD
3821/**
3822 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3823 *
3824 * @adev: amdgpu_device pointer
3825 *
3826 * Main suspend function for hardware IPs. The list of all the hardware
3827 * IPs that make up the asic is walked, clockgating is disabled and the
3828 * suspend callbacks are run. suspend puts the hardware and software state
3829 * in each IP into a state suitable for suspend.
3830 * Returns 0 on success, negative error code on failure.
3831 */
3832int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3833{
3834 int r;
3835
3c73683c
JC
3836 if (amdgpu_sriov_vf(adev)) {
3837 amdgpu_virt_fini_data_exchange(adev);
e7819644 3838 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3839 }
e7819644 3840
b7043800
AD
3841 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3842
e7854a03
AD
3843 r = amdgpu_device_ip_suspend_phase1(adev);
3844 if (r)
3845 return r;
3846 r = amdgpu_device_ip_suspend_phase2(adev);
3847
e7819644
YT
3848 if (amdgpu_sriov_vf(adev))
3849 amdgpu_virt_release_full_gpu(adev, false);
3850
e7854a03
AD
3851 return r;
3852}
3853
06ec9070 3854static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3855{
3856 int i, r;
3857
2cb681b6 3858 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3859 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3860 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3861 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3862 AMD_IP_BLOCK_TYPE_IH,
3863 };
a90ad3c2 3864
95ea3dbc 3865 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3866 int j;
3867 struct amdgpu_ip_block *block;
a90ad3c2 3868
4cd2a96d
J
3869 block = &adev->ip_blocks[i];
3870 block->status.hw = false;
2cb681b6 3871
4cd2a96d 3872 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3873
4cd2a96d 3874 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3875 !block->status.valid)
3876 continue;
3877
58608034 3878 r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
f3bb57b6
YY
3879 if (r) {
3880 dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3881 block->version->funcs->name);
c41d1cf6 3882 return r;
f3bb57b6 3883 }
482f0e53 3884 block->status.hw = true;
a90ad3c2
ML
3885 }
3886 }
3887
3888 return 0;
3889}
3890
06ec9070 3891static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2 3892{
f3bb57b6
YY
3893 struct amdgpu_ip_block *block;
3894 int i, r = 0;
a90ad3c2 3895
2cb681b6
ML
3896 static enum amd_ip_block_type ip_order[] = {
3897 AMD_IP_BLOCK_TYPE_SMC,
3898 AMD_IP_BLOCK_TYPE_DCE,
3899 AMD_IP_BLOCK_TYPE_GFX,
3900 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3901 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3902 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3903 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3904 AMD_IP_BLOCK_TYPE_VCN,
3905 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3906 };
a90ad3c2 3907
2cb681b6 3908 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
f3bb57b6 3909 block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
2cb681b6 3910
f3bb57b6
YY
3911 if (!block)
3912 continue;
2cb681b6 3913
f3bb57b6 3914 if (block->status.valid && !block->status.hw) {
502d7630 3915 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
f3bb57b6 3916 r = amdgpu_ip_block_resume(block);
502d7630 3917 } else {
f3bb57b6
YY
3918 r = block->version->funcs->hw_init(block);
3919 }
3920
3921 if (r) {
3922 dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3923 block->version->funcs->name);
3924 break;
502d7630 3925 }
f3bb57b6 3926 block->status.hw = true;
a90ad3c2
ML
3927 }
3928 }
3929
f3bb57b6 3930 return r;
a90ad3c2
ML
3931}
3932
e3ecdffa
AD
3933/**
3934 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3935 *
3936 * @adev: amdgpu_device pointer
3937 *
3938 * First resume function for hardware IPs. The list of all the hardware
3939 * IPs that make up the asic is walked and the resume callbacks are run for
3940 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3941 * after a suspend and updates the software state as necessary. This
3942 * function is also used for restoring the GPU after a GPU reset.
3943 * Returns 0 on success, negative error code on failure.
3944 */
06ec9070 3945static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3946{
3947 int i, r;
3948
a90ad3c2 3949 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3950 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3951 continue;
a90ad3c2 3952 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3953 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3954 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3955 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3956
502d7630
SK
3957 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3958 if (r)
fcf0649f 3959 return r;
a90ad3c2
ML
3960 }
3961 }
3962
3963 return 0;
3964}
3965
e3ecdffa
AD
3966/**
3967 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3968 *
3969 * @adev: amdgpu_device pointer
3970 *
73dae652 3971 * Second resume function for hardware IPs. The list of all the hardware
e3ecdffa
AD
3972 * IPs that make up the asic is walked and the resume callbacks are run for
3973 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3974 * functional state after a suspend and updates the software state as
3975 * necessary. This function is also used for restoring the GPU after a GPU
3976 * reset.
3977 * Returns 0 on success, negative error code on failure.
3978 */
06ec9070 3979static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3980{
3981 int i, r;
3982
3983 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3984 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3985 continue;
fcf0649f 3986 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3987 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2 3988 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
73dae652 3989 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
7a3e0bb2 3990 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3991 continue;
502d7630
SK
3992 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3993 if (r)
d38ceaf9
AD
3994 return r;
3995 }
3996
3997 return 0;
3998}
3999
73dae652
AD
4000/**
4001 * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
4002 *
4003 * @adev: amdgpu_device pointer
4004 *
4005 * Third resume function for hardware IPs. The list of all the hardware
4006 * IPs that make up the asic is walked and the resume callbacks are run for
4007 * all DCE. resume puts the hardware into a functional state after a suspend
4008 * and updates the software state as necessary. This function is also used
4009 * for restoring the GPU after a GPU reset.
4010 *
4011 * Returns 0 on success, negative error code on failure.
4012 */
4013static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
4014{
4015 int i, r;
4016
4017 for (i = 0; i < adev->num_ip_blocks; i++) {
4018 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
4019 continue;
4020 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
4021 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
4022 if (r)
4023 return r;
4024 }
4025 }
4026
4027 return 0;
4028}
4029
e3ecdffa
AD
4030/**
4031 * amdgpu_device_ip_resume - run resume for hardware IPs
4032 *
4033 * @adev: amdgpu_device pointer
4034 *
4035 * Main resume function for hardware IPs. The hardware IPs
4036 * are split into two resume functions because they are
b8920e1e 4037 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
4038 * steps need to be take between them. In this case (S3/S4) they are
4039 * run sequentially.
4040 * Returns 0 on success, negative error code on failure.
4041 */
06ec9070 4042static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
4043{
4044 int r;
4045
06ec9070 4046 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
4047 if (r)
4048 return r;
7a3e0bb2
RZ
4049
4050 r = amdgpu_device_fw_loading(adev);
4051 if (r)
4052 return r;
4053
06ec9070 4054 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f 4055
b7043800
AD
4056 if (adev->mman.buffer_funcs_ring->sched.ready)
4057 amdgpu_ttm_set_buffer_funcs_status(adev, true);
4058
73dae652
AD
4059 if (r)
4060 return r;
4061
4062 amdgpu_fence_driver_hw_init(adev);
4063
4064 r = amdgpu_device_ip_resume_phase3(adev);
4065
fcf0649f
CZ
4066 return r;
4067}
4068
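 /*
  * Resume ordering note (a summary of amdgpu_device_ip_resume() above, not
  * additional driver code): phase 1 brings back COMMON, GMC and IH (plus PSP
  * under SR-IOV), firmware loading follows, phase 2 restores the remaining
  * non-display blocks, buffer funcs and fence driver hardware state are then
  * re-enabled, and phase 3 finally resumes the display (DCE) blocks.
  */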
e3ecdffa
AD
4069/**
4070 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
4071 *
4072 * @adev: amdgpu_device pointer
4073 *
4074 * Query the VBIOS data tables to determine if the board supports SR-IOV.
4075 */
4e99a44e 4076static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 4077{
6867e1b5
ML
4078 if (amdgpu_sriov_vf(adev)) {
4079 if (adev->is_atom_fw) {
58ff791a 4080 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
4081 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4082 } else {
4083 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
4084 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4085 }
4086
4087 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
4088 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 4089 }
048765ad
AR
4090}
4091
e3ecdffa
AD
4092/**
4093 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
4094 *
0b7f1355 4095 * @pdev : pci device context
e3ecdffa
AD
4096 * @asic_type: AMD asic type
4097 *
 4098 * Check if there is DC (new modesetting infrastructure) support for an asic.
4099 * returns true if DC has support, false if not.
4100 */
0b7f1355
LL
4101bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
4102 enum amd_asic_type asic_type)
4562236b
HW
4103{
4104 switch (asic_type) {
0637d417
AD
4105#ifdef CONFIG_DRM_AMDGPU_SI
4106 case CHIP_HAINAN:
4107#endif
4108 case CHIP_TOPAZ:
4109 /* chips with no display hardware */
4110 return false;
4562236b 4111#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
4112 case CHIP_TAHITI:
4113 case CHIP_PITCAIRN:
4114 case CHIP_VERDE:
4115 case CHIP_OLAND:
2d32ffd6
AD
4116 /*
4117 * We have systems in the wild with these ASICs that require
4118 * LVDS and VGA support which is not supported with DC.
4119 *
4120 * Fallback to the non-DC driver here by default so as not to
4121 * cause regressions.
4122 */
4123#if defined(CONFIG_DRM_AMD_DC_SI)
4124 return amdgpu_dc > 0;
4125#else
4126 return false;
64200c46 4127#endif
4562236b 4128 case CHIP_BONAIRE:
0d6fbccb 4129 case CHIP_KAVERI:
367e6687
AD
4130 case CHIP_KABINI:
4131 case CHIP_MULLINS:
d9fda248
HW
4132 /*
4133 * We have systems in the wild with these ASICs that require
b5a0168e 4134 * VGA support which is not supported with DC.
d9fda248
HW
4135 *
4136 * Fallback to the non-DC driver here by default so as not to
4137 * cause regressions.
4138 */
4139 return amdgpu_dc > 0;
f7f12b25 4140 default:
fd187853 4141 return amdgpu_dc != 0;
f7f12b25 4142#else
4562236b 4143 default:
93b09a9a 4144 if (amdgpu_dc > 0)
a3e510fd 4145 dev_info_once(
0b7f1355 4146 &pdev->dev,
a3e510fd 4147 "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 4148 return false;
f7f12b25 4149#endif
4562236b
HW
4150 }
4151}
4152
4153/**
4154 * amdgpu_device_has_dc_support - check if dc is supported
4155 *
982a820b 4156 * @adev: amdgpu_device pointer
4562236b
HW
4157 *
4158 * Returns true for supported, false for not supported
4159 */
4160bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
4161{
25263da3 4162 if (adev->enable_virtual_display ||
abaf210c 4163 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
4164 return false;
4165
0b7f1355 4166 return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
4562236b
HW
4167}
4168
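 /*
  * Illustrative usage note, not part of the driver: the amdgpu_dc checks
  * above correspond to the amdgpu.dc module parameter (assumed here to
  * default to -1/auto).  For example, forcing Display Core on one of the
  * legacy ASICs listed above:
  *
  *   modprobe amdgpu dc=1
  *
  * while dc=0 keeps the non-DC display path where one exists.
  */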
d4535e2c
AG
4169static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
4170{
4171 struct amdgpu_device *adev =
4172 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 4173 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 4174
c6a6e2db
AG
4175 /* It's a bug to not have a hive within this function */
4176 if (WARN_ON(!hive))
4177 return;
4178
4179 /*
4180 * Use task barrier to synchronize all xgmi reset works across the
4181 * hive. task_barrier_enter and task_barrier_exit will block
4182 * until all the threads running the xgmi reset works reach
4183 * those points. task_barrier_full will do both blocks.
4184 */
4185 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
4186
4187 task_barrier_enter(&hive->tb);
127ed492 4188 adev->asic_reset_res = amdgpu_device_baco_enter(adev);
c6a6e2db
AG
4189
4190 if (adev->asic_reset_res)
4191 goto fail;
4192
4193 task_barrier_exit(&hive->tb);
127ed492 4194 adev->asic_reset_res = amdgpu_device_baco_exit(adev);
c6a6e2db
AG
4195
4196 if (adev->asic_reset_res)
4197 goto fail;
43c4d576 4198
21226f02 4199 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
4200 } else {
4201
4202 task_barrier_full(&hive->tb);
4203 adev->asic_reset_res = amdgpu_asic_reset(adev);
4204 }
ce316fa5 4205
c6a6e2db 4206fail:
d4535e2c 4207 if (adev->asic_reset_res)
a3e510fd
LL
4208 dev_warn(adev->dev,
4209 "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4210 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 4211 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
4212}
4213
71f98027
AD
4214static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
4215{
4216 char *input = amdgpu_lockup_timeout;
4217 char *timeout_setting = NULL;
4218 int index = 0;
4219 long timeout;
4220 int ret = 0;
4221
4222 /*
 ec8fbb44 4223 * By default, the timeout for jobs is 10 seconds
71f98027 4224 */
ec8fbb44 4225 adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
71f98027 4226 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
71f98027 4227
f440ff44 4228 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 4229 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 4230 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
4231 ret = kstrtol(timeout_setting, 0, &timeout);
4232 if (ret)
4233 return ret;
4234
4235 if (timeout == 0) {
4236 index++;
4237 continue;
4238 } else if (timeout < 0) {
4239 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
4240 dev_warn(adev->dev, "lockup timeout disabled");
4241 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
4242 } else {
4243 timeout = msecs_to_jiffies(timeout);
4244 }
4245
4246 switch (index++) {
4247 case 0:
4248 adev->gfx_timeout = timeout;
4249 break;
4250 case 1:
4251 adev->compute_timeout = timeout;
4252 break;
4253 case 2:
4254 adev->sdma_timeout = timeout;
4255 break;
4256 case 3:
4257 adev->video_timeout = timeout;
4258 break;
4259 default:
4260 break;
4261 }
4262 }
4263 /*
4264 * There is only one value specified and
4265 * it should apply to all non-compute jobs.
4266 */
bcccee89 4267 if (index == 1) {
71f98027 4268 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
4269 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
4270 adev->compute_timeout = adev->gfx_timeout;
4271 }
71f98027
AD
4272 }
4273
4274 return ret;
4275}
d4535e2c 4276
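 /*
  * Illustrative usage note, not part of the driver: the parsing above maps
  * up to four comma separated amdgpu.lockup_timeout values to the gfx,
  * compute, sdma and video queues in that order, e.g.
  *
  *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
  *
  * A single value applies to all non-compute queues, 0 keeps the default
  * and a negative value disables the timeout (MAX_SCHEDULE_TIMEOUT).
  */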
4a74c38c
PY
4277/**
4278 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4279 *
4280 * @adev: amdgpu_device pointer
4281 *
 4282 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
4283 */
4284static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4285{
4286 struct iommu_domain *domain;
4287
4288 domain = iommu_get_domain_for_dev(adev->dev);
4289 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4290 adev->ram_is_direct_mapped = true;
4291}
4292
75f0efbc
RJ
4293#if defined(CONFIG_HSA_AMD_P2P)
4294/**
4295 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4296 *
4297 * @adev: amdgpu_device pointer
4298 *
 4299 * Returns true if the IOMMU is remapping the BAR address
4300 */
4301static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4302{
4303 struct iommu_domain *domain;
4304
4305 domain = iommu_get_domain_for_dev(adev->dev);
4306 if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4307 domain->type == IOMMU_DOMAIN_DMA_FQ))
4308 return true;
4309
4310 return false;
4311}
4312#endif
4313
02ff519e
AD
4314static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4315{
4316 if (amdgpu_mcbp == 1)
4317 adev->gfx.mcbp = true;
1e9e15dc
JZ
4318 else if (amdgpu_mcbp == 0)
4319 adev->gfx.mcbp = false;
50a7c876 4320
02ff519e
AD
4321 if (amdgpu_sriov_vf(adev))
4322 adev->gfx.mcbp = true;
4323
4324 if (adev->gfx.mcbp)
a3e510fd 4325 dev_info(adev->dev, "MCBP is enabled\n");
02ff519e
AD
4326}
4327
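 /*
  * Illustrative usage note, not part of the driver: amdgpu_mcbp above
  * follows the amdgpu.mcbp module parameter (assumed to default to auto).
  * For example, forcing mid-command-buffer preemption on bare metal:
  *
  *   modprobe amdgpu mcbp=1
  *
  * SR-IOV virtual functions enable MCBP regardless of the parameter.
  */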
d38ceaf9
AD
4328/**
4329 * amdgpu_device_init - initialize the driver
4330 *
4331 * @adev: amdgpu_device pointer
d38ceaf9
AD
4332 * @flags: driver flags
4333 *
4334 * Initializes the driver info and hw (all asics).
4335 * Returns 0 for success or an error on failure.
4336 * Called at driver startup.
4337 */
4338int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
4339 uint32_t flags)
4340{
8aba21b7 4341 struct pci_dev *pdev = adev->pdev;
d38ceaf9 4342 int r, i;
b98c6299 4343 bool px = false;
95844d20 4344 u32 max_MBps;
59e9fff1 4345 int tmp;
d38ceaf9
AD
4346
4347 adev->shutdown = false;
d38ceaf9 4348 adev->flags = flags;
4e66d7d2
YZ
4349
4350 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4351 adev->asic_type = amdgpu_force_asic_type;
4352 else
4353 adev->asic_type = flags & AMD_ASIC_MASK;
4354
d38ceaf9 4355 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 4356 if (amdgpu_emu_mode == 1)
8bdab6bb 4357 adev->usec_timeout *= 10;
770d13b1 4358 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
4359 adev->accel_working = false;
4360 adev->num_rings = 0;
68ce8b24 4361 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
4362 adev->mman.buffer_funcs = NULL;
4363 adev->mman.buffer_funcs_ring = NULL;
4364 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 4365 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 4366 adev->gmc.gmc_funcs = NULL;
7bd939d0 4367 adev->harvest_ip_mask = 0x0;
f54d1867 4368 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 4369 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
4370
4371 adev->smc_rreg = &amdgpu_invalid_rreg;
4372 adev->smc_wreg = &amdgpu_invalid_wreg;
4373 adev->pcie_rreg = &amdgpu_invalid_rreg;
4374 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
4375 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4376 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
4377 adev->pciep_rreg = &amdgpu_invalid_rreg;
4378 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
4379 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4380 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
4381 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4382 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
4383 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4384 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4385 adev->didt_rreg = &amdgpu_invalid_rreg;
4386 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
4387 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4388 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
4389 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4390 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4391
a3e510fd
LL
4392 dev_info(
4393 adev->dev,
4394 "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4395 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4396 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
4397
 4398 /* mutex initializations are all done here so we
b8920e1e
SS
 4399 * can recall functions without having locking issues
4400 */
0e5ca0d1 4401 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
4402 mutex_init(&adev->pm.mutex);
4403 mutex_init(&adev->gfx.gpu_clock_mutex);
4404 mutex_init(&adev->srbm_mutex);
b8866c26 4405 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 4406 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 4407 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 4408 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 4409 mutex_init(&adev->mn_lock);
e23b74aa 4410 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 4411 hash_init(adev->mn_hash);
32eaeae0 4412 mutex_init(&adev->psp.mutex);
bd052211 4413 mutex_init(&adev->notifier_lock);
8cda7a4f 4414 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 4415 mutex_init(&adev->benchmark_mutex);
76acba7b 4416 mutex_init(&adev->gfx.reset_sem_mutex);
e189be9b
SS
4417 /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4418 mutex_init(&adev->enforce_isolation_mutex);
bd22e44a
CK
4419 for (i = 0; i < MAX_XCP; ++i) {
4420 adev->isolation[i].spearhead = dma_fence_get_stub();
4421 amdgpu_sync_create(&adev->isolation[i].active);
4422 amdgpu_sync_create(&adev->isolation[i].prev);
4423 }
28fc3172 4424 mutex_init(&adev->gfx.userq_sch_mutex);
553673a3 4425 mutex_init(&adev->gfx.workload_profile_mutex);
ca6575a3 4426 mutex_init(&adev->vcn.workload_profile_mutex);
4ce60dba 4427 mutex_init(&adev->userq_mutex);
d38ceaf9 4428
ab3b9de6 4429 amdgpu_device_init_apu_flags(adev);
9f6a7857 4430
912dfc84
EQ
4431 r = amdgpu_device_check_arguments(adev);
4432 if (r)
4433 return r;
d38ceaf9 4434
d38ceaf9
AD
4435 spin_lock_init(&adev->mmio_idx_lock);
4436 spin_lock_init(&adev->smc_idx_lock);
4437 spin_lock_init(&adev->pcie_idx_lock);
4438 spin_lock_init(&adev->uvd_ctx_idx_lock);
4439 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 4440 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 4441 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 4442 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 4443 spin_lock_init(&adev->mm_stats.lock);
dc0297f3 4444 spin_lock_init(&adev->virt.rlcg_reg_lock);
497d7cee 4445 spin_lock_init(&adev->wb.lock);
d38ceaf9 4446
89498437
APS
4447 xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
4448
655ce9cb 4449 INIT_LIST_HEAD(&adev->reset_list);
4450
6492e1b0 4451 INIT_LIST_HEAD(&adev->ras_list);
4452
3e38b634
EQ
4453 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4454
4ce60dba
AD
4455 INIT_LIST_HEAD(&adev->userq_mgr_list);
4456
beff74bc
AD
4457 INIT_DELAYED_WORK(&adev->delayed_init_work,
4458 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
4459 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4460 amdgpu_device_delay_enable_gfx_off);
afefd6f2
SS
4461 /*
4462 * Initialize the enforce_isolation work structures for each XCP
4463 * partition. This work handler is responsible for enforcing shader
4464 * isolation on AMD GPUs. It counts the number of emitted fences for
4465 * each GFX and compute ring. If there are any fences, it schedules
4466 * the `enforce_isolation_work` to be run after a delay. If there are
4467 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4468 * runqueue.
4469 */
4470 for (i = 0; i < MAX_XCP; i++) {
4471 INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4472 amdgpu_gfx_enforce_isolation_handler);
4473 adev->gfx.enforce_isolation[i].adev = adev;
4474 adev->gfx.enforce_isolation[i].xcp_id = i;
4475 }
2dc80b00 4476
d4535e2c
AG
4477 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4478
d23ee13f 4479 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
4480 adev->gfx.gfx_off_residency = 0;
4481 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 4482 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 4483
b265bdbd
EQ
4484 atomic_set(&adev->throttling_logging_enabled, 1);
4485 /*
4486 * If throttling continues, logging will be performed every minute
4487 * to avoid log flooding. "-1" is subtracted since the thermal
4488 * throttling interrupt comes every second. Thus, the total logging
 4489 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4490 * for throttling interrupt) = 60 seconds.
4491 */
4492 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
84a2947e 4493
b265bdbd
EQ
4494 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4495
0fa49558
AX
4496 /* Registers mapping */
4497 /* TODO: block userspace mapping of io register */
da69c161
KW
4498 if (adev->asic_type >= CHIP_BONAIRE) {
4499 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4500 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4501 } else {
4502 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4503 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4504 }
d38ceaf9 4505
6c08e0ef
EQ
4506 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4507 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4508
d38ceaf9 4509 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 4510 if (!adev->rmmio)
d38ceaf9 4511 return -ENOMEM;
b8920e1e 4512
a3e510fd
LL
4513 dev_info(adev->dev, "register mmio base: 0x%08X\n",
4514 (uint32_t)adev->rmmio_base);
4515 dev_info(adev->dev, "register mmio size: %u\n",
4516 (unsigned int)adev->rmmio_size);
d38ceaf9 4517
436afdfa
PY
4518 /*
 4519 * The reset domain needs to be present early, before the XGMI hive (if any)
a567db80 4520 * is discovered and initialized, so that the reset sem and in_gpu reset flag
436afdfa
PY
 4521 * can be used early on during init and before calling RREG32.
4522 */
4523 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
03c6284d
MJ
4524 if (!adev->reset_domain)
4525 return -ENOMEM;
436afdfa 4526
3aa0115d 4527 /* detect hw virtualization here */
a91d91b6 4528 amdgpu_virt_init(adev);
3aa0115d 4529
04e85958
TL
4530 amdgpu_device_get_pcie_info(adev);
4531
dffa11b4
ML
4532 r = amdgpu_device_get_job_timeout_settings(adev);
4533 if (r) {
4534 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
03c6284d 4535 return r;
a190d1c7
XY
4536 }
4537
bf909454
PEPP
4538 amdgpu_device_set_mcbp(adev);
4539
14f2fe34
LL
4540 /*
4541 * By default, use default mode where all blocks are expected to be
4542 * initialized. At present a 'swinit' of blocks is required to be
4543 * completed before the need for a different level is detected.
4544 */
4545 amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
d38ceaf9 4546 /* early init functions */
06ec9070 4547 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 4548 if (r)
03c6284d 4549 return r;
d38ceaf9 4550
9deacd6c
AD
4551 /*
4552 * No need to remove conflicting FBs for non-display class devices.
 4553 * This prevents the sysfb from being freed accidentally.
4554 */
4555 if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
4556 (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
4557 /* Get rid of things like offb */
4558 r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
4559 if (r)
4560 return r;
4561 }
b7cdb41e 4562
4d33e704
SK
4563 /* Enable TMZ based on IP_VERSION */
4564 amdgpu_gmc_tmz_set(adev);
4565
3e2dacca
DS
4566 if (amdgpu_sriov_vf(adev) &&
4567 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4568 /* VF MMIO access (except mailbox range) from CPU
4569 * will be blocked during sriov runtime
4570 */
4571 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4572
957b0787 4573 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
4574 /* Need to get xgmi info early to decide the reset behavior*/
4575 if (adev->gmc.xgmi.supported) {
4576 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4577 if (r)
03c6284d 4578 return r;
4a0165f0
VS
4579 }
4580
8e6d0b69 4581 /* enable PCIE atomic ops */
b4520bfd
GW
4582 if (amdgpu_sriov_vf(adev)) {
4583 if (adev->virt.fw_reserve.p_pf2vf)
4584 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4585 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4586 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 4587 /* APUs w/ gfx9 onwards don't rely on PCIe atomics; rather, the
 4588 * internal path natively supports atomics, so set have_atomics_support to true.
4589 */
b4520bfd 4590 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
4591 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4592 IP_VERSION(9, 0, 0))) {
0e768043 4593 adev->have_atomics_support = true;
b4520bfd 4594 } else {
8e6d0b69 4595 adev->have_atomics_support =
4596 !pci_enable_atomic_ops_to_root(adev->pdev,
4597 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4598 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
4599 }
4600
8e6d0b69 4601 if (!adev->have_atomics_support)
4602 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4603
6585661d 4604 /* doorbell bar mapping and doorbell index init*/
43c064db 4605 amdgpu_doorbell_init(adev);
6585661d 4606
9475a943
SL
4607 if (amdgpu_emu_mode == 1) {
4608 /* post the asic on emulation mode */
4609 emu_soc_asic_init(adev);
bfca0289 4610 goto fence_driver_init;
9475a943 4611 }
bfca0289 4612
04442bf7
LL
4613 amdgpu_reset_init(adev);
4614
4e99a44e 4615 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
4616 if (adev->bios)
4617 amdgpu_device_detect_sriov_bios(adev);
048765ad 4618
95e8e59e
AD
4619 /* check if we need to reset the asic
4620 * E.g., driver was not cleanly unloaded previously, etc.
4621 */
f14899fd 4622 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 4623 if (adev->gmc.xgmi.num_physical_nodes) {
4624 dev_info(adev->dev, "Pending hive reset.\n");
5839d27d
LL
4625 amdgpu_set_init_level(adev,
4626 AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
7c1d9e10
KF
4627 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4628 !amdgpu_device_has_display_hardware(adev)) {
4629 r = psp_gpu_reset(adev);
e3c1b071 4630 } else {
7c1d9e10
KF
4631 tmp = amdgpu_reset_method;
4632 /* It should do a default reset when loading or reloading the driver,
4633 * regardless of the module parameter reset_method.
4634 */
4635 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4636 r = amdgpu_asic_reset(adev);
4637 amdgpu_reset_method = tmp;
4638 }
4639
4640 if (r) {
4641 dev_err(adev->dev, "asic reset on init failed\n");
4642 goto failed;
95e8e59e
AD
4643 }
4644 }
4645
d38ceaf9 4646 /* Post card if necessary */
39c640c0 4647 if (amdgpu_device_need_post(adev)) {
d38ceaf9 4648 if (!adev->bios) {
bec86378 4649 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
4650 r = -EINVAL;
4651 goto failed;
d38ceaf9 4652 }
a3e510fd 4653 dev_info(adev->dev, "GPU posting now...\n");
4d2997ab 4654 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
4655 if (r) {
4656 dev_err(adev->dev, "gpu post error!\n");
4657 goto failed;
4658 }
d38ceaf9
AD
4659 }
4660
9535a86a
SZ
4661 if (adev->bios) {
4662 if (adev->is_atom_fw) {
4663 /* Initialize clocks */
4664 r = amdgpu_atomfirmware_get_clock_info(adev);
4665 if (r) {
4666 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4667 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4668 goto failed;
4669 }
4670 } else {
4671 /* Initialize clocks */
4672 r = amdgpu_atombios_get_clock_info(adev);
4673 if (r) {
4674 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4675 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4676 goto failed;
4677 }
4678 /* init i2c buses */
1c0b144b 4679 amdgpu_i2c_init(adev);
a5bde2f9 4680 }
2c1a2784 4681 }
d38ceaf9 4682
bfca0289 4683fence_driver_init:
d38ceaf9 4684 /* Fence driver */
067f44c8 4685 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4686 if (r) {
067f44c8 4687 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4688 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4689 goto failed;
2c1a2784 4690 }
d38ceaf9
AD
4691
4692 /* init the mode config */
4a580877 4693 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4694
06ec9070 4695 r = amdgpu_device_ip_init(adev);
d38ceaf9 4696 if (r) {
06ec9070 4697 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4698 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4699 goto release_ras_con;
d38ceaf9
AD
4700 }
4701
8d35a259
LG
4702 amdgpu_fence_driver_hw_init(adev);
4703
d69b8971
YZ
4704 dev_info(adev->dev,
4705 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4706 adev->gfx.config.max_shader_engines,
4707 adev->gfx.config.max_sh_per_se,
4708 adev->gfx.config.max_cu_per_sh,
4709 adev->gfx.cu_info.number);
4710
d38ceaf9
AD
4711 adev->accel_working = true;
4712
e59c0205
AX
4713 amdgpu_vm_check_compute_bug(adev);
4714
95844d20
MO
4715 /* Initialize the buffer migration limit. */
4716 if (amdgpu_moverate >= 0)
4717 max_MBps = amdgpu_moverate;
4718 else
4719 max_MBps = 8; /* Allow 8 MB/s. */
4720 /* Get a log2 for easy divisions. */
4721 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4722
b0adca4d
EQ
4723 /*
4724 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4725 * Otherwise the mgpu fan boost feature will be skipped because this
 4726 * gpu instance would not have been counted yet.
4727 */
4728 amdgpu_register_gpu_instance(adev);
4729
d38ceaf9
AD
4730 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4731 * explicit gating rather than handling it automatically.
4732 */
5839d27d 4733 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
e3c1b071 4734 r = amdgpu_device_ip_late_init(adev);
4735 if (r) {
4736 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4737 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4738 goto release_ras_con;
e3c1b071 4739 }
4740 /* must succeed. */
4741 amdgpu_ras_resume(adev);
4742 queue_delayed_work(system_wq, &adev->delayed_init_work,
4743 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4744 }
d38ceaf9 4745
38eecbe0
CL
4746 if (amdgpu_sriov_vf(adev)) {
4747 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4748 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4749 }
2c738637 4750
90bcb9b5
EQ
4751 /*
 4752 * Place the sysfs registration after `late_init`, as some of the
 4753 * operations performed in `late_init` might affect the creation of
 4754 * the sysfs interfaces.
4755 */
4756 r = amdgpu_atombios_sysfs_init(adev);
4757 if (r)
4758 drm_err(&adev->ddev,
4759 "registering atombios sysfs failed (%d).\n", r);
4760
4761 r = amdgpu_pm_sysfs_init(adev);
4762 if (r)
a3e510fd 4763 dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
90bcb9b5
EQ
4764
4765 r = amdgpu_ucode_sysfs_init(adev);
4766 if (r) {
4767 adev->ucode_sysfs_en = false;
a3e510fd 4768 dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
90bcb9b5
EQ
4769 } else
4770 adev->ucode_sysfs_en = true;
4771
9c05636c 4772 r = amdgpu_device_attr_sysfs_init(adev);
5aea5327 4773 if (r)
77f3a5cd 4774 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4775
76da73f0
LL
4776 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4777 if (r)
4778 dev_err(adev->dev,
4779 "Could not create amdgpu board attributes\n");
4780
7957ec80 4781 amdgpu_fru_sysfs_init(adev);
af39e6f4 4782 amdgpu_reg_state_sysfs_init(adev);
cbbab292 4783 amdgpu_xcp_sysfs_init(adev);
7957ec80 4784
d155bef0
AB
4785 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4786 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4787 if (r)
4788 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4789
c1dd4aa6
AG
4790 /* Have stored pci confspace at hand for restore in sudden PCI error */
4791 if (amdgpu_device_cache_pci_state(adev->pdev))
4792 pci_restore_state(pdev);
4793
8c3dd61c
KHF
4794 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4795 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4796 * ignore it
4797 */
8c3dd61c 4798 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4799 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4800
127ed492 4801 px = amdgpu_device_supports_px(adev);
d37a3929 4802
7b1c6263 4803 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4804 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4805 vga_switcheroo_register_client(adev->pdev,
4806 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4807
4808 if (px)
8c3dd61c 4809 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4810
5839d27d 4811 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
631af731 4812 amdgpu_xgmi_reset_on_init(adev);
e3c1b071 4813
4a74c38c
PY
4814 amdgpu_device_check_iommu_direct_map(adev);
4815
2965e635
ML
4816 adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4817 r = register_pm_notifier(&adev->pm_nb);
4818 if (r)
4819 goto failed;
4820
d38ceaf9 4821 return 0;
83ba126a 4822
970fd197 4823release_ras_con:
38eecbe0
CL
4824 if (amdgpu_sriov_vf(adev))
4825 amdgpu_virt_release_full_gpu(adev, true);
4826
4827 /* failed in exclusive mode due to timeout */
4828 if (amdgpu_sriov_vf(adev) &&
4829 !amdgpu_sriov_runtime(adev) &&
4830 amdgpu_virt_mmio_blocked(adev) &&
4831 !amdgpu_virt_wait_reset(adev)) {
4832 dev_err(adev->dev, "VF exclusive mode timeout\n");
4833 /* Don't send request since VF is inactive. */
4834 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4835 adev->virt.ops = NULL;
4836 r = -EAGAIN;
4837 }
970fd197
SY
4838 amdgpu_release_ras_context(adev);
4839
83ba126a 4840failed:
89041940 4841 amdgpu_vf_error_trans_all(adev);
8840a387 4842
83ba126a 4843 return r;
d38ceaf9
AD
4844}
4845
07775fc1
AG
4846static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4847{
62d5f9f7 4848
07775fc1
AG
4849 /* Clear all CPU mappings pointing to this device */
4850 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4851
4852 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4853 amdgpu_doorbell_fini(adev);
07775fc1
AG
4854
4855 iounmap(adev->rmmio);
4856 adev->rmmio = NULL;
4857 if (adev->mman.aper_base_kaddr)
4858 iounmap(adev->mman.aper_base_kaddr);
4859 adev->mman.aper_base_kaddr = NULL;
4860
4861 /* Memory manager related */
a0ba1279 4862 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4863 arch_phys_wc_del(adev->gmc.vram_mtrr);
4864 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4865 }
4866}
4867
d38ceaf9 4868/**
bbe04dec 4869 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4870 *
4871 * @adev: amdgpu_device pointer
4872 *
4873 * Tear down the driver info (all asics).
4874 * Called at driver shutdown.
4875 */
72c8c97b 4876void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4877{
aac89168 4878 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4879 flush_delayed_work(&adev->delayed_init_work);
af76ca8e
VZ
4880
4881 if (adev->mman.initialized)
4882 drain_workqueue(adev->mman.bdev.wq);
d0d13fe8 4883 adev->shutdown = true;
9f875167 4884
2965e635
ML
4885 unregister_pm_notifier(&adev->pm_nb);
4886
752c683d
ML
4887 /* make sure IB test finished before entering exclusive mode
4888 * to avoid preemption on IB test
b8920e1e 4889 */
519b8b76 4890 if (amdgpu_sriov_vf(adev)) {
752c683d 4891 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4892 amdgpu_virt_fini_data_exchange(adev);
4893 }
752c683d 4894
e5b03032
ML
4895 /* disable all interrupts */
4896 amdgpu_irq_disable_all(adev);
47fc644f 4897 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4898 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4899 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4900 else
4a580877 4901 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4902 }
8d35a259 4903 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4904
53e9d836 4905 if (adev->pm.sysfs_initialized)
7c868b59 4906 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4907 if (adev->ucode_sysfs_en)
4908 amdgpu_ucode_sysfs_fini(adev);
9c05636c 4909 amdgpu_device_attr_sysfs_fini(adev);
7957ec80 4910 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4911
af39e6f4 4912 amdgpu_reg_state_sysfs_fini(adev);
cbbab292 4913 amdgpu_xcp_sysfs_fini(adev);
af39e6f4 4914
232d1d43
SY
4915 /* disable ras feature must before hw fini */
4916 amdgpu_ras_pre_fini(adev);
4917
b7043800
AD
4918 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4919
e9669fb7 4920 amdgpu_device_ip_fini_early(adev);
d10d0daa 4921
a3848df6
YW
4922 amdgpu_irq_fini_hw(adev);
4923
b6fd6e0f
SK
4924 if (adev->mman.initialized)
4925 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4926
d10d0daa 4927 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4928
39934d3e
VP
4929 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4930 amdgpu_device_unmap_mmio(adev);
87172e89 4931
72c8c97b
AG
4932}
4933
4934void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4935{
bd22e44a 4936 int i, idx;
d37a3929 4937 bool px;
62d5f9f7 4938
a5c5d8d5 4939 amdgpu_device_ip_fini(adev);
b61badd2 4940 amdgpu_fence_driver_sw_fini(adev);
b31d3063 4941 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4942 adev->accel_working = false;
68ce8b24 4943 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
bd22e44a
CK
4944 for (i = 0; i < MAX_XCP; ++i) {
4945 dma_fence_put(adev->isolation[i].spearhead);
4946 amdgpu_sync_free(&adev->isolation[i].active);
4947 amdgpu_sync_free(&adev->isolation[i].prev);
4948 }
04442bf7
LL
4949
4950 amdgpu_reset_fini(adev);
4951
d38ceaf9 4952 /* free i2c buses */
1c0b144b 4953 amdgpu_i2c_fini(adev);
bfca0289 4954
6e8ca38e
LL
4955 if (adev->bios) {
4956 if (amdgpu_emu_mode != 1)
4957 amdgpu_atombios_fini(adev);
4958 amdgpu_bios_release(adev);
4959 }
d37a3929 4960
8a2b5139
LL
4961 kfree(adev->fru_info);
4962 adev->fru_info = NULL;
4963
b5aaa82e
FC
4964 kfree(adev->xcp_mgr);
4965 adev->xcp_mgr = NULL;
4966
127ed492 4967 px = amdgpu_device_supports_px(adev);
d37a3929 4968
7b1c6263 4969 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4970 apple_gmux_detect(NULL, NULL)))
84c8b22e 4971 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4972
4973 if (px)
83ba126a 4974 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4975
38d6be81 4976 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4977 vga_client_unregister(adev->pdev);
e9bc1bf7 4978
62d5f9f7
LS
4979 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4980
4981 iounmap(adev->rmmio);
4982 adev->rmmio = NULL;
62d5f9f7
LS
4983 drm_dev_exit(idx);
4984 }
4985
d155bef0
AB
4986 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4987 amdgpu_pmu_fini(adev);
72de33f8 4988 if (adev->mman.discovery_bin)
a190d1c7 4989 amdgpu_discovery_fini(adev);
72c8c97b 4990
cfbb6b00
AG
4991 amdgpu_reset_put_reset_domain(adev->reset_domain);
4992 adev->reset_domain = NULL;
4993
72c8c97b
AG
4994 kfree(adev->pci_state);
4995
d38ceaf9
AD
4996}
4997
58144d28
ND
4998/**
4999 * amdgpu_device_evict_resources - evict device resources
5000 * @adev: amdgpu device object
5001 *
5002 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
5003 * of the vram memory type. Mainly used for evicting device resources
5004 * at suspend time.
5005 *
5006 */
7863c155 5007static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 5008{
7863c155
ML
5009 int ret;
5010
c2ee5c2f
ML
5011 /* No need to evict vram on APUs unless going to S4 */
5012 if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
7863c155 5013 return 0;
58144d28 5014
7863c155 5015 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
924dda02 5016 if (ret) {
a3e510fd 5017 dev_warn(adev->dev, "evicting device resources failed\n");
924dda02
SZ
5018 return ret;
5019 }
5020
5021 if (adev->in_s4) {
5022 ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
5023 if (ret)
5024 dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
5025 }
7863c155 5026 return ret;
58144d28 5027}
d38ceaf9
AD
5028
5029/*
5030 * Suspend & resume.
5031 */
2965e635
ML
5032/**
5033 * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
5034 * @nb: notifier block
5035 * @mode: suspend mode
5036 * @data: data
5037 *
5038 * This function is called when the system is about to suspend or hibernate.
06f2dcc2
AD
5039 * It is used to set the appropriate flags so that eviction can be optimized
5040 * in the pm prepare callback.
2965e635
ML
5041 */
5042static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
5043 void *data)
5044{
5045 struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
2965e635
ML
5046
5047 switch (mode) {
5048 case PM_HIBERNATION_PREPARE:
5049 adev->in_s4 = true;
06f2dcc2
AD
5050 break;
5051 case PM_POST_HIBERNATION:
5052 adev->in_s4 = false;
2965e635
ML
5053 break;
5054 }
5055
5056 return NOTIFY_DONE;
5057}
5058
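/*
 * A minimal sketch (not part of this listing) of how the notifier block above
 * is expected to be hooked up during device init; the unregister_pm_notifier()
 * call in amdgpu_device_fini_hw() above is its counterpart:
 *
 *	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
 *	register_pm_notifier(&adev->pm_nb);
 */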
5095d541
ML
5059/**
5060 * amdgpu_device_prepare - prepare for device suspend
5061 *
5062 * @dev: drm dev pointer
5063 *
5064 * Prepare to put the hw in the suspend state (all asics).
5065 * Returns 0 for success or an error on failure.
5066 * Called at driver suspend.
5067 */
5068int amdgpu_device_prepare(struct drm_device *dev)
5069{
5070 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 5071 int i, r;
5095d541
ML
5072
5073 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5074 return 0;
5075
5076 /* Evict the majority of BOs before starting suspend sequence */
5077 r = amdgpu_device_evict_resources(adev);
5078 if (r)
ce8f7d95 5079 return r;
5095d541 5080
0355b24b
ML
5081 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
5082
cb11ca32
ML
5083 for (i = 0; i < adev->num_ip_blocks; i++) {
5084 if (!adev->ip_blocks[i].status.valid)
5085 continue;
5086 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
5087 continue;
94b2e07a 5088 r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
cb11ca32 5089 if (r)
ce8f7d95 5090 return r;
cb11ca32
ML
5091 }
5092
5095d541
ML
5093 return 0;
5094}
5095
64c3e4a8
ML
5096/**
5097 * amdgpu_device_complete - complete power state transition
5098 *
5099 * @dev: drm dev pointer
5100 *
5101 * Undo the changes from amdgpu_device_prepare. This will be
5102 * called on all resume transitions, including those that failed.
5103 */
5104void amdgpu_device_complete(struct drm_device *dev)
5105{
5106 struct amdgpu_device *adev = drm_to_adev(dev);
5107 int i;
5108
5109 for (i = 0; i < adev->num_ip_blocks; i++) {
5110 if (!adev->ip_blocks[i].status.valid)
5111 continue;
5112 if (!adev->ip_blocks[i].version->funcs->complete)
5113 continue;
5114 adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
5115 }
5116}
5117
d38ceaf9 5118/**
810ddc3a 5119 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 5120 *
87e3f136 5121 * @dev: drm dev pointer
4cf50bae 5122 * @notify_clients: notify in-kernel DRM clients
d38ceaf9
AD
5123 *
5124 * Puts the hw in the suspend state (all asics).
5125 * Returns 0 for success or an error on failure.
5126 * Called at driver suspend.
5127 */
4cf50bae 5128int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
d38ceaf9 5129{
a2e15b0e 5130 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 5131 int r = 0;
d38ceaf9 5132
d38ceaf9
AD
5133 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5134 return 0;
5135
44779b43 5136 adev->in_suspend = true;
3fa8f89d 5137
d7274ec7 5138 if (amdgpu_sriov_vf(adev)) {
54f7a24e
ED
5139 if (!adev->in_s0ix && !adev->in_runpm)
5140 amdgpu_amdkfd_suspend_process(adev);
d7274ec7
BZ
5141 amdgpu_virt_fini_data_exchange(adev);
5142 r = amdgpu_virt_request_full_gpu(adev, false);
5143 if (r)
5144 return r;
5145 }
5146
127ed492 5147 if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
a3e510fd 5148 dev_warn(adev->dev, "smart shift update failed\n");
3fa8f89d 5149
4cf50bae
TZ
5150 if (notify_clients)
5151 drm_client_dev_suspend(adev_to_drm(adev), false);
5f818173 5152
beff74bc 5153 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 5154
5e6932fe 5155 amdgpu_ras_suspend(adev);
5156
2196927b 5157 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 5158
c2c72221 5159 if (!adev->in_s0ix) {
54f7a24e 5160 amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
c2c72221 5161 amdgpu_userq_suspend(adev);
c2c72221 5162 }
94fa5660 5163
7863c155
ML
5164 r = amdgpu_device_evict_resources(adev);
5165 if (r)
5166 return r;
d38ceaf9 5167
dab96d8b
AD
5168 amdgpu_ttm_set_buffer_funcs_status(adev, false);
5169
8d35a259 5170 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 5171
2196927b 5172 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 5173
d7274ec7
BZ
5174 if (amdgpu_sriov_vf(adev))
5175 amdgpu_virt_release_full_gpu(adev, false);
5176
2e9b1523
PY
5177 r = amdgpu_dpm_notify_rlc_state(adev, false);
5178 if (r)
5179 return r;
5180
d38ceaf9
AD
5181 return 0;
5182}
5183
855a2a02
SZ
5184static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
5185{
5186 int r;
5187 unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
5188
4108c2be
SZ
5189 /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
5190 * may not work. The access could be blocked by nBIF protection as VF isn't in
5191 * exclusive access mode. Now that exclusive access is enabled, disable and
5192 * re-enable MSIX so that QEMU reprograms the MSIX table.
5193 */
5194 amdgpu_restore_msix(adev);
5195
855a2a02
SZ
5196 r = adev->gfxhub.funcs->get_xgmi_info(adev);
5197 if (r)
5198 return r;
5199
5200 dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
5201 prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
5202
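	/*
	 * The VF may sit at a different physical position in the XGMI hive
	 * after migration, so recompute the VRAM base offset from the new
	 * node id: mc_fb_offset + physical_node_id * node_segment_size.
	 */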
5203 adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
5204 adev->vm_manager.vram_base_offset +=
5205 adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
5206
5207 return 0;
5208}
5209
d38ceaf9 5210/**
810ddc3a 5211 * amdgpu_device_resume - initiate device resume
d38ceaf9 5212 *
87e3f136 5213 * @dev: drm dev pointer
4cf50bae 5214 * @notify_clients: notify in-kernel DRM clients
d38ceaf9
AD
5215 *
5216 * Bring the hw back to operating state (all asics).
5217 * Returns 0 for success or an error on failure.
5218 * Called at driver resume.
5219 */
4cf50bae 5220int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
d38ceaf9 5221{
1348969a 5222 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 5223 int r = 0;
d38ceaf9 5224
d7274ec7
BZ
5225 if (amdgpu_sriov_vf(adev)) {
5226 r = amdgpu_virt_request_full_gpu(adev, true);
5227 if (r)
5228 return r;
5229 }
5230
855a2a02
SZ
5231 if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
5232 r = amdgpu_virt_resume(adev);
5233 if (r)
5234 goto exit;
5235 }
5236
d38ceaf9
AD
5237 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5238 return 0;
5239
62498733 5240 if (adev->in_s0ix)
bc143d8b 5241 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 5242
d38ceaf9 5243 /* post card */
39c640c0 5244 if (amdgpu_device_need_post(adev)) {
4d2997ab 5245 r = amdgpu_device_asic_init(adev);
74b0b157 5246 if (r)
aac89168 5247 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 5248 }
d38ceaf9 5249
06ec9070 5250 r = amdgpu_device_ip_resume(adev);
d7274ec7 5251
e6707218 5252 if (r) {
aac89168 5253 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 5254 goto exit;
e6707218 5255 }
5ceb54c6 5256
c004d44e 5257 if (!adev->in_s0ix) {
54f7a24e 5258 r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5d3a2d95 5259 if (r)
3c22c1ea 5260 goto exit;
56801cb8 5261
c2c72221
AD
5262 r = amdgpu_userq_resume(adev);
5263 if (r)
5264 goto exit;
5d3a2d95 5265 }
756e6880 5266
8ed79c40
TH
5267 r = amdgpu_device_ip_late_init(adev);
5268 if (r)
5269 goto exit;
5270
5271 queue_delayed_work(system_wq, &adev->delayed_init_work,
5272 msecs_to_jiffies(AMDGPU_RESUME_MS));
3c22c1ea
SF
5273exit:
5274 if (amdgpu_sriov_vf(adev)) {
5275 amdgpu_virt_init_data_exchange(adev);
5276 amdgpu_virt_release_full_gpu(adev, true);
54f7a24e
ED
5277
5278 if (!adev->in_s0ix && !r && !adev->in_runpm)
5279 r = amdgpu_amdkfd_resume_process(adev);
3c22c1ea
SF
5280 }
5281
5282 if (r)
5283 return r;
5284
96a5d8d4 5285 /* Make sure IB tests flushed */
beff74bc 5286 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 5287
4cf50bae
TZ
5288 if (notify_clients)
5289 drm_client_dev_resume(adev_to_drm(adev), false);
d38ceaf9 5290
5e6932fe 5291 amdgpu_ras_resume(adev);
5292
d09ef243
AD
5293 if (adev->mode_info.num_crtc) {
5294 /*
5295 * Most of the connector probing functions try to acquire runtime pm
5296 * refs to ensure that the GPU is powered on when connector polling is
5297 * performed. Since we're calling this from a runtime PM callback,
5298 * trying to acquire rpm refs will cause us to deadlock.
5299 *
5300 * Since we're guaranteed to be holding the rpm lock, it's safe to
5301 * temporarily disable the rpm helpers so this doesn't deadlock us.
5302 */
23a1a9e5 5303#ifdef CONFIG_PM
d09ef243 5304 dev->dev->power.disable_depth++;
23a1a9e5 5305#endif
d09ef243
AD
5306 if (!adev->dc_enabled)
5307 drm_helper_hpd_irq_event(dev);
5308 else
5309 drm_kms_helper_hotplug_event(dev);
23a1a9e5 5310#ifdef CONFIG_PM
d09ef243 5311 dev->dev->power.disable_depth--;
23a1a9e5 5312#endif
d09ef243 5313 }
95a16160
APS
5314
5315 amdgpu_vram_mgr_clear_reset_blocks(adev);
44779b43
RZ
5316 adev->in_suspend = false;
5317
127ed492 5318 if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
a3e510fd 5319 dev_warn(adev->dev, "smart shift update failed\n");
3fa8f89d 5320
4d3b9ae5 5321 return 0;
d38ceaf9
AD
5322}
5323
e3ecdffa
AD
5324/**
5325 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
5326 *
5327 * @adev: amdgpu_device pointer
5328 *
5329 * The list of all the hardware IPs that make up the asic is walked and
5330 * the check_soft_reset callbacks are run. check_soft_reset determines
5331 * if the asic is still hung or not.
5332 * Returns true if any of the IPs are still in a hung state, false if not.
5333 */
06ec9070 5334static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
5335{
5336 int i;
5337 bool asic_hang = false;
5338
f993d628
ML
5339 if (amdgpu_sriov_vf(adev))
5340 return true;
5341
8bc04c29
AD
5342 if (amdgpu_asic_need_full_reset(adev))
5343 return true;
5344
63fbf42f 5345 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 5346 if (!adev->ip_blocks[i].status.valid)
63fbf42f 5347 continue;
a1255107
AD
5348 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5349 adev->ip_blocks[i].status.hang =
6a9456e0
SK
5350 adev->ip_blocks[i].version->funcs->check_soft_reset(
5351 &adev->ip_blocks[i]);
a1255107 5352 if (adev->ip_blocks[i].status.hang) {
aac89168 5353 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
5354 asic_hang = true;
5355 }
5356 }
5357 return asic_hang;
5358}
5359
e3ecdffa
AD
5360/**
5361 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5362 *
5363 * @adev: amdgpu_device pointer
5364 *
5365 * The list of all the hardware IPs that make up the asic is walked and the
5366 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
5367 * handles any IP specific hardware or software state changes that are
5368 * necessary for a soft reset to succeed.
5369 * Returns 0 on success, negative error code on failure.
5370 */
06ec9070 5371static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
5372{
5373 int i, r = 0;
5374
5375 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 5376 if (!adev->ip_blocks[i].status.valid)
d31a501e 5377 continue;
a1255107
AD
5378 if (adev->ip_blocks[i].status.hang &&
5379 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
9d5ee7ce 5380 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
d31a501e
CZ
5381 if (r)
5382 return r;
5383 }
5384 }
5385
5386 return 0;
5387}
5388
e3ecdffa
AD
5389/**
5390 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5391 *
5392 * @adev: amdgpu_device pointer
5393 *
5394 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
5395 * reset is necessary to recover.
5396 * Returns true if a full asic reset is required, false if not.
5397 */
06ec9070 5398static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 5399{
da146d3b
AD
5400 int i;
5401
8bc04c29
AD
5402 if (amdgpu_asic_need_full_reset(adev))
5403 return true;
5404
da146d3b 5405 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 5406 if (!adev->ip_blocks[i].status.valid)
da146d3b 5407 continue;
a1255107
AD
5408 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5409 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5410 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
5411 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 5413 if (adev->ip_blocks[i].status.hang) {
aac89168 5414 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
5415 return true;
5416 }
5417 }
35d782fe
CZ
5418 }
5419 return false;
5420}
5421
e3ecdffa
AD
5422/**
5423 * amdgpu_device_ip_soft_reset - do a soft reset
5424 *
5425 * @adev: amdgpu_device pointer
5426 *
5427 * The list of all the hardware IPs that make up the asic is walked and the
5428 * soft_reset callbacks are run if the block is hung. soft_reset handles any
5429 * IP specific hardware or software state changes that are necessary to soft
5430 * reset the IP.
5431 * Returns 0 on success, negative error code on failure.
5432 */
06ec9070 5433static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
5434{
5435 int i, r = 0;
5436
5437 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 5438 if (!adev->ip_blocks[i].status.valid)
35d782fe 5439 continue;
a1255107
AD
5440 if (adev->ip_blocks[i].status.hang &&
5441 adev->ip_blocks[i].version->funcs->soft_reset) {
0ef2a1e7 5442 r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
35d782fe
CZ
5443 if (r)
5444 return r;
5445 }
5446 }
5447
5448 return 0;
5449}
5450
e3ecdffa
AD
5451/**
5452 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5453 *
5454 * @adev: amdgpu_device pointer
5455 *
5456 * The list of all the hardware IPs that make up the asic is walked and the
5457 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5458 * handles any IP specific hardware or software state changes that are
5459 * necessary after the IP has been soft reset.
5460 * Returns 0 on success, negative error code on failure.
5461 */
06ec9070 5462static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
5463{
5464 int i, r = 0;
5465
5466 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 5467 if (!adev->ip_blocks[i].status.valid)
35d782fe 5468 continue;
a1255107
AD
5469 if (adev->ip_blocks[i].status.hang &&
5470 adev->ip_blocks[i].version->funcs->post_soft_reset)
e15ec812 5471 r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
35d782fe
CZ
5472 if (r)
5473 return r;
5474 }
5475
5476 return 0;
5477}
5478
e3ecdffa 5479/**
06ec9070 5480 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 5481 *
982a820b 5482 * @adev: amdgpu_device pointer
25c01191 5483 * @reset_context: amdgpu reset context pointer
5740682e
ML
5484 *
5485 * Do a VF FLR and reinitialize the ASIC.
3f48c681 5486 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
5487 */
5488static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
25c01191 5489 struct amdgpu_reset_context *reset_context)
5740682e
ML
5490{
5491 int r;
a5f67c93 5492 struct amdgpu_hive_info *hive = NULL;
428890a3 5493
25c01191 5494 if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
cbda2758
VC
5495 if (!amdgpu_ras_get_fed_status(adev))
5496 amdgpu_virt_ready_to_reset(adev);
5c0a1cdd 5497 amdgpu_virt_wait_reset(adev);
25c01191 5498 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5740682e 5499 r = amdgpu_virt_request_full_gpu(adev, true);
25c01191 5500 } else {
5740682e 5501 r = amdgpu_virt_reset_gpu(adev);
25c01191 5502 }
5740682e
ML
5503 if (r)
5504 return r;
25c01191 5505
e1ee2111 5506 amdgpu_ras_clear_err_state(adev);
f734b213 5507 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 5508
83f24a8f
HC
5509 /* some sw clean up VF needs to do before recover */
5510 amdgpu_virt_post_reset(adev);
5511
a90ad3c2 5512 /* Resume IP prior to SMC */
06ec9070 5513 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e 5514 if (r)
6e4aa08f 5515 return r;
a90ad3c2 5516
c9ffa427 5517 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 5518
7a3e0bb2
RZ
5519 r = amdgpu_device_fw_loading(adev);
5520 if (r)
5521 return r;
5522
a90ad3c2 5523 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 5524 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e 5525 if (r)
6e4aa08f 5526 return r;
a90ad3c2 5527
a5f67c93
ZL
5528 hive = amdgpu_get_xgmi_hive(adev);
5529 /* Update PSP FW topology after reset */
5530 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5531 r = amdgpu_xgmi_update_topology(hive, adev);
a5f67c93
ZL
5532 if (hive)
5533 amdgpu_put_xgmi_hive(hive);
6e4aa08f
YL
5534 if (r)
5535 return r;
a5f67c93 5536
6e4aa08f
YL
5537 r = amdgpu_ib_ring_tests(adev);
5538 if (r)
5539 return r;
a90ad3c2 5540
7181faaa 5541 if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
e3526257 5542 amdgpu_inc_vram_lost(adev);
a90ad3c2 5543
6e4aa08f
YL
5544 /* This needs to be called during full access, so we can't do it later like
5545 * bare-metal does.
5546 */
5547 amdgpu_amdkfd_post_reset(adev);
5548 amdgpu_virt_release_full_gpu(adev, true);
7258fa31 5549
4752cac3
YL
5550 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5551 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5552 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5f571c61 5553 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
d7f5c13e 5554 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4752cac3
YL
5555 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5556 amdgpu_ras_resume(adev);
84a2947e
VS
5557
5558 amdgpu_virt_ras_telemetry_post_reset(adev);
5559
6e4aa08f 5560 return 0;
a90ad3c2
ML
5561}
5562
9a1cddd6 5563/**
0859eb54 5564 * amdgpu_device_has_job_running - check if there is any unfinished job
9a1cddd6 5565 *
982a820b 5566 * @adev: amdgpu_device pointer
9a1cddd6 5567 *
0859eb54
SF
5568 * Check if there is any job running on the device when the guest driver receives
5569 * a FLR notification from the host driver. If there are still jobs running, the
5570 * guest driver will not respond to the FLR reset. Instead, it lets the jobs hit
5571 * the timeout and then issues the reset request itself.
9a1cddd6 5572 */
5573bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5574{
5575 int i;
9a1cddd6 5576
5577 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5578 struct amdgpu_ring *ring = adev->rings[i];
5579
9749c868 5580 if (!amdgpu_ring_sched_ready(ring))
9a1cddd6 5581 continue;
5582
0859eb54 5583 if (amdgpu_fence_count_emitted(ring))
9a1cddd6 5584 return true;
5585 }
5586 return false;
5587}
5588
12938fad
CK
5589/**
5590 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5591 *
982a820b 5592 * @adev: amdgpu_device pointer
12938fad
CK
5593 *
5594 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5595 * a hung GPU.
5596 */
5597bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5598{
12938fad 5599
3ba7b418
AG
5600 if (amdgpu_gpu_recovery == 0)
5601 goto disabled;
5602
1a11a65d
YC
5603 /* Skip soft reset check in fatal error mode */
5604 if (!amdgpu_ras_is_poison_mode_supported(adev))
5605 return true;
5606
3ba7b418
AG
5607 if (amdgpu_sriov_vf(adev))
5608 return true;
5609
5610 if (amdgpu_gpu_recovery == -1) {
5611 switch (adev->asic_type) {
b3523c45
AD
5612#ifdef CONFIG_DRM_AMDGPU_SI
5613 case CHIP_VERDE:
5614 case CHIP_TAHITI:
5615 case CHIP_PITCAIRN:
5616 case CHIP_OLAND:
5617 case CHIP_HAINAN:
5618#endif
5619#ifdef CONFIG_DRM_AMDGPU_CIK
5620 case CHIP_KAVERI:
5621 case CHIP_KABINI:
5622 case CHIP_MULLINS:
5623#endif
5624 case CHIP_CARRIZO:
5625 case CHIP_STONEY:
5626 case CHIP_CYAN_SKILLFISH:
3ba7b418 5627 goto disabled;
b3523c45
AD
5628 default:
5629 break;
3ba7b418 5630 }
12938fad
CK
5631 }
5632
5633 return true;
3ba7b418
AG
5634
5635disabled:
aac89168 5636 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 5637 return false;
12938fad
CK
5638}
5639
5c03e584
FX
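/*
 * amdgpu_device_mode1_reset - perform a full ASIC (mode1) reset
 *
 * Caches the PCI config space and disables bus mastering, then triggers the
 * reset through the SMU when supported, or through the PSP otherwise.  The
 * cached PCI state is restored afterwards and the NBIO memsize register is
 * polled until the ASIC comes back out of reset.
 * Returns 0 on success, negative error code on failure.
 */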
5640int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5641{
47fc644f
SS
5642 u32 i;
5643 int ret = 0;
5c03e584 5644
6e8ca38e
LL
5645 if (adev->bios)
5646 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 5647
47fc644f 5648 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 5649
e779af8e
LL
5650 /* Cache the state before bus master disable. The saved config space
5651 * values are used in other cases like restore after mode-2 reset.
5652 */
5653 amdgpu_device_cache_pci_state(adev->pdev);
5654
47fc644f
SS
5655 /* disable BM */
5656 pci_clear_master(adev->pdev);
5c03e584 5657
47fc644f
SS
5658 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5659 dev_info(adev->dev, "GPU smu mode1 reset\n");
5660 ret = amdgpu_dpm_mode1_reset(adev);
5661 } else {
5662 dev_info(adev->dev, "GPU psp mode1 reset\n");
5663 ret = psp_gpu_reset(adev);
5664 }
5c03e584 5665
47fc644f 5666 if (ret)
7d442437 5667 goto mode1_reset_failed;
5c03e584 5668
47fc644f 5669 amdgpu_device_load_pci_state(adev->pdev);
7656168a
LL
5670 ret = amdgpu_psp_wait_for_bootloader(adev);
5671 if (ret)
7d442437 5672 goto mode1_reset_failed;
5c03e584 5673
47fc644f
SS
5674 /* wait for asic to come out of reset */
5675 for (i = 0; i < adev->usec_timeout; i++) {
5676 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 5677
47fc644f
SS
5678 if (memsize != 0xffffffff)
5679 break;
5680 udelay(1);
5681 }
5c03e584 5682
7d442437
HZ
5683 if (i >= adev->usec_timeout) {
5684 ret = -ETIMEDOUT;
5685 goto mode1_reset_failed;
5686 }
5687
6e8ca38e
LL
5688 if (adev->bios)
5689 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
7656168a 5690
7d442437
HZ
5691 return 0;
5692
5693mode1_reset_failed:
5694 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 5695 return ret;
5c03e584 5696}
5c6dd71e 5697
8ba904f5
CS
5698int amdgpu_device_link_reset(struct amdgpu_device *adev)
5699{
5700 int ret = 0;
5701
5702 dev_info(adev->dev, "GPU link reset\n");
5703
5704 if (!adev->pcie_reset_ctx.occurs_dpc)
5705 ret = amdgpu_dpm_link_reset(adev);
5706
5707 if (ret)
5708 goto link_reset_failed;
5709
5710 ret = amdgpu_psp_wait_for_bootloader(adev);
5711 if (ret)
5712 goto link_reset_failed;
5713
5714 return 0;
5715
5716link_reset_failed:
5717 dev_err(adev->dev, "GPU link reset failed\n");
5718 return ret;
5719}
5720
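/*
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * Force-completes the hardware fences on all rings, bumps the guilty job's
 * karma and, unless a dedicated reset handler takes over, decides between an
 * IP soft reset and a full reset on bare metal.  The IP state dump for the
 * coredump and the IP suspend for a full reset are also done here.
 * Returns 0 on success, negative error code on failure.
 */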
e3c1b071 5721int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 5722 struct amdgpu_reset_context *reset_context)
26bc5340 5723{
5c1e6fa4 5724 int i, r = 0;
04442bf7 5725 struct amdgpu_job *job = NULL;
13d8850a 5726 struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
04442bf7
LL
5727 bool need_full_reset =
5728 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5729
5730 if (reset_context->reset_req_dev == adev)
5731 job = reset_context->job;
71182665 5732
f83cec3b
VS
5733 if (amdgpu_sriov_vf(adev))
5734 amdgpu_virt_pre_reset(adev);
b602ca5f 5735
9e225fb9
AG
5736 amdgpu_fence_driver_isr_toggle(adev, true);
5737
71182665 5738 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
5739 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5740 struct amdgpu_ring *ring = adev->rings[i];
5741
9749c868 5742 if (!amdgpu_ring_sched_ready(ring))
0875dc9e 5743 continue;
5740682e 5744
b8920e1e
SS
5745 /* Clear job fence from fence drv to avoid force_completion
5746 * leave NULL and vm flush fence in fence drv
5747 */
5c1e6fa4 5748 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5749
2f9d4084
ML
5750 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5751 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5752 }
d38ceaf9 5753
9e225fb9
AG
5754 amdgpu_fence_driver_isr_toggle(adev, false);
5755
ff99849b 5756 if (job && job->vm)
222b5f04
AG
5757 drm_sched_increase_karma(&job->base);
5758
04442bf7 5759 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5760 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5761 if (r == -EOPNOTSUPP)
404b277b
LL
5762 r = 0;
5763 else
04442bf7
LL
5764 return r;
5765
1d721ed6 5766 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5767 if (!amdgpu_sriov_vf(adev)) {
5768
5769 if (!need_full_reset)
5770 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5771
360cd081
LG
5772 if (!need_full_reset && amdgpu_gpu_recovery &&
5773 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5774 amdgpu_device_ip_pre_soft_reset(adev);
5775 r = amdgpu_device_ip_soft_reset(adev);
5776 amdgpu_device_ip_post_soft_reset(adev);
5777 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5778 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5779 need_full_reset = true;
5780 }
5781 }
5782
13d8850a 5783 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
13d8850a
SK
5784 dev_info(tmp_adev->dev, "Dumping IP State\n");
5785 /* Trigger ip dump before we reset the asic */
5786 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5787 if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5788 tmp_adev->ip_blocks[i].version->funcs
fa73462d 5789 ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
13d8850a
SK
5790 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5791 }
5792
26bc5340
AG
5793 if (need_full_reset)
5794 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5795 if (need_full_reset)
5796 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5797 else
5798 clear_bit(AMDGPU_NEED_FULL_RESET,
5799 &reset_context->flags);
26bc5340
AG
5800 }
5801
5802 return r;
5803}
5804
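/*
 * amdgpu_device_reinit_after_reset - bring devices back up after an ASIC reset
 *
 * For every device in the reset list, re-runs the init steps a full reset
 * invalidates: atom BIOS post, IP resume phases 1-3, firmware loading, XCP
 * partition mode restore, late init and RAS resume, plus VRAM-lost handling
 * and the optional devcoredump.  IB ring tests validate the result.
 * Returns 0 on success, negative error code on failure.
 */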
6e37ae8b 5805int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
26bc5340 5806{
6e37ae8b
LL
5807 struct list_head *device_list_handle;
5808 bool full_reset, vram_lost = false;
5809 struct amdgpu_device *tmp_adev;
a86e0c0e 5810 int r, init_level;
ce316fa5 5811
6e37ae8b 5812 device_list_handle = reset_context->reset_device_list;
26bc5340 5813
6e37ae8b
LL
5814 if (!device_list_handle)
5815 return -EINVAL;
43c4d576 5816
6e37ae8b 5817 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
00eaa571 5818
a86e0c0e
LL
5819 /**
5820 * If this is a reset on init, use the default init level; otherwise keep the
5821 * level at the reset-recovery level.
5822 */
5823 if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5824 init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5825 else
5826 init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5827
6e37ae8b 5828 r = 0;
655ce9cb 5829 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
a86e0c0e 5830 amdgpu_set_init_level(tmp_adev, init_level);
6e37ae8b 5831 if (full_reset) {
26bc5340 5832 /* post card */
e1ee2111 5833 amdgpu_ras_clear_err_state(tmp_adev);
e3c1b071 5834 r = amdgpu_device_asic_init(tmp_adev);
5835 if (r) {
aac89168 5836 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5837 } else {
26bc5340 5838 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5839
26bc5340
AG
5840 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5841 if (r)
5842 goto out;
5843
5844 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785 5845
ea137071 5846 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
6122f5c7 5847 amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
a7691785 5848
26bc5340 5849 if (vram_lost) {
a3e510fd
LL
5850 dev_info(
5851 tmp_adev->dev,
5852 "VRAM is lost due to GPU reset!\n");
e3526257 5853 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5854 }
5855
26bc5340
AG
5856 r = amdgpu_device_fw_loading(tmp_adev);
5857 if (r)
5858 return r;
5859
c45e38f2
LL
5860 r = amdgpu_xcp_restore_partition_mode(
5861 tmp_adev->xcp_mgr);
5862 if (r)
5863 goto out;
5864
26bc5340
AG
5865 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5866 if (r)
5867 goto out;
5868
b7043800
AD
5869 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5870 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5871
73dae652
AD
5872 r = amdgpu_device_ip_resume_phase3(tmp_adev);
5873 if (r)
5874 goto out;
5875
26bc5340
AG
5876 if (vram_lost)
5877 amdgpu_device_fill_reset_magic(tmp_adev);
5878
fdafb359
EQ
5879 /*
5880 * Add this ASIC back as tracked since the reset has
5881 * completed successfully.
5882 */
5883 amdgpu_register_gpu_instance(tmp_adev);
5884
04442bf7
LL
5885 if (!reset_context->hive &&
5886 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5887 amdgpu_xgmi_add_device(tmp_adev);
5888
7c04ca50 5889 r = amdgpu_device_ip_late_init(tmp_adev);
5890 if (r)
5891 goto out;
5892
4cf50bae 5893 drm_client_dev_resume(adev_to_drm(tmp_adev), false);
565d1941 5894
e8fbaf03
GC
5895 /*
5896 * The GPU enters a bad state once the number of faulty
5897 * pages retired due to ECC errors has reached the
5898 * threshold, and ras recovery is scheduled next. So add
5899 * one check here to break recovery if the bad page
5900 * threshold has indeed been exceeded, and remind the
5901 * user to either retire this GPU or set a bigger
5902 * bad_page_threshold value to fix this when probing
5903 * the driver again.
5904 */
dd3e2962 5905 if (!amdgpu_ras_is_rma(tmp_adev)) {
e8fbaf03
GC
5906 /* must succeed. */
5907 amdgpu_ras_resume(tmp_adev);
5908 } else {
5909 r = -EINVAL;
5910 goto out;
5911 }
e79a04d5 5912
26bc5340 5913 /* Update PSP FW topology after reset */
04442bf7
LL
5914 if (reset_context->hive &&
5915 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5916 r = amdgpu_xgmi_update_topology(
5917 reset_context->hive, tmp_adev);
26bc5340
AG
5918 }
5919 }
5920
26bc5340
AG
5921out:
5922 if (!r) {
a86e0c0e
LL
5923 /* IP init is complete now, set level as default */
5924 amdgpu_set_init_level(tmp_adev,
5925 AMDGPU_INIT_LEVEL_DEFAULT);
26bc5340
AG
5926 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5927 r = amdgpu_ib_ring_tests(tmp_adev);
5928 if (r) {
5929 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5930 r = -EAGAIN;
5931 goto end;
5932 }
5933 }
5934
7181faaa 5935 if (r)
26bc5340
AG
5936 tmp_adev->asic_reset_res = r;
5937 }
5938
5939end:
6e37ae8b
LL
5940 return r;
5941}
5942
5943int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5944 struct amdgpu_reset_context *reset_context)
5945{
5946 struct amdgpu_device *tmp_adev = NULL;
5947 bool need_full_reset, skip_hw_reset;
5948 int r = 0;
5949
5950 /* Try reset handler method first */
5951 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5952 reset_list);
5953
5954 reset_context->reset_device_list = device_list_handle;
5955 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5956 /* If reset handler not implemented, continue; otherwise return */
5957 if (r == -EOPNOTSUPP)
5958 r = 0;
5959 else
5960 return r;
5961
5962 /* Reset handler not implemented, use the default method */
5963 need_full_reset =
5964 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5965 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5966
5967 /*
5968 * ASIC reset has to be done on all XGMI hive nodes ASAP
5969 * to allow proper links negotiation in FW (within 1 sec)
5970 */
5971 if (!skip_hw_reset && need_full_reset) {
5972 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5973 /* For XGMI run all resets in parallel to speed up the process */
5974 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5975 if (!queue_work(system_unbound_wq,
5976 &tmp_adev->xgmi_reset_work))
5977 r = -EALREADY;
5978 } else
5979 r = amdgpu_asic_reset(tmp_adev);
5980
5981 if (r) {
5982 dev_err(tmp_adev->dev,
5983 "ASIC reset failed with error, %d for drm dev, %s",
5984 r, adev_to_drm(tmp_adev)->unique);
5985 goto out;
5986 }
5987 }
5988
5989 /* For XGMI wait for all resets to complete before proceed */
5990 if (!r) {
5991 list_for_each_entry(tmp_adev, device_list_handle,
5992 reset_list) {
5993 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5994 flush_work(&tmp_adev->xgmi_reset_work);
5995 r = tmp_adev->asic_reset_res;
5996 if (r)
5997 break;
5998 }
5999 }
6000 }
6001 }
6002
6003 if (!r && amdgpu_ras_intr_triggered()) {
6004 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6005 amdgpu_ras_reset_error_count(tmp_adev,
6006 AMDGPU_RAS_BLOCK__MMHUB);
6007 }
6008
6009 amdgpu_ras_intr_cleared();
6010 }
6011
6012 r = amdgpu_device_reinit_after_reset(reset_context);
6013 if (r == -EAGAIN)
04442bf7
LL
6014 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6015 else
6016 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6e37ae8b
LL
6017
6018out:
26bc5340
AG
6019 return r;
6020}
6021
e923be99 6022static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 6023{
5740682e 6024
a3a09142
AD
6025 switch (amdgpu_asic_reset_method(adev)) {
6026 case AMD_RESET_METHOD_MODE1:
8ba904f5 6027 case AMD_RESET_METHOD_LINK:
a3a09142
AD
6028 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
6029 break;
6030 case AMD_RESET_METHOD_MODE2:
6031 adev->mp1_state = PP_MP1_STATE_RESET;
6032 break;
6033 default:
6034 adev->mp1_state = PP_MP1_STATE_NONE;
6035 break;
6036 }
26bc5340 6037}
d38ceaf9 6038
e923be99 6039static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 6040{
89041940 6041 amdgpu_vf_error_trans_all(adev);
a3a09142 6042 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
6043}
6044
3f12acc8
EQ
6045static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
6046{
6047 struct pci_dev *p = NULL;
6048
6049 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
6050 adev->pdev->bus->number, 1);
6051 if (p) {
6052 pm_runtime_enable(&(p->dev));
6053 pm_runtime_resume(&(p->dev));
6054 }
b85e285e
YY
6055
6056 pci_dev_put(p);
3f12acc8
EQ
6057}
6058
6059static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
6060{
6061 enum amd_reset_method reset_method;
6062 struct pci_dev *p = NULL;
6063 u64 expires;
6064
6065 /*
6066 * For now, only BACO and mode1 reset are confirmed
6067 * to suffer the audio issue if the codec is not properly suspended.
6068 */
6069 reset_method = amdgpu_asic_reset_method(adev);
6070 if ((reset_method != AMD_RESET_METHOD_BACO) &&
6071 (reset_method != AMD_RESET_METHOD_MODE1))
6072 return -EINVAL;
6073
6074 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
6075 adev->pdev->bus->number, 1);
6076 if (!p)
6077 return -ENODEV;
6078
6079 expires = pm_runtime_autosuspend_expiration(&(p->dev));
6080 if (!expires)
6081 /*
6082 * If we cannot get the audio device autosuspend delay,
6083 * a fixed 4S interval will be used. Since 3S is the
6084 * audio controller's default autosuspend delay setting,
6085 * the 4S used here is guaranteed to cover it.
6086 */
54b7feb9 6087 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
6088
6089 while (!pm_runtime_status_suspended(&(p->dev))) {
6090 if (!pm_runtime_suspend(&(p->dev)))
6091 break;
6092
6093 if (expires < ktime_get_mono_fast_ns()) {
6094 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 6095 pci_dev_put(p);
3f12acc8
EQ
6096 /* TODO: abort the succeeding gpu reset? */
6097 return -ETIMEDOUT;
6098 }
6099 }
6100
6101 pm_runtime_disable(&(p->dev));
6102
b85e285e 6103 pci_dev_put(p);
3f12acc8
EQ
6104 return 0;
6105}
6106
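/*
 * Cancel reset work that was queued before this reset ran so that a stale
 * request does not trigger a second, unnecessary reset afterwards.
 */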
d193b12b 6107static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
6108{
6109 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
6110
6111#if defined(CONFIG_DEBUG_FS)
6112 if (!amdgpu_sriov_vf(adev))
6113 cancel_work(&adev->reset_work);
6114#endif
6115
6116 if (adev->kfd.dev)
6117 cancel_work(&adev->kfd.reset_work);
6118
6119 if (amdgpu_sriov_vf(adev))
6120 cancel_work(&adev->virt.flr_work);
6121
6122 if (con && adev->ras_enabled)
6123 cancel_work(&con->recovery_work);
6124
6125}
6126
dfe9c3cd
LL
6127static int amdgpu_device_health_check(struct list_head *device_list_handle)
6128{
6129 struct amdgpu_device *tmp_adev;
6130 int ret = 0;
dfe9c3cd
LL
6131
6132 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
04141c05 6133 ret |= amdgpu_device_bus_status_check(tmp_adev);
dfe9c3cd
LL
6134 }
6135
6136 return ret;
6137}
6138
785c536c
LL
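/*
 * Build the list of devices taking part in the reset.  For an XGMI hive all
 * hive members are added and the list is rotated so the device that
 * triggered the reset comes first; otherwise only @adev itself is added.
 * On bare metal a bus/health check gates the reset before it proceeds.
 */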
6139static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
6140 struct list_head *device_list,
6141 struct amdgpu_hive_info *hive)
26bc5340 6142{
26bc5340 6143 struct amdgpu_device *tmp_adev = NULL;
785c536c 6144 int r;
26bc5340 6145
9e94d22c
EQ
6146 /*
6147 * Build list of devices to reset.
6148 * In case we are in XGMI hive mode, resort the device list
6149 * to put adev in the 1st position.
6150 */
b1f7810b 6151 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
83d29a5f 6152 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
11bb3376 6153 list_add_tail(&tmp_adev->reset_list, device_list);
087a3e13 6154 if (adev->shutdown)
83d29a5f 6155 tmp_adev->shutdown = true;
8ba904f5
CS
6156 if (adev->pcie_reset_ctx.occurs_dpc)
6157 tmp_adev->pcie_reset_ctx.in_link_reset = true;
83d29a5f 6158 }
11bb3376
CS
6159 if (!list_is_first(&adev->reset_list, device_list))
6160 list_rotate_to_front(&adev->reset_list, device_list);
26bc5340 6161 } else {
11bb3376 6162 list_add_tail(&adev->reset_list, device_list);
26bc5340
AG
6163 }
6164
8ba904f5 6165 if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) {
785c536c 6166 r = amdgpu_device_health_check(device_list);
dfe9c3cd 6167 if (r)
11bb3376 6168 return r;
dfe9c3cd
LL
6169 }
6170
785c536c
LL
6171 return 0;
6172}
6173
6174static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
6175 struct list_head *device_list)
6176{
6177 struct amdgpu_device *tmp_adev = NULL;
6178
6179 if (list_empty(device_list))
6180 return;
6181 tmp_adev =
6182 list_first_entry(device_list, struct amdgpu_device, reset_list);
3675c2f2 6183 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
785c536c 6184}
e923be99 6185
785c536c
LL
6186static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
6187 struct list_head *device_list)
6188{
6189 struct amdgpu_device *tmp_adev = NULL;
f287a3c5 6190
785c536c
LL
6191 if (list_empty(device_list))
6192 return;
6193 tmp_adev =
6194 list_first_entry(device_list, struct amdgpu_device, reset_list);
6195 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
6196}
6197
3b3afba4
CS
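/*
 * Quiesce every device in the reset list before the actual reset: set the
 * MP1 state, suspend the display audio function, suspend RAS when a full
 * reset is needed, unregister the GPU instance and stop the DRM schedulers
 * so no new jobs reach the hardware.
 */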
6198static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
6199 struct amdgpu_job *job,
6200 struct amdgpu_reset_context *reset_context,
6201 struct list_head *device_list,
6202 struct amdgpu_hive_info *hive,
6203 bool need_emergency_restart)
785c536c
LL
6204{
6205 struct amdgpu_device *tmp_adev = NULL;
3b3afba4 6206 int i;
785c536c
LL
6207
6208 /* block all schedulers and reset given job's ring */
6209 list_for_each_entry(tmp_adev, device_list, reset_list) {
e923be99 6210 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 6211
3f12acc8
EQ
6212 /*
6213 * Try to put the audio codec into the suspend state
6214 * before the gpu reset starts.
6215 *
6216 * The power domain of the graphics device is shared
6217 * with the AZ power domain. Without this, we may
6218 * change the audio hardware from behind the audio
6219 * driver's back, which will trigger audio codec
6220 * errors.
6221 */
6222 if (!amdgpu_device_suspend_display_audio(tmp_adev))
11bb3376 6223 tmp_adev->pcie_reset_ctx.audio_suspended = true;
3f12acc8 6224
9e94d22c
EQ
6225 amdgpu_ras_set_error_query_ready(tmp_adev, false);
6226
52fb44cf
EQ
6227 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
6228
dbe2c4c8 6229 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
9e94d22c 6230
12ffa55d 6231 /*
a567db80 6232 * Mark the ASICs to be reset as untracked first,
12ffa55d
AG
6233 * and add them back after the reset has completed.
6234 */
6235 amdgpu_unregister_gpu_instance(tmp_adev);
6236
4cf50bae 6237 drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
565d1941 6238
f1c1314b 6239 /* disable ras on ALL IPs */
bb5c7235 6240 if (!need_emergency_restart &&
8ba904f5 6241 (!adev->pcie_reset_ctx.occurs_dpc) &&
b823821f 6242 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 6243 amdgpu_ras_suspend(tmp_adev);
6244
1d721ed6
AG
6245 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6246 struct amdgpu_ring *ring = tmp_adev->rings[i];
6247
9749c868 6248 if (!amdgpu_ring_sched_ready(ring))
1d721ed6
AG
6249 continue;
6250
0b2d2c2e 6251 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 6252
bb5c7235 6253 if (need_emergency_restart)
7c6e68c7 6254 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 6255 }
8f8c80f4 6256 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6 6257 }
11bb3376 6258}
7c6e68c7 6259
11bb3376
CS
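/*
 * Run the pre-reset step on every device in the list and then perform the
 * actual ASIC reset: the host handles the reset for SR-IOV VFs (with
 * retries), while bare metal goes through amdgpu_do_asic_reset().  Pending
 * non-scheduler reset work queued before this point is dropped afterwards.
 */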
6260static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
6261 struct list_head *device_list,
6262 struct amdgpu_reset_context *reset_context)
6263{
6264 struct amdgpu_device *tmp_adev = NULL;
6265 int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
6266 int r = 0;
1d721ed6 6267
26bc5340 6268retry: /* Rest of adevs pre asic reset from XGMI hive. */
11bb3376 6269 list_for_each_entry(tmp_adev, device_list, reset_list) {
8ba904f5
CS
6270 if (adev->pcie_reset_ctx.occurs_dpc)
6271 tmp_adev->no_hw_access = true;
f1549c09 6272 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
8ba904f5
CS
6273 if (adev->pcie_reset_ctx.occurs_dpc)
6274 tmp_adev->no_hw_access = false;
26bc5340
AG
6275 /*TODO Should we stop ?*/
6276 if (r) {
aac89168 6277 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 6278 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
6279 tmp_adev->asic_reset_res = r;
6280 }
6281 }
6282
6283 /* Actual ASIC resets if needed.*/
4f30d920 6284 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340 6285 if (amdgpu_sriov_vf(adev)) {
086809c8
EP
6286
6287 /* Bail out of reset early */
6288 if (amdgpu_ras_is_rma(adev))
6289 return -ENODEV;
6290
cbda2758
VC
6291 if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
6292 dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
6293 amdgpu_ras_set_fed(adev, true);
6294 set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
6295 }
6296
25c01191 6297 r = amdgpu_device_reset_sriov(adev, reset_context);
6e4aa08f
YL
6298 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
6299 amdgpu_virt_release_full_gpu(adev, true);
6300 goto retry;
6301 }
26bc5340
AG
6302 if (r)
6303 adev->asic_reset_res = r;
6304 } else {
11bb3376 6305 r = amdgpu_do_asic_reset(device_list, reset_context);
b98a1648 6306 if (r && r == -EAGAIN)
26bc5340
AG
6307 goto retry;
6308 }
6309
11bb3376 6310 list_for_each_entry(tmp_adev, device_list, reset_list) {
f4322b9f
YL
6311 /*
6312 * Drop any pending non scheduler resets queued before reset is done.
6313 * Any reset scheduled after this point would be valid. Scheduler resets
6314 * were already dropped during drm_sched_stop and no new ones can come
6315 * in before drm_sched_start.
6316 */
6317 amdgpu_device_stop_pending_resets(tmp_adev);
6318 }
6319
11bb3376
CS
6320 return r;
6321}
6322
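/*
 * Restart the DRM schedulers on every device in the reset list, report the
 * outcome of the reset and update smart shift on success.
 */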
6323static int amdgpu_device_sched_resume(struct list_head *device_list,
6324 struct amdgpu_reset_context *reset_context,
6325 bool job_signaled)
6326{
6327 struct amdgpu_device *tmp_adev = NULL;
6328 int i, r = 0;
1d721ed6 6329
26bc5340 6330 /* Post ASIC reset for all devs .*/
11bb3376 6331 list_for_each_entry(tmp_adev, device_list, reset_list) {
7c6e68c7 6332
1d721ed6
AG
6333 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6334 struct amdgpu_ring *ring = tmp_adev->rings[i];
6335
9749c868 6336 if (!amdgpu_ring_sched_ready(ring))
1d721ed6
AG
6337 continue;
6338
b2ef8087 6339 drm_sched_start(&ring->sched, 0);
1d721ed6
AG
6340 }
6341
b8920e1e 6342 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 6343 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 6344
7258fa31
SK
6345 if (tmp_adev->asic_reset_res)
6346 r = tmp_adev->asic_reset_res;
6347
1d721ed6 6348 tmp_adev->asic_reset_res = 0;
26bc5340
AG
6349
6350 if (r) {
b3a3c9a6
TZ
6351 /* bad news, how to tell it to userspace ?
6352 * for ras error, we should report GPU bad status instead of
6353 * reset failure
6354 */
6355 if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
6356 !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
6357 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
6358 atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
6359 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
6360 } else {
12ffa55d 6361 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
127ed492
LL
6362 if (amdgpu_acpi_smart_shift_update(tmp_adev,
6363 AMDGPU_SS_DEV_D0))
a3e510fd
LL
6364 dev_warn(tmp_adev->dev,
6365 "smart shift update failed\n");
26bc5340 6366 }
7c6e68c7 6367 }
26bc5340 6368
11bb3376
CS
6369 return r;
6370}
6371
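/*
 * Final post-reset cleanup: bring KFD back up, resume the display audio
 * function and restore the MP1/RAS query state for every device in the list.
 */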
6372static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
6373 struct list_head *device_list,
6374 bool need_emergency_restart)
6375{
6376 struct amdgpu_device *tmp_adev = NULL;
6377
6378 list_for_each_entry(tmp_adev, device_list, reset_list) {
428890a3 6379 /* unlock kfd: SRIOV would do it separately */
c004d44e 6380 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 6381 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 6382
6383 /* kfd_post_reset will do nothing if kfd device is not initialized,
6384 * need to bring up kfd here if it's not be initialized before
6385 */
6386 if (!adev->kfd.init_complete)
6387 amdgpu_amdkfd_device_init(adev);
6388
11bb3376 6389 if (tmp_adev->pcie_reset_ctx.audio_suspended)
3f12acc8 6390 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
6391
6392 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
6393
6394 amdgpu_ras_set_error_query_ready(tmp_adev, true);
11bb3376 6395
26bc5340 6396 }
11bb3376
CS
6397}
6398
6399
6400/**
6401 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
6402 *
6403 * @adev: amdgpu_device pointer
6404 * @job: which job trigger hang
6405 * @reset_context: amdgpu reset context pointer
6406 *
6407 * Attempt to reset the GPU if it has hung (all asics).
6408 * Attempt to do soft-reset or full-reset and reinitialize Asic
6409 * Returns 0 for success or an error on failure.
6410 */
6411
6412int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
6413 struct amdgpu_job *job,
6414 struct amdgpu_reset_context *reset_context)
6415{
6416 struct list_head device_list;
6417 bool job_signaled = false;
6418 struct amdgpu_hive_info *hive = NULL;
6419 int r = 0;
6420 bool need_emergency_restart = false;
6421
6422 /*
6423 * If it reaches here because of hang/timeout and a RAS error is
6424 * detected at the same time, let RAS recovery take care of it.
6425 */
6426 if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
6427 !amdgpu_sriov_vf(adev) &&
6428 reset_context->src != AMDGPU_RESET_SRC_RAS) {
6429 dev_dbg(adev->dev,
6430 "Gpu recovery from source: %d yielding to RAS error recovery handling",
6431 reset_context->src);
6432 return 0;
6433 }
6434
6435 /*
6436 * Special case: RAS triggered and full reset isn't supported
6437 */
6438 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
6439
6440 /*
6441 * Flush RAM to disk so that after reboot
6442 * the user can read log and see why the system rebooted.
6443 */
6444 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
6445 amdgpu_ras_get_context(adev)->reboot) {
a3e510fd 6446 dev_warn(adev->dev, "Emergency reboot.");
11bb3376
CS
6447
6448 ksys_sync_helper();
6449 emergency_restart();
6450 }
6451
6452 dev_info(adev->dev, "GPU %s begin!\n",
6453 need_emergency_restart ? "jobs stop":"reset");
6454
6455 if (!amdgpu_sriov_vf(adev))
6456 hive = amdgpu_get_xgmi_hive(adev);
6457 if (hive)
6458 mutex_lock(&hive->hive_lock);
6459
6460 reset_context->job = job;
6461 reset_context->hive = hive;
6462 INIT_LIST_HEAD(&device_list);
6463
785c536c
LL
6464 if (amdgpu_device_recovery_prepare(adev, &device_list, hive))
6465 goto end_reset;
6466
6467 /* We need to lock reset domain only once both for XGMI and single device */
6468 amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6469
3b3afba4
CS
6470 amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
6471 hive, need_emergency_restart);
11bb3376
CS
6472 if (need_emergency_restart)
6473 goto skip_sched_resume;
6474 /*
6475 * Must check guilty signal here since after this point all old
6476 * HW fences are force signaled.
6477 *
6478 * job->base holds a reference to parent fence
6479 */
ebe43542 6480 if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
11bb3376
CS
6481 job_signaled = true;
6482 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
6483 goto skip_hw_reset;
6484 }
6485
6486 r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
6487 if (r)
785c536c 6488 goto reset_unlock;
11bb3376
CS
6489skip_hw_reset:
6490 r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
6491 if (r)
785c536c 6492 goto reset_unlock;
11bb3376
CS
6493skip_sched_resume:
6494 amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
785c536c
LL
6495reset_unlock:
6496 amdgpu_device_recovery_put_reset_lock(adev, &device_list);
dfe9c3cd 6497end_reset:
9e94d22c 6498 if (hive) {
9e94d22c 6499 mutex_unlock(&hive->hive_lock);
d95e8e97 6500 amdgpu_put_xgmi_hive(hive);
9e94d22c 6501 }
26bc5340 6502
f287a3c5 6503 if (r)
26bc5340 6504 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
6505
6506 atomic_set(&adev->reset_domain->reset_res, r);
6fe52b63 6507
a72002cb
AA
6508 if (!r) {
6509 struct amdgpu_task_info *ti = NULL;
6510
6511 if (job)
6512 ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
6513
6514 drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
6515 ti ? &ti->task : NULL);
6516
6517 amdgpu_vm_put_task_info(ti);
6518 }
6fe52b63 6519
d38ceaf9
AD
6520 return r;
6521}
6522
466a7d11
ML
6523/**
6524 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6525 *
6526 * @adev: amdgpu_device pointer
6527 * @speed: pointer to the speed of the link
6528 * @width: pointer to the width of the link
6529 *
6530 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6531 * first physical partner to an AMD dGPU.
6532 * This will exclude any virtual switches and links.
6533 */
6534static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6535 enum pci_bus_speed *speed,
6536 enum pcie_link_width *width)
6537{
6538 struct pci_dev *parent = adev->pdev;
6539
6540 if (!speed || !width)
6541 return;
6542
6543 *speed = PCI_SPEED_UNKNOWN;
6544 *width = PCIE_LNK_WIDTH_UNKNOWN;
6545
87dfeb47
AD
6546 if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6547 while ((parent = pci_upstream_bridge(parent))) {
6548 /* skip upstream/downstream switches internal to dGPU*/
6549 if (parent->vendor == PCI_VENDOR_ID_ATI)
6550 continue;
6551 *speed = pcie_get_speed_cap(parent);
6552 *width = pcie_get_width_cap(parent);
6553 break;
6554 }
6555 } else {
6556 /* use the current speeds rather than max if switching is not supported */
6557 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
466a7d11
ML
6558 }
6559}
6560
757e8b95
AD
6561/**
6562 * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
6563 *
6564 * @adev: amdgpu_device pointer
6565 * @speed: pointer to the speed of the link
6566 * @width: pointer to the width of the link
6567 *
6568 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6569 * AMD dGPU which may be a virtual upstream bridge.
6570 */
6571static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
6572 enum pci_bus_speed *speed,
6573 enum pcie_link_width *width)
6574{
6575 struct pci_dev *parent = adev->pdev;
6576
6577 if (!speed || !width)
6578 return;
6579
6580 parent = pci_upstream_bridge(parent);
6581 if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
6582 /* use the upstream/downstream switches internal to dGPU */
6583 *speed = pcie_get_speed_cap(parent);
6584 *width = pcie_get_width_cap(parent);
6585 while ((parent = pci_upstream_bridge(parent))) {
6586 if (parent->vendor == PCI_VENDOR_ID_ATI) {
6587 /* use the upstream/downstream switches internal to dGPU */
6588 *speed = pcie_get_speed_cap(parent);
6589 *width = pcie_get_width_cap(parent);
6590 }
6591 }
6592 } else {
6593 /* use the device itself */
dc915275
SS
6594 *speed = pcie_get_speed_cap(adev->pdev);
6595 *width = pcie_get_width_cap(adev->pdev);
757e8b95
AD
6596 }
6597}
6598
e3ecdffa
AD
6599/**
6600 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
6601 *
6602 * @adev: amdgpu_device pointer
6603 *
a567db80 6604 * Fetches and stores in the driver the PCIE capabilities (gen speed
e3ecdffa
AD
6605 * and lanes) of the slot the device is in. Handles APUs and
6606 * virtualized environments where PCIE config space may not be available.
6607 */
5494d864 6608static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 6609{
c5313457 6610 enum pci_bus_speed speed_cap, platform_speed_cap;
757e8b95 6611 enum pcie_link_width platform_link_width, link_width;
d0dd7f0c 6612
cd474ba0
AD
6613 if (amdgpu_pcie_gen_cap)
6614 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 6615
cd474ba0
AD
6616 if (amdgpu_pcie_lane_cap)
6617 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 6618
cd474ba0 6619 /* covers APUs as well */
04e85958 6620 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
6621 if (adev->pm.pcie_gen_mask == 0)
6622 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6623 if (adev->pm.pcie_mlw_mask == 0)
6624 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 6625 return;
cd474ba0 6626 }
d0dd7f0c 6627
c5313457
HK
6628 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6629 return;
6630
466a7d11
ML
6631 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6632 &platform_link_width);
757e8b95 6633 amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
c5313457 6634
cd474ba0 6635 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330 6636 /* asic caps */
5d9a6330
AD
6637 if (speed_cap == PCI_SPEED_UNKNOWN) {
6638 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
6639 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6640 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 6641 } else {
2b3a1f51
FX
6642 if (speed_cap == PCIE_SPEED_32_0GT)
6643 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6644 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6645 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6646 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6647 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6648 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
6649 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6650 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6651 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6652 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6653 else if (speed_cap == PCIE_SPEED_8_0GT)
6654 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6655 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6656 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6657 else if (speed_cap == PCIE_SPEED_5_0GT)
6658 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6659 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6660 else
6661 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6662 }
6663 /* platform caps */
c5313457 6664 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
6665 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6666 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6667 } else {
2b3a1f51
FX
6668 if (platform_speed_cap == PCIE_SPEED_32_0GT)
6669 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6670 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6671 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6672 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6673 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6674 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
6675 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6676 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6677 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6678 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 6679 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
6680 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6681 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6682 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 6683 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
6684 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6685 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6686 else
6687 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6688
cd474ba0
AD
6689 }
6690 }
6691 if (adev->pm.pcie_mlw_mask == 0) {
757e8b95
AD
6692 /* asic caps */
6693 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6694 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6695 } else {
6696 switch (link_width) {
6697 case PCIE_LNK_X32:
6698 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6699 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6700 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6701 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6702 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6703 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6704 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6705 break;
6706 case PCIE_LNK_X16:
6707 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6708 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6709 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6710 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6711 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6712 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6713 break;
6714 case PCIE_LNK_X12:
6715 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6716 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6717 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6718 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6719 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6720 break;
6721 case PCIE_LNK_X8:
6722 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6723 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6724 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6725 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6726 break;
6727 case PCIE_LNK_X4:
6728 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6729 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6730 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6731 break;
6732 case PCIE_LNK_X2:
6733 adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6734 CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6735 break;
6736 case PCIE_LNK_X1:
6737 adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6738 break;
6739 default:
6740 break;
6741 }
6742 }
6743 /* platform caps */
c5313457 6744 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
6745 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6746 } else {
c5313457 6747 switch (platform_link_width) {
5d9a6330 6748 case PCIE_LNK_X32:
757e8b95
AD
6749 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6750 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6751 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6752 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6753 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6754 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6755 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6756 break;
5d9a6330 6757 case PCIE_LNK_X16:
757e8b95
AD
6758 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6759 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6760 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6761 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6762 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6763 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6764 break;
5d9a6330 6765 case PCIE_LNK_X12:
757e8b95
AD
6766 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6767 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6768 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6769 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6770 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6771 break;
5d9a6330 6772 case PCIE_LNK_X8:
757e8b95
AD
6773 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6774 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6775 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6776 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6777 break;
5d9a6330 6778 case PCIE_LNK_X4:
757e8b95
AD
6779 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6780 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6781 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6782 break;
5d9a6330 6783 case PCIE_LNK_X2:
757e8b95
AD
6784 adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6785 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
cd474ba0 6786 break;
5d9a6330 6787 case PCIE_LNK_X1:
757e8b95 6788 adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
cd474ba0
AD
6789 break;
6790 default:
6791 break;
6792 }
d0dd7f0c
AD
6793 }
6794 }
6795}
d38ceaf9 6796
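The function above only builds capability masks; interpreting them is left to the power-management code. As a minimal sketch, the platform mask could be reduced to a single "highest supported gen" number using nothing but the CAIL_* flags already used above; the helper name is hypothetical and not part of amdgpu_device.c.

/* Hypothetical example: highest PCIe gen encoded in a platform gen mask. */
static unsigned int amdgpu_example_max_platform_gen(u32 pcie_gen_mask)
{
	if (pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5)
		return 5;
	if (pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}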
08a2fd23
RE
6797/**
6798 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6799 *
6800 * @adev: amdgpu_device pointer
6801 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6802 *
6803 * Return true if @peer_adev can access (DMA) @adev through the PCIe
6804 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6805 * @peer_adev.
6806 */
6807bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6808 struct amdgpu_device *peer_adev)
6809{
6810#ifdef CONFIG_HSA_AMD_P2P
bb66ecbf
LL
6811 bool p2p_access =
6812 !adev->gmc.xgmi.connected_to_cpu &&
6813 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
8e29057e
RE
6814 if (!p2p_access)
6815 dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6816 pci_name(peer_adev->pdev));
08a2fd23 6817
75f0efbc
RJ
6818 bool is_large_bar = adev->gmc.visible_vram_size &&
6819 adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6820 bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6821
6822 if (!p2p_addressable) {
6823 uint64_t address_mask = peer_adev->dev->dma_mask ?
6824 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6825 resource_size_t aper_limit =
6826 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6827
6828 p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6829 aper_limit & address_mask);
6830 }
28b0ef92 6831 return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
08a2fd23
RE
6832#else
6833 return false;
6834#endif
6835}
6836
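The "large BAR" condition tested above simply means the CPU-visible aperture covers all of VRAM. Pulled out on its own as a minimal sketch (hypothetical helper, not part of amdgpu_device.c):

/* Hypothetical example: the large-BAR test used in the P2P check above. */
static bool amdgpu_example_is_large_bar(struct amdgpu_device *adev)
{
	return adev->gmc.visible_vram_size &&
	       adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
}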
127ed492 6837int amdgpu_device_baco_enter(struct amdgpu_device *adev)
361dbd01 6838{
7a22677b 6839 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 6840
127ed492 6841 if (!amdgpu_device_supports_baco(adev))
361dbd01
AD
6842 return -ENOTSUPP;
6843
8ab0d6f0 6844 if (ras && adev->ras_enabled &&
acdae216 6845 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6846 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6847
9530273e 6848 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
6849}
6850
127ed492 6851int amdgpu_device_baco_exit(struct amdgpu_device *adev)
361dbd01 6852{
7a22677b 6853 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 6854 int ret = 0;
361dbd01 6855
127ed492 6856 if (!amdgpu_device_supports_baco(adev))
361dbd01
AD
6857 return -ENOTSUPP;
6858
9530273e
EQ
6859 ret = amdgpu_dpm_baco_exit(adev);
6860 if (ret)
6861 return ret;
7a22677b 6862
8ab0d6f0 6863 if (ras && adev->ras_enabled &&
acdae216 6864 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6865 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6866
0cdb3f97 6867 if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
1bece222
CL
6868 adev->nbio.funcs->clear_doorbell_interrupt)
6869 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6870
7a22677b 6871 return 0;
361dbd01 6872}
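The two helpers above are meant to be used as a pair around a low-power window (BACO: Bus Active, Chip Off). A minimal sketch of that pairing, with error handling reduced to the essentials (hypothetical helper, not part of amdgpu_device.c):

/* Hypothetical example: enter BACO, then bring the device back out. */
static int amdgpu_example_baco_cycle(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_baco_enter(adev);
	if (r)
		return r;

	/* ... device idles in BACO here ... */

	return amdgpu_device_baco_exit(adev);
}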
c9a6b82f
AG
6873
6874/**
6875 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6876 * @pdev: PCI device struct
6877 * @state: PCI channel state
6878 *
6879 * Description: Called when a PCI error is detected.
6880 *
6881 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6882 */
6883pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6884{
6885 struct drm_device *dev = pci_get_drvdata(pdev);
6886 struct amdgpu_device *adev = drm_to_adev(dev);
8ba904f5
CS
6887 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
6888 struct amdgpu_reset_context reset_context;
6889 struct list_head device_list;
c9a6b82f 6890
8ba904f5 6891 dev_info(adev->dev, "PCI error: detected callback!!\n");
c9a6b82f 6892
8ba904f5
CS
6893 if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6894 dev_warn(adev->dev, "No support for XGMI hive yet...\n");
6894305c
AG
6895 return PCI_ERS_RESULT_DISCONNECT;
6896 }
6897
e17e27f9
GC
6898 adev->pci_channel_state = state;
6899
c9a6b82f
AG
6900 switch (state) {
6901 case pci_channel_io_normal:
8ba904f5 6902 dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
c9a6b82f 6903 return PCI_ERS_RESULT_CAN_RECOVER;
8a11d283 6904 case pci_channel_io_frozen:
8ba904f5
CS
6905 /* Fatal error, prepare for slot reset */
6906 dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6907
6908 if (hive)
6909 mutex_lock(&hive->hive_lock);
6910 adev->pcie_reset_ctx.occurs_dpc = true;
6911 memset(&reset_context, 0, sizeof(reset_context));
6912 INIT_LIST_HEAD(&device_list);
6913
785c536c
LL
6914 amdgpu_device_recovery_prepare(adev, &device_list, hive);
6915 amdgpu_device_recovery_get_reset_lock(adev, &device_list);
3b3afba4
CS
6916 amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6917 hive, false);
8ba904f5
CS
6918 if (hive) {
6919 mutex_unlock(&hive->hive_lock);
6920 amdgpu_put_xgmi_hive(hive);
acd89fca 6921 }
c9a6b82f
AG
6922 return PCI_ERS_RESULT_NEED_RESET;
6923 case pci_channel_io_perm_failure:
6924 /* Permanent error, prepare for device removal */
8ba904f5 6925 dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
c9a6b82f
AG
6926 return PCI_ERS_RESULT_DISCONNECT;
6927 }
6928
6929 return PCI_ERS_RESULT_NEED_RESET;
6930}
6931
6932/**
6933 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6934 * @pdev: pointer to PCI device
6935 */
6936pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6937{
8ba904f5
CS
6938 struct drm_device *dev = pci_get_drvdata(pdev);
6939 struct amdgpu_device *adev = drm_to_adev(dev);
c9a6b82f 6940
8ba904f5 6941 dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
c9a6b82f
AG
6942
6943 /* TODO - dump whatever for debugging purposes */
6944
6945 /* This is called only if amdgpu_pci_error_detected returns
6946 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6947 * works, no need to reset slot.
6948 */
6949
6950 return PCI_ERS_RESULT_RECOVERED;
6951}
6952
6953/**
6954 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6955 * @pdev: PCI device struct
6956 *
6957 * Description: This routine is called by the pci error recovery
6958 * code after the PCI slot has been reset, just before we
6959 * should resume normal operations.
6960 */
6961pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6962{
6963 struct drm_device *dev = pci_get_drvdata(pdev);
6964 struct amdgpu_device *adev = drm_to_adev(dev);
04442bf7 6965 struct amdgpu_reset_context reset_context;
732c6cef
CS
6966 struct amdgpu_device *tmp_adev;
6967 struct amdgpu_hive_info *hive;
7ac71382 6968 struct list_head device_list;
8ba904f5
CS
6969 int r = 0, i;
6970 u32 memsize;
601429cc
SY
6971
6972 /* PCI error slot reset should be skipped during RAS recovery */
5f571c61 6973 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
7e437167
TZ
6974 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6975 amdgpu_ras_in_recovery(adev))
601429cc 6976 return PCI_ERS_RESULT_RECOVERED;
c9a6b82f 6977
8ba904f5 6978 dev_info(adev->dev, "PCI error: slot reset callback!!\n");
c9a6b82f 6979
04442bf7
LL
6980 memset(&reset_context, 0, sizeof(reset_context));
6981
362c7b91 6982 /* wait for asic to come out of reset */
8ba904f5 6983 msleep(700);
362c7b91 6984
7ac71382 6985 /* Restore PCI confspace */
c1dd4aa6 6986 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6987
362c7b91
AG
6988 /* confirm ASIC came out of reset */
6989 for (i = 0; i < adev->usec_timeout; i++) {
6990 memsize = amdgpu_asic_get_config_memsize(adev);
6991
6992 if (memsize != 0xffffffff)
6993 break;
6994 udelay(1);
6995 }
6996 if (memsize == 0xffffffff) {
6997 r = -ETIME;
6998 goto out;
6999 }
7000
04442bf7
LL
7001 reset_context.method = AMD_RESET_METHOD_NONE;
7002 reset_context.reset_req_dev = adev;
7003 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
8ba904f5
CS
7004 set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
7005 INIT_LIST_HEAD(&device_list);
c9a6b82f 7006
8ba904f5
CS
7007 hive = amdgpu_get_xgmi_hive(adev);
7008 if (hive) {
7009 mutex_lock(&hive->hive_lock);
7010 reset_context.hive = hive;
7011 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7012 tmp_adev->pcie_reset_ctx.in_link_reset = true;
7013 list_add_tail(&tmp_adev->reset_list, &device_list);
7014 }
7015 } else {
7016 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
7017 list_add_tail(&adev->reset_list, &device_list);
7018 }
c9a6b82f 7019
8ba904f5 7020 r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
c9a6b82f 7021out:
c9a6b82f 7022 if (!r) {
c1dd4aa6
AG
7023 if (amdgpu_device_cache_pci_state(adev->pdev))
7024 pci_restore_state(adev->pdev);
8ba904f5 7025 dev_info(adev->dev, "PCIe error recovery succeeded\n");
c9a6b82f 7026 } else {
8ba904f5 7027 dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
732c6cef 7028 if (hive) {
8ba904f5
CS
7029 list_for_each_entry(tmp_adev, &device_list, reset_list)
7030 amdgpu_device_unset_mp1_state(tmp_adev);
8ba904f5 7031 }
785c536c 7032 amdgpu_device_recovery_put_reset_lock(adev, &device_list);
8ba904f5
CS
7033 }
7034
7035 if (hive) {
7036 mutex_unlock(&hive->hive_lock);
7037 amdgpu_put_xgmi_hive(hive);
c9a6b82f
AG
7038 }
7039
7040 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
7041}
7042
7043/**
7044 * amdgpu_pci_resume() - resume normal ops after PCI reset
7045 * @pdev: pointer to PCI device
7046 *
7047 * Called when the error recovery driver tells us that it's
505199a3 7048 * OK to resume normal operation.
c9a6b82f
AG
7049 */
7050void amdgpu_pci_resume(struct pci_dev *pdev)
7051{
7052 struct drm_device *dev = pci_get_drvdata(pdev);
7053 struct amdgpu_device *adev = drm_to_adev(dev);
8ba904f5
CS
7054 struct list_head device_list;
7055 struct amdgpu_hive_info *hive = NULL;
7056 struct amdgpu_device *tmp_adev = NULL;
c9a6b82f 7057
8ba904f5 7058 dev_info(adev->dev, "PCI error: resume callback!!\n");
acd89fca 7059
e17e27f9
GC
7060 /* Only continue execution for the case of pci_channel_io_frozen */
7061 if (adev->pci_channel_state != pci_channel_io_frozen)
7062 return;
7063
8ba904f5 7064 INIT_LIST_HEAD(&device_list);
acd89fca 7065
8ba904f5
CS
7066 hive = amdgpu_get_xgmi_hive(adev);
7067 if (hive) {
7068 mutex_lock(&hive->hive_lock);
7069 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7070 tmp_adev->pcie_reset_ctx.in_link_reset = false;
7071 list_add_tail(&tmp_adev->reset_list, &device_list);
7072 }
7073 } else
7074 list_add_tail(&adev->reset_list, &device_list);
acd89fca 7075
8ba904f5
CS
7076 amdgpu_device_sched_resume(&device_list, NULL, NULL);
7077 amdgpu_device_gpu_resume(adev, &device_list, false);
785c536c 7078 amdgpu_device_recovery_put_reset_lock(adev, &device_list);
8ba904f5 7079 adev->pcie_reset_ctx.occurs_dpc = false;
acd89fca 7080
8ba904f5
CS
7081 if (hive) {
7082 mutex_unlock(&hive->hive_lock);
7083 amdgpu_put_xgmi_hive(hive);
7084 }
c9a6b82f 7085}
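The four callbacks above (error_detected, mmio_enabled, slot_reset, resume) plug into the PCI core's error recovery flow through a struct pci_error_handlers; in the amdgpu driver the actual table lives outside this file, so the sketch below is only illustrative and the table name is hypothetical.

/* Hypothetical example: hooking the callbacks into PCI error recovery. */
static const struct pci_error_handlers amdgpu_example_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};

Such a table is then referenced from the driver's struct pci_driver via its .err_handler field.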
c1dd4aa6
AG
7086
7087bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
7088{
7089 struct drm_device *dev = pci_get_drvdata(pdev);
7090 struct amdgpu_device *adev = drm_to_adev(dev);
7091 int r;
7092
afe260df
VZ
7093 if (amdgpu_sriov_vf(adev))
7094 return false;
7095
c1dd4aa6
AG
7096 r = pci_save_state(pdev);
7097 if (!r) {
7098 kfree(adev->pci_state);
7099
7100 adev->pci_state = pci_store_saved_state(pdev);
7101
7102 if (!adev->pci_state) {
a3e510fd 7103 dev_err(adev->dev, "Failed to store PCI saved state");
c1dd4aa6
AG
7104 return false;
7105 }
7106 } else {
a3e510fd 7107 dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
c1dd4aa6
AG
7108 return false;
7109 }
7110
7111 return true;
7112}
7113
7114bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
7115{
7116 struct drm_device *dev = pci_get_drvdata(pdev);
7117 struct amdgpu_device *adev = drm_to_adev(dev);
7118 int r;
7119
7120 if (!adev->pci_state)
7121 return false;
7122
7123 r = pci_load_saved_state(pdev, adev->pci_state);
7124
7125 if (!r) {
7126 pci_restore_state(pdev);
7127 } else {
a3e510fd 7128 dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
c1dd4aa6
AG
7129 return false;
7130 }
7131
7132 return true;
7133}
7134
810085dd
EH
7135void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
7136 struct amdgpu_ring *ring)
7137{
7138#ifdef CONFIG_X86_64
b818a5d3 7139 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
7140 return;
7141#endif
7142 if (adev->gmc.xgmi.connected_to_cpu)
7143 return;
7144
7145 if (ring && ring->funcs->emit_hdp_flush)
7146 amdgpu_ring_emit_hdp_flush(ring);
7147 else
7148 amdgpu_asic_flush_hdp(adev, ring);
7149}
c1dd4aa6 7150
810085dd
EH
7151void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
7152 struct amdgpu_ring *ring)
7153{
7154#ifdef CONFIG_X86_64
b818a5d3 7155 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
7156 return;
7157#endif
7158 if (adev->gmc.xgmi.connected_to_cpu)
7159 return;
c1dd4aa6 7160
810085dd
EH
7161 amdgpu_asic_invalidate_hdp(adev, ring);
7162}
34f3a4a9 7163
89a7a870
AG
7164int amdgpu_in_reset(struct amdgpu_device *adev)
7165{
7166 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
7167}
7168
34f3a4a9
LY
7169/**
7170 * amdgpu_device_halt() - bring hardware to some kind of halt state
7171 *
7172 * @adev: amdgpu_device pointer
7173 *
7174 * Bring hardware to some kind of halt state so that no one can touch it
7175 * any more. It helps to preserve the error context when an error occurs.
7176 * Compared to a simple hang, the system stays stable at least for SSH
7177 * access, so it should then be trivial to inspect the hardware state and
7178 * see what's going on. Implemented as follows:
7179 *
7180 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
7181 * clears all CPU mappings to device, disallows remappings through page faults
7182 * 2. amdgpu_irq_disable_all() disables all interrupts
7183 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
7184 * 4. set adev->no_hw_access to avoid potential crashes after step 5
7185 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
7186 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
7187 * flush any in-flight DMA operations
7188 */
7189void amdgpu_device_halt(struct amdgpu_device *adev)
7190{
7191 struct pci_dev *pdev = adev->pdev;
e0f943b4 7192 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 7193
2c1c7ba4 7194 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
7195 drm_dev_unplug(ddev);
7196
7197 amdgpu_irq_disable_all(adev);
7198
7199 amdgpu_fence_driver_hw_fini(adev);
7200
7201 adev->no_hw_access = true;
7202
7203 amdgpu_device_unmap_mmio(adev);
7204
7205 pci_disable_device(pdev);
7206 pci_wait_for_pending_transaction(pdev);
7207}
86700a40
XD
7208
7209u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
7210 u32 reg)
7211{
7212 unsigned long flags, address, data;
7213 u32 r;
7214
7215 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7216 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7217
7218 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7219 WREG32(address, reg * 4);
7220 (void)RREG32(address);
7221 r = RREG32(data);
7222 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
7223 return r;
7224}
7225
7226void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
7227 u32 reg, u32 v)
7228{
7229 unsigned long flags, address, data;
7230
7231 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7232 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7233
7234 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7235 WREG32(address, reg * 4);
7236 (void)RREG32(address);
7237 WREG32(data, v);
7238 (void)RREG32(data);
7239 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
7240}
68ce8b24 7241
a6328c9c
CK
7242/**
7243 * amdgpu_device_get_gang - return a reference to the current gang
7244 * @adev: amdgpu_device pointer
7245 *
7246 * Returns: A new reference to the current gang leader.
7247 */
7248struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
7249{
7250 struct dma_fence *fence;
7251
7252 rcu_read_lock();
7253 fence = dma_fence_get_rcu_safe(&adev->gang_submit);
7254 rcu_read_unlock();
7255 return fence;
7256}
7257
68ce8b24
CK
7258/**
7259 * amdgpu_device_switch_gang - switch to a new gang
7260 * @adev: amdgpu_device pointer
7261 * @gang: the gang to switch to
7262 *
7263 * Try to switch to a new gang.
7264 * Returns: NULL if we switched to the new gang or a reference to the current
7265 * gang leader.
7266 */
7267struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
7268 struct dma_fence *gang)
7269{
7270 struct dma_fence *old = NULL;
7271
0d9a9509 7272 dma_fence_get(gang);
68ce8b24
CK
7273 do {
7274 dma_fence_put(old);
a6328c9c 7275 old = amdgpu_device_get_gang(adev);
68ce8b24
CK
7276 if (old == gang)
7277 break;
7278
0d9a9509
CK
7279 if (!dma_fence_is_signaled(old)) {
7280 dma_fence_put(gang);
68ce8b24 7281 return old;
0d9a9509 7282 }
68ce8b24
CK
7283
7284 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
7285 old, gang) != old);
7286
0d9a9509
CK
7287 /*
7288 * Drop it once for the exchanged reference in adev and once for the
7289 * thread local reference acquired in amdgpu_device_get_gang().
7290 */
7291 dma_fence_put(old);
68ce8b24
CK
7292 dma_fence_put(old);
7293 return NULL;
7294}
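A caller of amdgpu_device_switch_gang() keeps its own reference to @gang either way and, when a non-NULL fence comes back, must wait for (or depend on) the old leader and drop that reference before retrying. Below is a minimal synchronous sketch of that contract (hypothetical helper, not part of amdgpu_device.c; a real caller would normally hand the old leader back to the scheduler as a dependency instead of blocking).

/* Hypothetical example: retry the gang switch until it succeeds. */
static int amdgpu_example_switch_gang_sync(struct amdgpu_device *adev,
					   struct dma_fence *gang)
{
	struct dma_fence *old;
	long r;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* Old gang leader still pending; wait, drop it, retry. */
		r = dma_fence_wait(old, false);
		dma_fence_put(old);
		if (r)
			return r;
	}
	return 0;
}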
220c8cc8 7295
bd22e44a
CK
7296/**
7297 * amdgpu_device_enforce_isolation - enforce HW isolation
7298 * @adev: the amdgpu device pointer
7299 * @ring: the HW ring the job is supposed to run on
7300 * @job: the job which is about to be pushed to the HW ring
7301 *
7302 * Makes sure that only one client at a time can use the GFX block.
7303 * Returns: The dependency to wait on before the job can be pushed to the HW.
7304 * The function is called multiple times until NULL is returned.
7305 */
7306struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
7307 struct amdgpu_ring *ring,
7308 struct amdgpu_job *job)
7309{
7310 struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
7311 struct drm_sched_fence *f = job->base.s_fence;
7312 struct dma_fence *dep;
7313 void *owner;
7314 int r;
7315
7316 /*
7317 * For now enforce isolation only for the GFX block since we only need
7318 * the cleaner shader on those rings.
7319 */
7320 if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
7321 ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
7322 return NULL;
7323
7324 /*
7325 * All submissions where enforce isolation is false are handled as if
7326 * they come from a single client. Use ~0l as the owner to distinguish it
7327 * from kernel submissions where the owner is NULL.
7328 */
7329 owner = job->enforce_isolation ? f->owner : (void *)~0l;
7330
7331 mutex_lock(&adev->enforce_isolation_mutex);
7332
7333 /*
7334 * The "spearhead" submission is the first one which changes the
7335 * ownership to its client. We always need to wait for it to be
7336 * pushed to the HW before proceeding with anything.
7337 */
7338 if (&f->scheduled != isolation->spearhead &&
7339 !dma_fence_is_signaled(isolation->spearhead)) {
7340 dep = isolation->spearhead;
7341 goto out_grab_ref;
7342 }
7343
7344 if (isolation->owner != owner) {
7345
7346 /*
7347 * Wait for any gang to be assembled before switching to a
7348 * different owner or otherwise we could deadlock the
7349 * submissions.
7350 */
7351 if (!job->gang_submit) {
7352 dep = amdgpu_device_get_gang(adev);
7353 if (!dma_fence_is_signaled(dep))
7354 goto out_return_dep;
7355 dma_fence_put(dep);
7356 }
7357
7358 dma_fence_put(isolation->spearhead);
7359 isolation->spearhead = dma_fence_get(&f->scheduled);
7360 amdgpu_sync_move(&isolation->active, &isolation->prev);
1bb1314d 7361 trace_amdgpu_isolation(isolation->owner, owner);
bd22e44a
CK
7362 isolation->owner = owner;
7363 }
7364
7365 /*
7366 * Specifying the ring here helps to pipeline submissions even when
7367 * isolation is enabled. If that is not desired for testing NULL can be
7368 * used instead of the ring to enforce a CPU round trip while switching
7369 * between clients.
7370 */
7371 dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
7372 r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
7373 if (r)
a3e510fd 7374 dev_warn(adev->dev, "OOM tracking isolation\n");
bd22e44a
CK
7375
7376out_grab_ref:
7377 dma_fence_get(dep);
7378out_return_dep:
7379 mutex_unlock(&adev->enforce_isolation_mutex);
7380 return dep;
7381}
7382
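The owner cookie used above partitions submissions into three groups: ~0l for everything submitted with enforce_isolation disabled, NULL for kernel submissions, and the scheduler fence owner for isolated user clients. A minimal sketch naming those groups (hypothetical helper, not part of amdgpu_device.c):

/* Hypothetical example: classify the isolation owner cookie. */
static const char *amdgpu_example_isolation_owner_name(void *owner)
{
	if (owner == (void *)~0l)
		return "shared (isolation not enforced)";
	if (!owner)
		return "kernel";
	return "isolated user client";
}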
220c8cc8
AD
7383bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
7384{
7385 switch (adev->asic_type) {
7386#ifdef CONFIG_DRM_AMDGPU_SI
7387 case CHIP_HAINAN:
7388#endif
7389 case CHIP_TOPAZ:
7390 /* chips with no display hardware */
7391 return false;
7392#ifdef CONFIG_DRM_AMDGPU_SI
7393 case CHIP_TAHITI:
7394 case CHIP_PITCAIRN:
7395 case CHIP_VERDE:
7396 case CHIP_OLAND:
7397#endif
7398#ifdef CONFIG_DRM_AMDGPU_CIK
7399 case CHIP_BONAIRE:
7400 case CHIP_HAWAII:
7401 case CHIP_KAVERI:
7402 case CHIP_KABINI:
7403 case CHIP_MULLINS:
7404#endif
7405 case CHIP_TONGA:
7406 case CHIP_FIJI:
7407 case CHIP_POLARIS10:
7408 case CHIP_POLARIS11:
7409 case CHIP_POLARIS12:
7410 case CHIP_VEGAM:
7411 case CHIP_CARRIZO:
7412 case CHIP_STONEY:
7413 /* chips with display hardware */
7414 return true;
7415 default:
7416 /* IP discovery */
4e8303cf 7417 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
7418 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
7419 return false;
7420 return true;
7421 }
7422}
81283fee
JZ
7423
7424uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
7425 uint32_t inst, uint32_t reg_addr, char reg_name[],
7426 uint32_t expected_value, uint32_t mask)
7427{
7428 uint32_t ret = 0;
7429 uint32_t old_ = 0;
7430 uint32_t tmp_ = RREG32(reg_addr);
7431 uint32_t loop = adev->usec_timeout;
7432
7433 while ((tmp_ & (mask)) != (expected_value)) {
7434 if (old_ != tmp_) {
7435 loop = adev->usec_timeout;
7436 old_ = tmp_;
7437 } else
7438 udelay(1);
7439 tmp_ = RREG32(reg_addr);
7440 loop--;
7441 if (!loop) {
a3e510fd
LL
7442 dev_warn(
7443 adev->dev,
7444 "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
7445 inst, reg_name, (uint32_t)expected_value,
7446 (uint32_t)(tmp_ & (mask)));
81283fee
JZ
7447 ret = -ETIMEDOUT;
7448 break;
7449 }
7450 }
7451 return ret;
7452}
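amdgpu_device_wait_on_rreg() above polls a register until (value & mask) matches the expected value, restarting the timeout whenever the value changes. A minimal sketch of a caller waiting for a "ready" bit follows; the register offset, name and bit are made up for illustration and this is not part of amdgpu_device.c.

/* Hypothetical example: wait for a ready bit using the helper above. */
static int amdgpu_example_wait_ready(struct amdgpu_device *adev, u32 reg,
				     u32 ready_bit)
{
	u32 r = amdgpu_device_wait_on_rreg(adev, 0, reg, "EXAMPLE_STATUS",
					   ready_bit, ready_bit);

	return r ? -ETIMEDOUT : 0;
}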
6c8d1f4b
J
7453
7454ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
7455{
7456 ssize_t size = 0;
7457
7458 if (!ring || !ring->adev)
7459 return size;
7460
7461 if (amdgpu_device_should_recover_gpu(ring->adev))
7462 size |= AMDGPU_RESET_TYPE_FULL;
7463
7464 if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
7465 !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
7466 size |= AMDGPU_RESET_TYPE_SOFT_RESET;
7467
7468 return size;
7469}
7470
7471ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
7472{
7473 ssize_t size = 0;
7474
7475 if (supported_reset == 0) {
7476 size += sysfs_emit_at(buf, size, "unsupported");
7477 size += sysfs_emit_at(buf, size, "\n");
7478 return size;
7479
7480 }
7481
7482 if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
7483 size += sysfs_emit_at(buf, size, "soft ");
7484
7485 if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
7486 size += sysfs_emit_at(buf, size, "queue ");
7487
7488 if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
7489 size += sysfs_emit_at(buf, size, "pipe ");
7490
7491 if (supported_reset & AMDGPU_RESET_TYPE_FULL)
7492 size += sysfs_emit_at(buf, size, "full ");
7493
7494 size += sysfs_emit_at(buf, size, "\n");
7495 return size;
7496}
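amdgpu_show_reset_mask() above is shaped for use from a sysfs "show" callback. Below is a minimal sketch of such a callback combined with amdgpu_get_soft_full_reset_mask(); the choice of ring and the attribute wiring are assumptions and the helper is not part of amdgpu_device.c.

/* Hypothetical example: expose the reset mask of the first GFX ring. */
static ssize_t amdgpu_example_reset_mask_show(struct device *dev,
					      struct device_attribute *attr,
					      char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint32_t supported;

	supported = (uint32_t)amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);

	return amdgpu_show_reset_mask(buf, supported);
}

Such a callback would then be registered through the usual device attribute machinery (e.g. DEVICE_ATTR_RO() plus device_create_file()).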