1 /*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39
40 #include <drm/drm_aperture.h>
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_fb_helper.h>
43 #include <drm/drm_probe_helper.h>
44 #include <drm/amdgpu_drm.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76
77 #include <linux/suspend.h>
78 #include <drm/task_barrier.h>
79 #include <linux/pm_runtime.h>
80
81 #include <drm/drm_drv.h>
82
83 #if IS_ENABLED(CONFIG_X86)
84 #include <asm/intel-family.h>
85 #endif
86
87 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
88 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
94
95 #define AMDGPU_RESUME_MS 2000
96 #define AMDGPU_MAX_RETRY_LIMIT 2
97 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
98
99 static const struct drm_driver amdgpu_kms_driver;
100
101 const char *amdgpu_asic_name[] = {
102 "TAHITI",
103 "PITCAIRN",
104 "VERDE",
105 "OLAND",
106 "HAINAN",
107 "BONAIRE",
108 "KAVERI",
109 "KABINI",
110 "HAWAII",
111 "MULLINS",
112 "TOPAZ",
113 "TONGA",
114 "FIJI",
115 "CARRIZO",
116 "STONEY",
117 "POLARIS10",
118 "POLARIS11",
119 "POLARIS12",
120 "VEGAM",
121 "VEGA10",
122 "VEGA12",
123 "VEGA20",
124 "RAVEN",
125 "ARCTURUS",
126 "RENOIR",
127 "ALDEBARAN",
128 "NAVI10",
129 "CYAN_SKILLFISH",
130 "NAVI14",
131 "NAVI12",
132 "SIENNA_CICHLID",
133 "NAVY_FLOUNDER",
134 "VANGOGH",
135 "DIMGREY_CAVEFISH",
136 "BEIGE_GOBY",
137 "YELLOW_CARP",
138 "IP DISCOVERY",
139 "LAST",
140 };
141
142 /**
143 * DOC: pcie_replay_count
144 *
145 * The amdgpu driver provides a sysfs API for reporting the total number
146  * of PCIe replays (NAKs).
147  * The file pcie_replay_count is used for this and returns the total
148  * number of replays as the sum of NAKs generated and NAKs received.
149 */
150
151 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
152 struct device_attribute *attr, char *buf)
153 {
154 struct drm_device *ddev = dev_get_drvdata(dev);
155 struct amdgpu_device *adev = drm_to_adev(ddev);
156 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
157
158 return sysfs_emit(buf, "%llu\n", cnt);
159 }
160
161 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
162 amdgpu_device_get_pcie_replay_count, NULL);
163
164 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
165
166 /**
167 * DOC: product_name
168 *
169 * The amdgpu driver provides a sysfs API for reporting the product name
170  * for the device.
171  * The file product_name is used for this and returns the product name
172  * as returned from the FRU.
173  * NOTE: This is only available for certain server cards.
174 */
175
176 static ssize_t amdgpu_device_get_product_name(struct device *dev,
177 struct device_attribute *attr, char *buf)
178 {
179 struct drm_device *ddev = dev_get_drvdata(dev);
180 struct amdgpu_device *adev = drm_to_adev(ddev);
181
182 return sysfs_emit(buf, "%s\n", adev->product_name);
183 }
184
185 static DEVICE_ATTR(product_name, S_IRUGO,
186 amdgpu_device_get_product_name, NULL);
187
188 /**
189 * DOC: product_number
190 *
191 * The amdgpu driver provides a sysfs API for reporting the part number
192  * for the device.
193  * The file product_number is used for this and returns the part number
194  * as returned from the FRU.
195  * NOTE: This is only available for certain server cards.
196 */
197
198 static ssize_t amdgpu_device_get_product_number(struct device *dev,
199 struct device_attribute *attr, char *buf)
200 {
201 struct drm_device *ddev = dev_get_drvdata(dev);
202 struct amdgpu_device *adev = drm_to_adev(ddev);
203
204 return sysfs_emit(buf, "%s\n", adev->product_number);
205 }
206
207 static DEVICE_ATTR(product_number, S_IRUGO,
208 amdgpu_device_get_product_number, NULL);
209
210 /**
211 * DOC: serial_number
212 *
213 * The amdgpu driver provides a sysfs API for reporting the serial number
214  * for the device.
215  * The file serial_number is used for this and returns the serial number
216  * as returned from the FRU.
217  * NOTE: This is only available for certain server cards.
218 */
219
220 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
221 struct device_attribute *attr, char *buf)
222 {
223 struct drm_device *ddev = dev_get_drvdata(dev);
224 struct amdgpu_device *adev = drm_to_adev(ddev);
225
226 return sysfs_emit(buf, "%s\n", adev->serial);
227 }
228
229 static DEVICE_ATTR(serial_number, S_IRUGO,
230 amdgpu_device_get_serial_number, NULL);
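/*
 * Illustrative usage sketch (assumed sysfs path, may vary per system, not
 * part of the original file): the attributes above are exposed through sysfs
 * on the GPU's PCI device, e.g.:
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 *   cat /sys/class/drm/card0/device/serial_number
 */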
231
232 /**
233 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
234 *
235 * @dev: drm_device pointer
236 *
237 * Returns true if the device is a dGPU with ATPX power control,
238 * otherwise return false.
239 */
240 bool amdgpu_device_supports_px(struct drm_device *dev)
241 {
242 struct amdgpu_device *adev = drm_to_adev(dev);
243
244 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
245 return true;
246 return false;
247 }
248
249 /**
250 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
251 *
252 * @dev: drm_device pointer
253 *
254 * Returns true if the device is a dGPU with ACPI power control,
255 * otherwise return false.
256 */
257 bool amdgpu_device_supports_boco(struct drm_device *dev)
258 {
259 struct amdgpu_device *adev = drm_to_adev(dev);
260
261 if (adev->has_pr3 ||
262 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
263 return true;
264 return false;
265 }
266
267 /**
268 * amdgpu_device_supports_baco - Does the device support BACO
269 *
270 * @dev: drm_device pointer
271 *
272  * Returns true if the device supports BACO,
273 * otherwise return false.
274 */
275 bool amdgpu_device_supports_baco(struct drm_device *dev)
276 {
277 struct amdgpu_device *adev = drm_to_adev(dev);
278
279 return amdgpu_asic_supports_baco(adev);
280 }
281
282 /**
283 * amdgpu_device_supports_smart_shift - Is the device dGPU with
284 * smart shift support
285 *
286 * @dev: drm_device pointer
287 *
288 * Returns true if the device is a dGPU with Smart Shift support,
289 * otherwise returns false.
290 */
291 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
292 {
293 return (amdgpu_device_supports_boco(dev) &&
294 amdgpu_acpi_is_power_shift_control_supported());
295 }
296
297 /*
298 * VRAM access helper functions
299 */
300
301 /**
302 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
303 *
304 * @adev: amdgpu_device pointer
305 * @pos: offset of the buffer in vram
306 * @buf: virtual address of the buffer in system memory
307  * @size: read/write size, the buffer at @buf must be at least @size bytes
308 * @write: true - write to vram, otherwise - read from vram
309 */
310 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
311 void *buf, size_t size, bool write)
312 {
313 unsigned long flags;
314 uint32_t hi = ~0, tmp = 0;
315 uint32_t *data = buf;
316 uint64_t last;
317 int idx;
318
319 if (!drm_dev_enter(adev_to_drm(adev), &idx))
320 return;
321
322 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
323
324 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
325 for (last = pos + size; pos < last; pos += 4) {
326 tmp = pos >> 31;
327
328 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
329 if (tmp != hi) {
330 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
331 hi = tmp;
332 }
333 if (write)
334 WREG32_NO_KIQ(mmMM_DATA, *data++);
335 else
336 *data++ = RREG32_NO_KIQ(mmMM_DATA);
337 }
338
339 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
340 drm_dev_exit(idx);
341 }
342
343 /**
344  * amdgpu_device_aper_access - access vram via the vram aperture
345 *
346 * @adev: amdgpu_device pointer
347 * @pos: offset of the buffer in vram
348 * @buf: virtual address of the buffer in system memory
349  * @size: read/write size, the buffer at @buf must be at least @size bytes
350 * @write: true - write to vram, otherwise - read from vram
351 *
352  * Returns the number of bytes transferred.
353 */
354 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
355 void *buf, size_t size, bool write)
356 {
357 #ifdef CONFIG_64BIT
358 void __iomem *addr;
359 size_t count = 0;
360 uint64_t last;
361
362 if (!adev->mman.aper_base_kaddr)
363 return 0;
364
365 last = min(pos + size, adev->gmc.visible_vram_size);
366 if (last > pos) {
367 addr = adev->mman.aper_base_kaddr + pos;
368 count = last - pos;
369
370 if (write) {
371 memcpy_toio(addr, buf, count);
372 mb();
373 amdgpu_device_flush_hdp(adev, NULL);
374 } else {
375 amdgpu_device_invalidate_hdp(adev, NULL);
376 mb();
377 memcpy_fromio(buf, addr, count);
378 }
379
380 }
381
382 return count;
383 #else
384 return 0;
385 #endif
386 }
387
388 /**
389 * amdgpu_device_vram_access - read/write a buffer in vram
390 *
391 * @adev: amdgpu_device pointer
392 * @pos: offset of the buffer in vram
393 * @buf: virtual address of the buffer in system memory
394  * @size: read/write size, the buffer at @buf must be at least @size bytes
395 * @write: true - write to vram, otherwise - read from vram
396 */
397 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
398 void *buf, size_t size, bool write)
399 {
400 size_t count;
401
402 	/* try using the vram aperture to access vram first */
403 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
404 size -= count;
405 if (size) {
406 		/* use MM to access the rest of vram */
407 pos += count;
408 buf += count;
409 amdgpu_device_mm_access(adev, pos, buf, size, write);
410 }
411 }
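/*
 * Illustrative caller sketch (assumed helper, not from the original file):
 * read a few dwords from VRAM and write one back. Offsets and sizes must be
 * dword aligned, as amdgpu_device_mm_access() requires.
 */
static void amdgpu_device_vram_access_example(struct amdgpu_device *adev)
{
	u32 data[4] = {};

	/* read 16 bytes starting at VRAM offset 0x1000 */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), false);

	/* patch the first dword and write it back */
	data[0] |= 0x1;
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data[0]), true);
}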
412
413 /*
414 * register access helper functions.
415 */
416
417 /* Check if hw access should be skipped because of hotplug or device error */
418 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
419 {
420 if (adev->no_hw_access)
421 return true;
422
423 #ifdef CONFIG_LOCKDEP
424 /*
425 * This is a bit complicated to understand, so worth a comment. What we assert
426 * here is that the GPU reset is not running on another thread in parallel.
427 *
428 * For this we trylock the read side of the reset semaphore, if that succeeds
429 	 * we know that the reset is not running in parallel.
430 *
431 * If the trylock fails we assert that we are either already holding the read
432 * side of the lock or are the reset thread itself and hold the write side of
433 * the lock.
434 */
435 if (in_task()) {
436 if (down_read_trylock(&adev->reset_domain->sem))
437 up_read(&adev->reset_domain->sem);
438 else
439 lockdep_assert_held(&adev->reset_domain->sem);
440 }
441 #endif
442 return false;
443 }
444
445 /**
446 * amdgpu_device_rreg - read a memory mapped IO or indirect register
447 *
448 * @adev: amdgpu_device pointer
449 * @reg: dword aligned register offset
450 * @acc_flags: access flags which require special behavior
451 *
452 * Returns the 32 bit value from the offset specified.
453 */
454 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
455 uint32_t reg, uint32_t acc_flags)
456 {
457 uint32_t ret;
458
459 if (amdgpu_device_skip_hw_access(adev))
460 return 0;
461
462 if ((reg * 4) < adev->rmmio_size) {
463 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
464 amdgpu_sriov_runtime(adev) &&
465 down_read_trylock(&adev->reset_domain->sem)) {
466 ret = amdgpu_kiq_rreg(adev, reg);
467 up_read(&adev->reset_domain->sem);
468 } else {
469 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
470 }
471 } else {
472 ret = adev->pcie_rreg(adev, reg * 4);
473 }
474
475 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
476
477 return ret;
478 }
479
480 /*
481  * MMIO register byte read helper function
482  * @offset: byte offset from MMIO start
483 *
484 */
485
486 /**
487 * amdgpu_mm_rreg8 - read a memory mapped IO register
488 *
489 * @adev: amdgpu_device pointer
490 * @offset: byte aligned register offset
491 *
492 * Returns the 8 bit value from the offset specified.
493 */
494 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
495 {
496 if (amdgpu_device_skip_hw_access(adev))
497 return 0;
498
499 if (offset < adev->rmmio_size)
500 return (readb(adev->rmmio + offset));
501 BUG();
502 }
503
504 /*
505  * MMIO register byte write helper function
506  * @offset: byte offset from MMIO start
507  * @value: the value to be written to the register
508 *
509 */
510 /**
511  * amdgpu_mm_wreg8 - write a memory mapped IO register
512 *
513 * @adev: amdgpu_device pointer
514 * @offset: byte aligned register offset
515 * @value: 8 bit value to write
516 *
517 * Writes the value specified to the offset specified.
518 */
519 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
520 {
521 if (amdgpu_device_skip_hw_access(adev))
522 return;
523
524 if (offset < adev->rmmio_size)
525 writeb(value, adev->rmmio + offset);
526 else
527 BUG();
528 }
529
530 /**
531 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
532 *
533 * @adev: amdgpu_device pointer
534 * @reg: dword aligned register offset
535 * @v: 32 bit value to write to the register
536 * @acc_flags: access flags which require special behavior
537 *
538 * Writes the value specified to the offset specified.
539 */
540 void amdgpu_device_wreg(struct amdgpu_device *adev,
541 uint32_t reg, uint32_t v,
542 uint32_t acc_flags)
543 {
544 if (amdgpu_device_skip_hw_access(adev))
545 return;
546
547 if ((reg * 4) < adev->rmmio_size) {
548 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
549 amdgpu_sriov_runtime(adev) &&
550 down_read_trylock(&adev->reset_domain->sem)) {
551 amdgpu_kiq_wreg(adev, reg, v);
552 up_read(&adev->reset_domain->sem);
553 } else {
554 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
555 }
556 } else {
557 adev->pcie_wreg(adev, reg * 4, v);
558 }
559
560 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
561 }
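/*
 * Illustrative sketch (assumed example with a placeholder register offset,
 * not from the original file): most driver code reaches amdgpu_device_rreg()
 * and amdgpu_device_wreg() through the RREG32()/WREG32() style macros, e.g.
 * for a simple read-modify-write.
 */
static void amdgpu_device_reg_rmw_example(struct amdgpu_device *adev)
{
	u32 tmp;

	tmp = RREG32(0x315c);	/* dword register offset, placeholder */
	tmp |= 0x1;
	WREG32(0x315c, tmp);
}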
562
563 /**
564 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
565 *
566 * @adev: amdgpu_device pointer
567 * @reg: mmio/rlc register
568 * @v: value to write
569 *
570  * This function is invoked only for debugfs register access.
571 */
572 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
573 uint32_t reg, uint32_t v)
574 {
575 if (amdgpu_device_skip_hw_access(adev))
576 return;
577
578 if (amdgpu_sriov_fullaccess(adev) &&
579 adev->gfx.rlc.funcs &&
580 adev->gfx.rlc.funcs->is_rlcg_access_range) {
581 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
582 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
583 } else if ((reg * 4) >= adev->rmmio_size) {
584 adev->pcie_wreg(adev, reg * 4, v);
585 } else {
586 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
587 }
588 }
589
590 /**
591 * amdgpu_mm_rdoorbell - read a doorbell dword
592 *
593 * @adev: amdgpu_device pointer
594 * @index: doorbell index
595 *
596 * Returns the value in the doorbell aperture at the
597 * requested doorbell index (CIK).
598 */
599 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
600 {
601 if (amdgpu_device_skip_hw_access(adev))
602 return 0;
603
604 if (index < adev->doorbell.num_doorbells) {
605 return readl(adev->doorbell.ptr + index);
606 } else {
607 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
608 return 0;
609 }
610 }
611
612 /**
613 * amdgpu_mm_wdoorbell - write a doorbell dword
614 *
615 * @adev: amdgpu_device pointer
616 * @index: doorbell index
617 * @v: value to write
618 *
619 * Writes @v to the doorbell aperture at the
620 * requested doorbell index (CIK).
621 */
622 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
623 {
624 if (amdgpu_device_skip_hw_access(adev))
625 return;
626
627 if (index < adev->doorbell.num_doorbells) {
628 writel(v, adev->doorbell.ptr + index);
629 } else {
630 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
631 }
632 }
633
634 /**
635 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
636 *
637 * @adev: amdgpu_device pointer
638 * @index: doorbell index
639 *
640 * Returns the value in the doorbell aperture at the
641 * requested doorbell index (VEGA10+).
642 */
643 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
644 {
645 if (amdgpu_device_skip_hw_access(adev))
646 return 0;
647
648 if (index < adev->doorbell.num_doorbells) {
649 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
650 } else {
651 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
652 return 0;
653 }
654 }
655
656 /**
657 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
658 *
659 * @adev: amdgpu_device pointer
660 * @index: doorbell index
661 * @v: value to write
662 *
663 * Writes @v to the doorbell aperture at the
664 * requested doorbell index (VEGA10+).
665 */
666 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
667 {
668 if (amdgpu_device_skip_hw_access(adev))
669 return;
670
671 if (index < adev->doorbell.num_doorbells) {
672 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
673 } else {
674 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
675 }
676 }
677
678 /**
679 * amdgpu_device_indirect_rreg - read an indirect register
680 *
681 * @adev: amdgpu_device pointer
682 * @pcie_index: mmio register offset
683 * @pcie_data: mmio register offset
684 * @reg_addr: indirect register address to read from
685 *
686 * Returns the value of indirect register @reg_addr
687 */
688 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
689 u32 pcie_index, u32 pcie_data,
690 u32 reg_addr)
691 {
692 unsigned long flags;
693 u32 r;
694 void __iomem *pcie_index_offset;
695 void __iomem *pcie_data_offset;
696
697 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
698 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
699 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
700
701 writel(reg_addr, pcie_index_offset);
702 readl(pcie_index_offset);
703 r = readl(pcie_data_offset);
704 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
705
706 return r;
707 }
708
709 /**
710 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
711 *
712 * @adev: amdgpu_device pointer
713 * @pcie_index: mmio register offset
714 * @pcie_data: mmio register offset
715 * @reg_addr: indirect register address to read from
716 *
717 * Returns the value of indirect register @reg_addr
718 */
719 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
720 u32 pcie_index, u32 pcie_data,
721 u32 reg_addr)
722 {
723 unsigned long flags;
724 u64 r;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_data_offset;
727
728 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
729 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
730 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
731
732 /* read low 32 bits */
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 r = readl(pcie_data_offset);
736 /* read high 32 bits */
737 writel(reg_addr + 4, pcie_index_offset);
738 readl(pcie_index_offset);
739 r |= ((u64)readl(pcie_data_offset) << 32);
740 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
741
742 return r;
743 }
744
745 /**
746 * amdgpu_device_indirect_wreg - write an indirect register address
747 *
748 * @adev: amdgpu_device pointer
749 * @pcie_index: mmio register offset
750 * @pcie_data: mmio register offset
751 * @reg_addr: indirect register offset
752 * @reg_data: indirect register data
753 *
754 */
755 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
756 u32 pcie_index, u32 pcie_data,
757 u32 reg_addr, u32 reg_data)
758 {
759 unsigned long flags;
760 void __iomem *pcie_index_offset;
761 void __iomem *pcie_data_offset;
762
763 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
764 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
765 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
766
767 writel(reg_addr, pcie_index_offset);
768 readl(pcie_index_offset);
769 writel(reg_data, pcie_data_offset);
770 readl(pcie_data_offset);
771 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
772 }
773
774 /**
775 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
776 *
777 * @adev: amdgpu_device pointer
778 * @pcie_index: mmio register offset
779 * @pcie_data: mmio register offset
780 * @reg_addr: indirect register offset
781 * @reg_data: indirect register data
782 *
783 */
784 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
785 u32 pcie_index, u32 pcie_data,
786 u32 reg_addr, u64 reg_data)
787 {
788 unsigned long flags;
789 void __iomem *pcie_index_offset;
790 void __iomem *pcie_data_offset;
791
792 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
793 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
794 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
795
796 /* write low 32 bits */
797 writel(reg_addr, pcie_index_offset);
798 readl(pcie_index_offset);
799 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
800 readl(pcie_data_offset);
801 /* write high 32 bits */
802 writel(reg_addr + 4, pcie_index_offset);
803 readl(pcie_index_offset);
804 writel((u32)(reg_data >> 32), pcie_data_offset);
805 readl(pcie_data_offset);
806 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
807 }
808
809 /**
810 * amdgpu_invalid_rreg - dummy reg read function
811 *
812 * @adev: amdgpu_device pointer
813 * @reg: offset of register
814 *
815 * Dummy register read function. Used for register blocks
816 * that certain asics don't have (all asics).
817 * Returns the value in the register.
818 */
819 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
820 {
821 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
822 BUG();
823 return 0;
824 }
825
826 /**
827 * amdgpu_invalid_wreg - dummy reg write function
828 *
829 * @adev: amdgpu_device pointer
830 * @reg: offset of register
831 * @v: value to write to the register
832 *
833  * Dummy register write function. Used for register blocks
834 * that certain asics don't have (all asics).
835 */
836 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
837 {
838 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
839 reg, v);
840 BUG();
841 }
842
843 /**
844 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
845 *
846 * @adev: amdgpu_device pointer
847 * @reg: offset of register
848 *
849 * Dummy register read function. Used for register blocks
850 * that certain asics don't have (all asics).
851 * Returns the value in the register.
852 */
853 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
854 {
855 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
856 BUG();
857 return 0;
858 }
859
860 /**
861  * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
862 *
863 * @adev: amdgpu_device pointer
864 * @reg: offset of register
865 * @v: value to write to the register
866 *
867  * Dummy register write function. Used for register blocks
868 * that certain asics don't have (all asics).
869 */
870 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
871 {
872 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
873 reg, v);
874 BUG();
875 }
876
877 /**
878 * amdgpu_block_invalid_rreg - dummy reg read function
879 *
880 * @adev: amdgpu_device pointer
881 * @block: offset of instance
882 * @reg: offset of register
883 *
884 * Dummy register read function. Used for register blocks
885 * that certain asics don't have (all asics).
886 * Returns the value in the register.
887 */
888 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
889 uint32_t block, uint32_t reg)
890 {
891 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
892 reg, block);
893 BUG();
894 return 0;
895 }
896
897 /**
898 * amdgpu_block_invalid_wreg - dummy reg write function
899 *
900 * @adev: amdgpu_device pointer
901 * @block: offset of instance
902 * @reg: offset of register
903 * @v: value to write to the register
904 *
905  * Dummy register write function. Used for register blocks
906 * that certain asics don't have (all asics).
907 */
908 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
909 uint32_t block,
910 uint32_t reg, uint32_t v)
911 {
912 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
913 reg, block, v);
914 BUG();
915 }
916
917 /**
918 * amdgpu_device_asic_init - Wrapper for atom asic_init
919 *
920 * @adev: amdgpu_device pointer
921 *
922 * Does any asic specific work and then calls atom asic init.
923 */
924 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
925 {
926 amdgpu_asic_pre_asic_init(adev);
927
928 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
929 return amdgpu_atomfirmware_asic_init(adev, true);
930 else
931 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
932 }
933
934 /**
935 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
936 *
937 * @adev: amdgpu_device pointer
938 *
939 * Allocates a scratch page of VRAM for use by various things in the
940 * driver.
941 */
942 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
943 {
944 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
945 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
946 &adev->vram_scratch.robj,
947 &adev->vram_scratch.gpu_addr,
948 (void **)&adev->vram_scratch.ptr);
949 }
950
951 /**
952 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
953 *
954 * @adev: amdgpu_device pointer
955 *
956 * Frees the VRAM scratch page.
957 */
958 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
959 {
960 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
961 }
962
963 /**
964 * amdgpu_device_program_register_sequence - program an array of registers.
965 *
966 * @adev: amdgpu_device pointer
967 * @registers: pointer to the register array
968 * @array_size: size of the register array
969 *
970  * Programs an array of registers with AND and OR masks.
971 * This is a helper for setting golden registers.
972 */
973 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
974 const u32 *registers,
975 const u32 array_size)
976 {
977 u32 tmp, reg, and_mask, or_mask;
978 int i;
979
980 if (array_size % 3)
981 return;
982
983 for (i = 0; i < array_size; i +=3) {
984 reg = registers[i + 0];
985 and_mask = registers[i + 1];
986 or_mask = registers[i + 2];
987
988 if (and_mask == 0xffffffff) {
989 tmp = or_mask;
990 } else {
991 tmp = RREG32(reg);
992 tmp &= ~and_mask;
993 if (adev->family >= AMDGPU_FAMILY_AI)
994 tmp |= (or_mask & and_mask);
995 else
996 tmp |= or_mask;
997 }
998 WREG32(reg, tmp);
999 }
1000 }
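/*
 * Illustrative sketch (placeholder register offsets, not from the original
 * file): a golden register list is a flat array of {offset, and_mask, or_mask}
 * triplets consumed by amdgpu_device_program_register_sequence().
 */
static const u32 example_golden_settings[] = {
	/* offset       and_mask    or_mask */
	0x0000315c, 0xffffffff, 0x00000042,	/* write 0x42 unconditionally */
	0x00009508, 0x0000000f, 0x00000002,	/* RMW: clear bits 3:0, set 0x2 */
};

static void amdgpu_device_golden_example(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev, example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}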
1001
1002 /**
1003 * amdgpu_device_pci_config_reset - reset the GPU
1004 *
1005 * @adev: amdgpu_device pointer
1006 *
1007 * Resets the GPU using the pci config reset sequence.
1008 * Only applicable to asics prior to vega10.
1009 */
1010 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1011 {
1012 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1013 }
1014
1015 /**
1016 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1017 *
1018 * @adev: amdgpu_device pointer
1019 *
1020 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1021 */
1022 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1023 {
1024 return pci_reset_function(adev->pdev);
1025 }
1026
1027 /*
1028  * GPU doorbell aperture helper functions.
1029 */
1030 /**
1031 * amdgpu_device_doorbell_init - Init doorbell driver information.
1032 *
1033 * @adev: amdgpu_device pointer
1034 *
1035 * Init doorbell driver information (CIK)
1036 * Returns 0 on success, error on failure.
1037 */
1038 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1039 {
1040
1041 /* No doorbell on SI hardware generation */
1042 if (adev->asic_type < CHIP_BONAIRE) {
1043 adev->doorbell.base = 0;
1044 adev->doorbell.size = 0;
1045 adev->doorbell.num_doorbells = 0;
1046 adev->doorbell.ptr = NULL;
1047 return 0;
1048 }
1049
1050 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1051 return -EINVAL;
1052
1053 amdgpu_asic_init_doorbell_index(adev);
1054
1055 /* doorbell bar mapping */
1056 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1057 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1058
1059 if (adev->enable_mes) {
1060 adev->doorbell.num_doorbells =
1061 adev->doorbell.size / sizeof(u32);
1062 } else {
1063 adev->doorbell.num_doorbells =
1064 min_t(u32, adev->doorbell.size / sizeof(u32),
1065 adev->doorbell_index.max_assignment+1);
1066 if (adev->doorbell.num_doorbells == 0)
1067 return -EINVAL;
1068
1069 		/* For Vega, reserve and map two pages on the doorbell BAR since the
1070 		 * SDMA paging queue doorbell uses the second page. The
1071 		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1072 		 * doorbells are in the first page. So with the paging queue enabled,
1073 		 * max num_doorbells must grow by one extra page (0x400 in dwords).
1074 */
1075 if (adev->asic_type >= CHIP_VEGA10)
1076 adev->doorbell.num_doorbells += 0x400;
1077 }
1078
1079 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1080 adev->doorbell.num_doorbells *
1081 sizeof(u32));
1082 if (adev->doorbell.ptr == NULL)
1083 return -ENOMEM;
1084
1085 return 0;
1086 }
1087
1088 /**
1089 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1090 *
1091 * @adev: amdgpu_device pointer
1092 *
1093 * Tear down doorbell driver information (CIK)
1094 */
1095 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1096 {
1097 iounmap(adev->doorbell.ptr);
1098 adev->doorbell.ptr = NULL;
1099 }
1100
1101
1102
1103 /*
1104 * amdgpu_device_wb_*()
1105 * Writeback is the method by which the GPU updates special pages in memory
1106  * with the status of certain GPU events (fences, ring pointers, etc.).
1107 */
1108
1109 /**
1110 * amdgpu_device_wb_fini - Disable Writeback and free memory
1111 *
1112 * @adev: amdgpu_device pointer
1113 *
1114 * Disables Writeback and frees the Writeback memory (all asics).
1115 * Used at driver shutdown.
1116 */
1117 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1118 {
1119 if (adev->wb.wb_obj) {
1120 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1121 &adev->wb.gpu_addr,
1122 (void **)&adev->wb.wb);
1123 adev->wb.wb_obj = NULL;
1124 }
1125 }
1126
1127 /**
1128 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1129 *
1130 * @adev: amdgpu_device pointer
1131 *
1132 * Initializes writeback and allocates writeback memory (all asics).
1133 * Used at driver startup.
1134  * Returns 0 on success or a negative error code on failure.
1135 */
1136 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1137 {
1138 int r;
1139
1140 if (adev->wb.wb_obj == NULL) {
1141 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1142 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1143 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1144 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1145 (void **)&adev->wb.wb);
1146 if (r) {
1147 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1148 return r;
1149 }
1150
1151 adev->wb.num_wb = AMDGPU_MAX_WB;
1152 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1153
1154 /* clear wb memory */
1155 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1156 }
1157
1158 return 0;
1159 }
1160
1161 /**
1162 * amdgpu_device_wb_get - Allocate a wb entry
1163 *
1164 * @adev: amdgpu_device pointer
1165 * @wb: wb index
1166 *
1167 * Allocate a wb slot for use by the driver (all asics).
1168 * Returns 0 on success or -EINVAL on failure.
1169 */
1170 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1171 {
1172 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1173
1174 if (offset < adev->wb.num_wb) {
1175 __set_bit(offset, adev->wb.used);
1176 *wb = offset << 3; /* convert to dw offset */
1177 return 0;
1178 } else {
1179 return -EINVAL;
1180 }
1181 }
1182
1183 /**
1184 * amdgpu_device_wb_free - Free a wb entry
1185 *
1186 * @adev: amdgpu_device pointer
1187 * @wb: wb index
1188 *
1189 * Free a wb slot allocated for use by the driver (all asics)
1190 */
1191 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1192 {
1193 wb >>= 3;
1194 if (wb < adev->wb.num_wb)
1195 __clear_bit(wb, adev->wb.used);
1196 }
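/*
 * Illustrative sketch (assumed example, not from the original file): typical
 * writeback usage. A slot is handed out as a dword offset; the GPU writes to
 * adev->wb.gpu_addr + offset * 4 and the CPU reads the same dword through
 * adev->wb.wb[offset].
 */
static int amdgpu_device_wb_example(struct amdgpu_device *adev)
{
	u32 offset;
	u64 gpu_addr;
	int r;

	r = amdgpu_device_wb_get(adev, &offset);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (u64)offset * 4;	/* give this to the GPU */
	dev_info(adev->dev, "wb slot %u at gpu addr 0x%llx, value 0x%08x\n",
		 offset, gpu_addr, adev->wb.wb[offset]);

	amdgpu_device_wb_free(adev, offset);
	return 0;
}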
1197
1198 /**
1199 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1200 *
1201 * @adev: amdgpu_device pointer
1202 *
1203 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1204  * to fail, but if any of the BARs is not accessible after the resize we abort
1205 * driver loading by returning -ENODEV.
1206 */
1207 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1208 {
1209 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1210 struct pci_bus *root;
1211 struct resource *res;
1212 unsigned i;
1213 u16 cmd;
1214 int r;
1215
1216 /* Bypass for VF */
1217 if (amdgpu_sriov_vf(adev))
1218 return 0;
1219
1220 /* skip if the bios has already enabled large BAR */
1221 if (adev->gmc.real_vram_size &&
1222 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1223 return 0;
1224
1225 /* Check if the root BUS has 64bit memory resources */
1226 root = adev->pdev->bus;
1227 while (root->parent)
1228 root = root->parent;
1229
1230 pci_bus_for_each_resource(root, res, i) {
1231 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1232 res->start > 0x100000000ull)
1233 break;
1234 }
1235
1236 /* Trying to resize is pointless without a root hub window above 4GB */
1237 if (!res)
1238 return 0;
1239
1240 /* Limit the BAR size to what is available */
1241 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1242 rbar_size);
1243
1244 /* Disable memory decoding while we change the BAR addresses and size */
1245 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1246 pci_write_config_word(adev->pdev, PCI_COMMAND,
1247 cmd & ~PCI_COMMAND_MEMORY);
1248
1249 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1250 amdgpu_device_doorbell_fini(adev);
1251 if (adev->asic_type >= CHIP_BONAIRE)
1252 pci_release_resource(adev->pdev, 2);
1253
1254 pci_release_resource(adev->pdev, 0);
1255
1256 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1257 if (r == -ENOSPC)
1258 DRM_INFO("Not enough PCI address space for a large BAR.");
1259 else if (r && r != -ENOTSUPP)
1260 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1261
1262 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1263
1264 /* When the doorbell or fb BAR isn't available we have no chance of
1265 * using the device.
1266 */
1267 r = amdgpu_device_doorbell_init(adev);
1268 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1269 return -ENODEV;
1270
1271 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1272
1273 return 0;
1274 }
1275
1276 /*
1277  * GPU helper functions.
1278 */
1279 /**
1280  * amdgpu_device_need_post - check if the hw needs post or not
1281 *
1282 * @adev: amdgpu_device pointer
1283 *
1284 * Check if the asic has been initialized (all asics) at driver startup
1285  * or if post is needed because a hw reset was performed.
1286  * Returns true if post is needed or false if not.
1287 */
1288 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1289 {
1290 uint32_t reg;
1291
1292 if (amdgpu_sriov_vf(adev))
1293 return false;
1294
1295 if (amdgpu_passthrough(adev)) {
1296 		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
1297 		 * reboot some old SMC firmware still needs the driver to do a vPost,
1298 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1299 		 * this flaw, so force vPost for SMC versions below 22.15.
1300 */
1301 if (adev->asic_type == CHIP_FIJI) {
1302 int err;
1303 uint32_t fw_ver;
1304 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1305 			/* force vPost if an error occurred */
1306 if (err)
1307 return true;
1308
1309 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1310 if (fw_ver < 0x00160e00)
1311 return true;
1312 }
1313 }
1314
1315 /* Don't post if we need to reset whole hive on init */
1316 if (adev->gmc.xgmi.pending_reset)
1317 return false;
1318
1319 if (adev->has_hw_reset) {
1320 adev->has_hw_reset = false;
1321 return true;
1322 }
1323
1324 /* bios scratch used on CIK+ */
1325 if (adev->asic_type >= CHIP_BONAIRE)
1326 return amdgpu_atombios_scratch_need_asic_init(adev);
1327
1328 /* check MEM_SIZE for older asics */
1329 reg = amdgpu_asic_get_config_memsize(adev);
1330
1331 if ((reg != 0) && (reg != 0xffffffff))
1332 return false;
1333
1334 return true;
1335 }
1336
1337 /**
1338 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1339 *
1340 * @adev: amdgpu_device pointer
1341 *
1342 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1343 * be set for this device.
1344 *
1345 * Returns true if it should be used or false if not.
1346 */
1347 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1348 {
1349 switch (amdgpu_aspm) {
1350 case -1:
1351 break;
1352 case 0:
1353 return false;
1354 case 1:
1355 return true;
1356 default:
1357 return false;
1358 }
1359 return pcie_aspm_enabled(adev->pdev);
1360 }
1361
1362 bool amdgpu_device_aspm_support_quirk(void)
1363 {
1364 #if IS_ENABLED(CONFIG_X86)
1365 struct cpuinfo_x86 *c = &cpu_data(0);
1366
1367 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1368 #else
1369 return true;
1370 #endif
1371 }
1372
1373 /* if we get transitioned to only one device, take VGA back */
1374 /**
1375 * amdgpu_device_vga_set_decode - enable/disable vga decode
1376 *
1377 * @pdev: PCI device pointer
1378 * @state: enable/disable vga decode
1379 *
1380 * Enable/disable vga decode (all asics).
1381 * Returns VGA resource flags.
1382 */
1383 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1384 bool state)
1385 {
1386 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1387 amdgpu_asic_set_vga_state(adev, state);
1388 if (state)
1389 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1390 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1391 else
1392 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1393 }
1394
1395 /**
1396 * amdgpu_device_check_block_size - validate the vm block size
1397 *
1398 * @adev: amdgpu_device pointer
1399 *
1400 * Validates the vm block size specified via module parameter.
1401  * The vm block size defines the number of bits in the page table versus the page directory,
1402 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1403 * page table and the remaining bits are in the page directory.
1404 */
1405 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1406 {
1407 /* defines number of bits in page table versus page directory,
1408 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1409 * page table and the remaining bits are in the page directory */
1410 if (amdgpu_vm_block_size == -1)
1411 return;
1412
1413 if (amdgpu_vm_block_size < 9) {
1414 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1415 amdgpu_vm_block_size);
1416 amdgpu_vm_block_size = -1;
1417 }
1418 }
1419
1420 /**
1421 * amdgpu_device_check_vm_size - validate the vm size
1422 *
1423 * @adev: amdgpu_device pointer
1424 *
1425 * Validates the vm size in GB specified via module parameter.
1426 * The VM size is the size of the GPU virtual memory space in GB.
1427 */
1428 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1429 {
1430 /* no need to check the default value */
1431 if (amdgpu_vm_size == -1)
1432 return;
1433
1434 if (amdgpu_vm_size < 1) {
1435 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1436 amdgpu_vm_size);
1437 amdgpu_vm_size = -1;
1438 }
1439 }
1440
1441 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1442 {
1443 struct sysinfo si;
1444 bool is_os_64 = (sizeof(void *) == 8);
1445 uint64_t total_memory;
1446 uint64_t dram_size_seven_GB = 0x1B8000000;
1447 uint64_t dram_size_three_GB = 0xB8000000;
1448
1449 if (amdgpu_smu_memory_pool_size == 0)
1450 return;
1451
1452 if (!is_os_64) {
1453 DRM_WARN("Not 64-bit OS, feature not supported\n");
1454 goto def_value;
1455 }
1456 si_meminfo(&si);
1457 total_memory = (uint64_t)si.totalram * si.mem_unit;
1458
1459 if ((amdgpu_smu_memory_pool_size == 1) ||
1460 (amdgpu_smu_memory_pool_size == 2)) {
1461 if (total_memory < dram_size_three_GB)
1462 goto def_value1;
1463 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1464 (amdgpu_smu_memory_pool_size == 8)) {
1465 if (total_memory < dram_size_seven_GB)
1466 goto def_value1;
1467 } else {
1468 DRM_WARN("Smu memory pool size not supported\n");
1469 goto def_value;
1470 }
1471 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1472
1473 return;
1474
1475 def_value1:
1476 DRM_WARN("No enough system memory\n");
1477 def_value:
1478 adev->pm.smu_prv_buffer_size = 0;
1479 }
1480
1481 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1482 {
1483 if (!(adev->flags & AMD_IS_APU) ||
1484 adev->asic_type < CHIP_RAVEN)
1485 return 0;
1486
1487 switch (adev->asic_type) {
1488 case CHIP_RAVEN:
1489 if (adev->pdev->device == 0x15dd)
1490 adev->apu_flags |= AMD_APU_IS_RAVEN;
1491 if (adev->pdev->device == 0x15d8)
1492 adev->apu_flags |= AMD_APU_IS_PICASSO;
1493 break;
1494 case CHIP_RENOIR:
1495 if ((adev->pdev->device == 0x1636) ||
1496 (adev->pdev->device == 0x164c))
1497 adev->apu_flags |= AMD_APU_IS_RENOIR;
1498 else
1499 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1500 break;
1501 case CHIP_VANGOGH:
1502 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1503 break;
1504 case CHIP_YELLOW_CARP:
1505 break;
1506 case CHIP_CYAN_SKILLFISH:
1507 if ((adev->pdev->device == 0x13FE) ||
1508 (adev->pdev->device == 0x143F))
1509 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1510 break;
1511 default:
1512 break;
1513 }
1514
1515 return 0;
1516 }
1517
1518 /**
1519 * amdgpu_device_check_arguments - validate module params
1520 *
1521 * @adev: amdgpu_device pointer
1522 *
1523 * Validates certain module parameters and updates
1524 * the associated values used by the driver (all asics).
1525 */
1526 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1527 {
1528 if (amdgpu_sched_jobs < 4) {
1529 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1530 amdgpu_sched_jobs);
1531 amdgpu_sched_jobs = 4;
1532 } else if (!is_power_of_2(amdgpu_sched_jobs)){
1533 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1534 amdgpu_sched_jobs);
1535 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1536 }
1537
1538 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1539 		/* gart size must be greater than or equal to 32M */
1540 dev_warn(adev->dev, "gart size (%d) too small\n",
1541 amdgpu_gart_size);
1542 amdgpu_gart_size = -1;
1543 }
1544
1545 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1546 		/* gtt size must be greater than or equal to 32M */
1547 dev_warn(adev->dev, "gtt size (%d) too small\n",
1548 amdgpu_gtt_size);
1549 amdgpu_gtt_size = -1;
1550 }
1551
1552 /* valid range is between 4 and 9 inclusive */
1553 if (amdgpu_vm_fragment_size != -1 &&
1554 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1555 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1556 amdgpu_vm_fragment_size = -1;
1557 }
1558
1559 if (amdgpu_sched_hw_submission < 2) {
1560 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1561 amdgpu_sched_hw_submission);
1562 amdgpu_sched_hw_submission = 2;
1563 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1564 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1565 amdgpu_sched_hw_submission);
1566 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1567 }
1568
1569 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1570 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1571 amdgpu_reset_method = -1;
1572 }
1573
1574 amdgpu_device_check_smu_prv_buffer_size(adev);
1575
1576 amdgpu_device_check_vm_size(adev);
1577
1578 amdgpu_device_check_block_size(adev);
1579
1580 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1581
1582 return 0;
1583 }
1584
1585 /**
1586 * amdgpu_switcheroo_set_state - set switcheroo state
1587 *
1588 * @pdev: pci dev pointer
1589 * @state: vga_switcheroo state
1590 *
1591 * Callback for the switcheroo driver. Suspends or resumes
1592  * the asic before or after it is powered up using ACPI methods.
1593 */
1594 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1595 enum vga_switcheroo_state state)
1596 {
1597 struct drm_device *dev = pci_get_drvdata(pdev);
1598 int r;
1599
1600 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1601 return;
1602
1603 if (state == VGA_SWITCHEROO_ON) {
1604 pr_info("switched on\n");
1605 /* don't suspend or resume card normally */
1606 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1607
1608 pci_set_power_state(pdev, PCI_D0);
1609 amdgpu_device_load_pci_state(pdev);
1610 r = pci_enable_device(pdev);
1611 if (r)
1612 DRM_WARN("pci_enable_device failed (%d)\n", r);
1613 amdgpu_device_resume(dev, true);
1614
1615 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1616 } else {
1617 pr_info("switched off\n");
1618 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1619 amdgpu_device_suspend(dev, true);
1620 amdgpu_device_cache_pci_state(pdev);
1621 /* Shut down the device */
1622 pci_disable_device(pdev);
1623 pci_set_power_state(pdev, PCI_D3cold);
1624 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1625 }
1626 }
1627
1628 /**
1629 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1630 *
1631 * @pdev: pci dev pointer
1632 *
1633  * Callback for the switcheroo driver. Checks if the switcheroo
1634  * state can be changed.
1635 * Returns true if the state can be changed, false if not.
1636 */
1637 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1638 {
1639 struct drm_device *dev = pci_get_drvdata(pdev);
1640
1641 /*
1642 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1643 * locking inversion with the driver load path. And the access here is
1644 * completely racy anyway. So don't bother with locking for now.
1645 */
1646 return atomic_read(&dev->open_count) == 0;
1647 }
1648
1649 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1650 .set_gpu_state = amdgpu_switcheroo_set_state,
1651 .reprobe = NULL,
1652 .can_switch = amdgpu_switcheroo_can_switch,
1653 };
1654
1655 /**
1656 * amdgpu_device_ip_set_clockgating_state - set the CG state
1657 *
1658 * @dev: amdgpu_device pointer
1659 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1660 * @state: clockgating state (gate or ungate)
1661 *
1662 * Sets the requested clockgating state for all instances of
1663 * the hardware IP specified.
1664 * Returns the error code from the last instance.
1665 */
1666 int amdgpu_device_ip_set_clockgating_state(void *dev,
1667 enum amd_ip_block_type block_type,
1668 enum amd_clockgating_state state)
1669 {
1670 struct amdgpu_device *adev = dev;
1671 int i, r = 0;
1672
1673 for (i = 0; i < adev->num_ip_blocks; i++) {
1674 if (!adev->ip_blocks[i].status.valid)
1675 continue;
1676 if (adev->ip_blocks[i].version->type != block_type)
1677 continue;
1678 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1679 continue;
1680 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1681 (void *)adev, state);
1682 if (r)
1683 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1684 adev->ip_blocks[i].version->funcs->name, r);
1685 }
1686 return r;
1687 }
1688
1689 /**
1690 * amdgpu_device_ip_set_powergating_state - set the PG state
1691 *
1692 * @dev: amdgpu_device pointer
1693 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1694 * @state: powergating state (gate or ungate)
1695 *
1696 * Sets the requested powergating state for all instances of
1697 * the hardware IP specified.
1698 * Returns the error code from the last instance.
1699 */
1700 int amdgpu_device_ip_set_powergating_state(void *dev,
1701 enum amd_ip_block_type block_type,
1702 enum amd_powergating_state state)
1703 {
1704 struct amdgpu_device *adev = dev;
1705 int i, r = 0;
1706
1707 for (i = 0; i < adev->num_ip_blocks; i++) {
1708 if (!adev->ip_blocks[i].status.valid)
1709 continue;
1710 if (adev->ip_blocks[i].version->type != block_type)
1711 continue;
1712 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1713 continue;
1714 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1715 (void *)adev, state);
1716 if (r)
1717 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1718 adev->ip_blocks[i].version->funcs->name, r);
1719 }
1720 return r;
1721 }
1722
1723 /**
1724 * amdgpu_device_ip_get_clockgating_state - get the CG state
1725 *
1726 * @adev: amdgpu_device pointer
1727 * @flags: clockgating feature flags
1728 *
1729 * Walks the list of IPs on the device and updates the clockgating
1730 * flags for each IP.
1731 * Updates @flags with the feature flags for each hardware IP where
1732 * clockgating is enabled.
1733 */
1734 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1735 u64 *flags)
1736 {
1737 int i;
1738
1739 for (i = 0; i < adev->num_ip_blocks; i++) {
1740 if (!adev->ip_blocks[i].status.valid)
1741 continue;
1742 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1743 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1744 }
1745 }
1746
1747 /**
1748 * amdgpu_device_ip_wait_for_idle - wait for idle
1749 *
1750 * @adev: amdgpu_device pointer
1751 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1752 *
1753  * Waits for the requested hardware IP to be idle.
1754 * Returns 0 for success or a negative error code on failure.
1755 */
1756 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1757 enum amd_ip_block_type block_type)
1758 {
1759 int i, r;
1760
1761 for (i = 0; i < adev->num_ip_blocks; i++) {
1762 if (!adev->ip_blocks[i].status.valid)
1763 continue;
1764 if (adev->ip_blocks[i].version->type == block_type) {
1765 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1766 if (r)
1767 return r;
1768 break;
1769 }
1770 }
1771 return 0;
1772
1773 }
1774
1775 /**
1776 * amdgpu_device_ip_is_idle - is the hardware IP idle
1777 *
1778 * @adev: amdgpu_device pointer
1779 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1780 *
1781 * Check if the hardware IP is idle or not.
1782  * Returns true if the IP is idle, false if not.
1783 */
1784 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1785 enum amd_ip_block_type block_type)
1786 {
1787 int i;
1788
1789 for (i = 0; i < adev->num_ip_blocks; i++) {
1790 if (!adev->ip_blocks[i].status.valid)
1791 continue;
1792 if (adev->ip_blocks[i].version->type == block_type)
1793 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1794 }
1795 return true;
1796
1797 }
1798
1799 /**
1800 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1801 *
1802 * @adev: amdgpu_device pointer
1803 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1804 *
1805 * Returns a pointer to the hardware IP block structure
1806 * if it exists for the asic, otherwise NULL.
1807 */
1808 struct amdgpu_ip_block *
1809 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1810 enum amd_ip_block_type type)
1811 {
1812 int i;
1813
1814 for (i = 0; i < adev->num_ip_blocks; i++)
1815 if (adev->ip_blocks[i].version->type == type)
1816 return &adev->ip_blocks[i];
1817
1818 return NULL;
1819 }
1820
1821 /**
1822 * amdgpu_device_ip_block_version_cmp
1823 *
1824 * @adev: amdgpu_device pointer
1825 * @type: enum amd_ip_block_type
1826 * @major: major version
1827 * @minor: minor version
1828 *
1829  * Returns 0 if the IP block's version is equal to or greater than the
1830  * requested version, 1 if it is smaller or the ip_block doesn't exist.
1831 */
1832 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1833 enum amd_ip_block_type type,
1834 u32 major, u32 minor)
1835 {
1836 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1837
1838 if (ip_block && ((ip_block->version->major > major) ||
1839 ((ip_block->version->major == major) &&
1840 (ip_block->version->minor >= minor))))
1841 return 0;
1842
1843 return 1;
1844 }
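/*
 * Illustrative sketch (assumed example, not from the original file): gate a
 * code path on a minimum IP block version, here GFX 8.1 or newer.
 */
static bool amdgpu_device_example_gfx_is_8_1_plus(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 1) == 0;
}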
1845
1846 /**
1847 * amdgpu_device_ip_block_add
1848 *
1849 * @adev: amdgpu_device pointer
1850 * @ip_block_version: pointer to the IP to add
1851 *
1852 * Adds the IP block driver information to the collection of IPs
1853 * on the asic.
1854 */
1855 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1856 const struct amdgpu_ip_block_version *ip_block_version)
1857 {
1858 if (!ip_block_version)
1859 return -EINVAL;
1860
1861 switch (ip_block_version->type) {
1862 case AMD_IP_BLOCK_TYPE_VCN:
1863 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1864 return 0;
1865 break;
1866 case AMD_IP_BLOCK_TYPE_JPEG:
1867 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1868 return 0;
1869 break;
1870 default:
1871 break;
1872 }
1873
1874 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1875 ip_block_version->funcs->name);
1876
1877 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1878
1879 return 0;
1880 }
1881
1882 /**
1883 * amdgpu_device_enable_virtual_display - enable virtual display feature
1884 *
1885 * @adev: amdgpu_device pointer
1886 *
1887  * Enables the virtual display feature if the user has enabled it via
1888  * the module parameter virtual_display. This feature provides virtual
1889  * display hardware on headless boards or in virtualized environments.
1890  * This function parses and validates the configuration string specified by
1891  * the user and configures the virtual display configuration (number of
1892  * virtual connectors, crtcs, etc.) specified.
1893 */
1894 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1895 {
1896 adev->enable_virtual_display = false;
1897
1898 if (amdgpu_virtual_display) {
1899 const char *pci_address_name = pci_name(adev->pdev);
1900 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1901
1902 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1903 pciaddstr_tmp = pciaddstr;
1904 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1905 pciaddname = strsep(&pciaddname_tmp, ",");
1906 if (!strcmp("all", pciaddname)
1907 || !strcmp(pci_address_name, pciaddname)) {
1908 long num_crtc;
1909 int res = -1;
1910
1911 adev->enable_virtual_display = true;
1912
1913 if (pciaddname_tmp)
1914 res = kstrtol(pciaddname_tmp, 10,
1915 &num_crtc);
1916
1917 if (!res) {
1918 if (num_crtc < 1)
1919 num_crtc = 1;
1920 if (num_crtc > 6)
1921 num_crtc = 6;
1922 adev->mode_info.num_crtc = num_crtc;
1923 } else {
1924 adev->mode_info.num_crtc = 1;
1925 }
1926 break;
1927 }
1928 }
1929
1930 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1931 amdgpu_virtual_display, pci_address_name,
1932 adev->enable_virtual_display, adev->mode_info.num_crtc);
1933
1934 kfree(pciaddstr);
1935 }
1936 }
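/*
 * Illustrative virtual_display strings accepted by the parser above:
 *
 *	amdgpu.virtual_display=0000:23:00.0,2   two virtual crtcs on that device
 *	amdgpu.virtual_display=all,1            one virtual crtc on every device
 *
 * Entries are separated by ';' and the crtc count after ',' is clamped to
 * the 1..6 range (the PCI address shown is just an example).
 */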
1937
1938 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1939 {
1940 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1941 adev->mode_info.num_crtc = 1;
1942 adev->enable_virtual_display = true;
1943 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1944 adev->enable_virtual_display, adev->mode_info.num_crtc);
1945 }
1946 }
1947
1948 /**
1949 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1950 *
1951 * @adev: amdgpu_device pointer
1952 *
1953 * Parses the asic configuration parameters specified in the gpu info
1954 * firmware and makes them available to the driver for use in configuring
1955 * the asic.
1956 * Returns 0 on success, -EINVAL on failure.
1957 */
1958 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1959 {
1960 const char *chip_name;
1961 char fw_name[40];
1962 int err;
1963 const struct gpu_info_firmware_header_v1_0 *hdr;
1964
1965 adev->firmware.gpu_info_fw = NULL;
1966
1967 if (adev->mman.discovery_bin) {
1968 /*
1969 * FIXME: The bounding box is still needed by Navi12, so
1970 * temporarily read it from gpu_info firmware. Should be dropped
1971 * when DAL no longer needs it.
1972 */
1973 if (adev->asic_type != CHIP_NAVI12)
1974 return 0;
1975 }
1976
1977 switch (adev->asic_type) {
1978 default:
1979 return 0;
1980 case CHIP_VEGA10:
1981 chip_name = "vega10";
1982 break;
1983 case CHIP_VEGA12:
1984 chip_name = "vega12";
1985 break;
1986 case CHIP_RAVEN:
1987 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1988 chip_name = "raven2";
1989 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1990 chip_name = "picasso";
1991 else
1992 chip_name = "raven";
1993 break;
1994 case CHIP_ARCTURUS:
1995 chip_name = "arcturus";
1996 break;
1997 case CHIP_NAVI12:
1998 chip_name = "navi12";
1999 break;
2000 }
2001
2002 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2003 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
2004 if (err) {
2005 dev_err(adev->dev,
2006 "Failed to load gpu_info firmware \"%s\"\n",
2007 fw_name);
2008 goto out;
2009 }
2010 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
2011 if (err) {
2012 dev_err(adev->dev,
2013 "Failed to validate gpu_info firmware \"%s\"\n",
2014 fw_name);
2015 goto out;
2016 }
2017
2018 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2019 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2020
2021 switch (hdr->version_major) {
2022 case 1:
2023 {
2024 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2025 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2026 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2027
2028 /*
2029 * Should be dropped when DAL no longer needs it.
2030 */
2031 if (adev->asic_type == CHIP_NAVI12)
2032 goto parse_soc_bounding_box;
2033
2034 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2035 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2036 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2037 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2038 adev->gfx.config.max_texture_channel_caches =
2039 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2040 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2041 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2042 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2043 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2044 adev->gfx.config.double_offchip_lds_buf =
2045 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2046 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2047 adev->gfx.cu_info.max_waves_per_simd =
2048 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2049 adev->gfx.cu_info.max_scratch_slots_per_cu =
2050 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2051 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2052 if (hdr->version_minor >= 1) {
2053 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2054 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2055 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2056 adev->gfx.config.num_sc_per_sh =
2057 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2058 adev->gfx.config.num_packer_per_sc =
2059 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2060 }
2061
2062 parse_soc_bounding_box:
2063 /*
2064 * soc bounding box info is not integrated in the discovery table,
2065 * so we always need to parse it from the gpu info firmware when needed.
2066 */
2067 if (hdr->version_minor == 2) {
2068 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2069 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2070 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2071 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2072 }
2073 break;
2074 }
2075 default:
2076 dev_err(adev->dev,
2077 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2078 err = -EINVAL;
2079 goto out;
2080 }
2081 out:
2082 return err;
2083 }
2084
2085 /**
2086 * amdgpu_device_ip_early_init - run early init for hardware IPs
2087 *
2088 * @adev: amdgpu_device pointer
2089 *
2090 * Early initialization pass for hardware IPs. The hardware IPs that make
2091 * up each asic are discovered and each IP's early_init callback is run. This
2092 * is the first stage in initializing the asic.
2093 * Returns 0 on success, negative error code on failure.
2094 */
2095 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2096 {
2097 struct drm_device *dev = adev_to_drm(adev);
2098 struct pci_dev *parent;
2099 int i, r;
2100
2101 amdgpu_device_enable_virtual_display(adev);
2102
2103 if (amdgpu_sriov_vf(adev)) {
2104 r = amdgpu_virt_request_full_gpu(adev, true);
2105 if (r)
2106 return r;
2107 }
2108
2109 switch (adev->asic_type) {
2110 #ifdef CONFIG_DRM_AMDGPU_SI
2111 case CHIP_VERDE:
2112 case CHIP_TAHITI:
2113 case CHIP_PITCAIRN:
2114 case CHIP_OLAND:
2115 case CHIP_HAINAN:
2116 adev->family = AMDGPU_FAMILY_SI;
2117 r = si_set_ip_blocks(adev);
2118 if (r)
2119 return r;
2120 break;
2121 #endif
2122 #ifdef CONFIG_DRM_AMDGPU_CIK
2123 case CHIP_BONAIRE:
2124 case CHIP_HAWAII:
2125 case CHIP_KAVERI:
2126 case CHIP_KABINI:
2127 case CHIP_MULLINS:
2128 if (adev->flags & AMD_IS_APU)
2129 adev->family = AMDGPU_FAMILY_KV;
2130 else
2131 adev->family = AMDGPU_FAMILY_CI;
2132
2133 r = cik_set_ip_blocks(adev);
2134 if (r)
2135 return r;
2136 break;
2137 #endif
2138 case CHIP_TOPAZ:
2139 case CHIP_TONGA:
2140 case CHIP_FIJI:
2141 case CHIP_POLARIS10:
2142 case CHIP_POLARIS11:
2143 case CHIP_POLARIS12:
2144 case CHIP_VEGAM:
2145 case CHIP_CARRIZO:
2146 case CHIP_STONEY:
2147 if (adev->flags & AMD_IS_APU)
2148 adev->family = AMDGPU_FAMILY_CZ;
2149 else
2150 adev->family = AMDGPU_FAMILY_VI;
2151
2152 r = vi_set_ip_blocks(adev);
2153 if (r)
2154 return r;
2155 break;
2156 default:
2157 r = amdgpu_discovery_set_ip_blocks(adev);
2158 if (r)
2159 return r;
2160 break;
2161 }
2162
2163 if (amdgpu_has_atpx() &&
2164 (amdgpu_is_atpx_hybrid() ||
2165 amdgpu_has_atpx_dgpu_power_cntl()) &&
2166 ((adev->flags & AMD_IS_APU) == 0) &&
2167 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2168 adev->flags |= AMD_IS_PX;
2169
2170 if (!(adev->flags & AMD_IS_APU)) {
2171 parent = pci_upstream_bridge(adev->pdev);
2172 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2173 }
2174
2175 amdgpu_amdkfd_device_probe(adev);
2176
2177 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2178 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2179 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2180 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2181 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2182
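	/*
	 * amdgpu_ip_block_mask is a debug module parameter (all bits set by
	 * default); clearing bit i marks IP block i invalid here so its
	 * early_init and all later init stages are skipped.
	 */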
2183 for (i = 0; i < adev->num_ip_blocks; i++) {
2184 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2185 DRM_ERROR("disabled ip block: %d <%s>\n",
2186 i, adev->ip_blocks[i].version->funcs->name);
2187 adev->ip_blocks[i].status.valid = false;
2188 } else {
2189 if (adev->ip_blocks[i].version->funcs->early_init) {
2190 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2191 if (r == -ENOENT) {
2192 adev->ip_blocks[i].status.valid = false;
2193 } else if (r) {
2194 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2195 adev->ip_blocks[i].version->funcs->name, r);
2196 return r;
2197 } else {
2198 adev->ip_blocks[i].status.valid = true;
2199 }
2200 } else {
2201 adev->ip_blocks[i].status.valid = true;
2202 }
2203 }
2204 /* get the vbios after the asic_funcs are set up */
2205 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2206 r = amdgpu_device_parse_gpu_info_fw(adev);
2207 if (r)
2208 return r;
2209
2210 /* Read BIOS */
2211 if (!amdgpu_get_bios(adev))
2212 return -EINVAL;
2213
2214 r = amdgpu_atombios_init(adev);
2215 if (r) {
2216 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2217 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2218 return r;
2219 }
2220
2221 /* get pf2vf msg info at its earliest time */
2222 if (amdgpu_sriov_vf(adev))
2223 amdgpu_virt_init_data_exchange(adev);
2224
2225 }
2226 }
2227
2228 adev->cg_flags &= amdgpu_cg_mask;
2229 adev->pg_flags &= amdgpu_pg_mask;
2230
2231 return 0;
2232 }
2233
2234 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2235 {
2236 int i, r;
2237
2238 for (i = 0; i < adev->num_ip_blocks; i++) {
2239 if (!adev->ip_blocks[i].status.sw)
2240 continue;
2241 if (adev->ip_blocks[i].status.hw)
2242 continue;
2243 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2244 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2245 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2246 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2247 if (r) {
2248 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2249 adev->ip_blocks[i].version->funcs->name, r);
2250 return r;
2251 }
2252 adev->ip_blocks[i].status.hw = true;
2253 }
2254 }
2255
2256 return 0;
2257 }
2258
2259 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2260 {
2261 int i, r;
2262
2263 for (i = 0; i < adev->num_ip_blocks; i++) {
2264 if (!adev->ip_blocks[i].status.sw)
2265 continue;
2266 if (adev->ip_blocks[i].status.hw)
2267 continue;
2268 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2269 if (r) {
2270 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2271 adev->ip_blocks[i].version->funcs->name, r);
2272 return r;
2273 }
2274 adev->ip_blocks[i].status.hw = true;
2275 }
2276
2277 return 0;
2278 }
2279
2280 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2281 {
2282 int r = 0;
2283 int i;
2284 uint32_t smu_version;
2285
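	/*
	 * On VEGA10 and newer parts, microcode is loaded through the PSP
	 * ("front door" loading), so the PSP block has to be brought up, or
	 * resumed when coming back from reset/suspend, before the other IPs
	 * load their firmware; the loop below only touches that one PSP block.
	 */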
2286 if (adev->asic_type >= CHIP_VEGA10) {
2287 for (i = 0; i < adev->num_ip_blocks; i++) {
2288 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2289 continue;
2290
2291 if (!adev->ip_blocks[i].status.sw)
2292 continue;
2293
2294 /* no need to do the fw loading again if already done */
2295 if (adev->ip_blocks[i].status.hw == true)
2296 break;
2297
2298 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2299 r = adev->ip_blocks[i].version->funcs->resume(adev);
2300 if (r) {
2301 DRM_ERROR("resume of IP block <%s> failed %d\n",
2302 adev->ip_blocks[i].version->funcs->name, r);
2303 return r;
2304 }
2305 } else {
2306 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2307 if (r) {
2308 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2309 adev->ip_blocks[i].version->funcs->name, r);
2310 return r;
2311 }
2312 }
2313
2314 adev->ip_blocks[i].status.hw = true;
2315 break;
2316 }
2317 }
2318
2319 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2320 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2321
2322 return r;
2323 }
2324
2325 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2326 {
2327 long timeout;
2328 int r, i;
2329
2330 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2331 struct amdgpu_ring *ring = adev->rings[i];
2332
2333 /* No need to setup the GPU scheduler for rings that don't need it */
2334 if (!ring || ring->no_scheduler)
2335 continue;
2336
2337 switch (ring->funcs->type) {
2338 case AMDGPU_RING_TYPE_GFX:
2339 timeout = adev->gfx_timeout;
2340 break;
2341 case AMDGPU_RING_TYPE_COMPUTE:
2342 timeout = adev->compute_timeout;
2343 break;
2344 case AMDGPU_RING_TYPE_SDMA:
2345 timeout = adev->sdma_timeout;
2346 break;
2347 default:
2348 timeout = adev->video_timeout;
2349 break;
2350 }
2351
2352 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2353 ring->num_hw_submission, amdgpu_job_hang_limit,
2354 timeout, adev->reset_domain->wq,
2355 ring->sched_score, ring->name,
2356 adev->dev);
2357 if (r) {
2358 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2359 ring->name);
2360 return r;
2361 }
2362 }
2363
2364 return 0;
2365 }
2366
2367
2368 /**
2369 * amdgpu_device_ip_init - run init for hardware IPs
2370 *
2371 * @adev: amdgpu_device pointer
2372 *
2373 * Main initialization pass for hardware IPs. The list of all the hardware
2374 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2375 * are run. sw_init initializes the software state associated with each IP
2376 * and hw_init initializes the hardware associated with each IP.
2377 * Returns 0 on success, negative error code on failure.
2378 */
2379 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2380 {
2381 int i, r;
2382
2383 r = amdgpu_ras_init(adev);
2384 if (r)
2385 return r;
2386
2387 for (i = 0; i < adev->num_ip_blocks; i++) {
2388 if (!adev->ip_blocks[i].status.valid)
2389 continue;
2390 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2391 if (r) {
2392 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2393 adev->ip_blocks[i].version->funcs->name, r);
2394 goto init_failed;
2395 }
2396 adev->ip_blocks[i].status.sw = true;
2397
2398 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2399 /* need to do common hw init early so everything is set up for gmc */
2400 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2401 if (r) {
2402 DRM_ERROR("hw_init %d failed %d\n", i, r);
2403 goto init_failed;
2404 }
2405 adev->ip_blocks[i].status.hw = true;
2406 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2407 /* need to do gmc hw init early so we can allocate gpu mem */
2408 /* Try to reserve bad pages early */
2409 if (amdgpu_sriov_vf(adev))
2410 amdgpu_virt_exchange_data(adev);
2411
2412 r = amdgpu_device_vram_scratch_init(adev);
2413 if (r) {
2414 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2415 goto init_failed;
2416 }
2417 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2418 if (r) {
2419 DRM_ERROR("hw_init %d failed %d\n", i, r);
2420 goto init_failed;
2421 }
2422 r = amdgpu_device_wb_init(adev);
2423 if (r) {
2424 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2425 goto init_failed;
2426 }
2427 adev->ip_blocks[i].status.hw = true;
2428
2429 /* right after GMC hw init, we create CSA */
2430 if (amdgpu_mcbp) {
2431 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2432 AMDGPU_GEM_DOMAIN_VRAM,
2433 AMDGPU_CSA_SIZE);
2434 if (r) {
2435 DRM_ERROR("allocate CSA failed %d\n", r);
2436 goto init_failed;
2437 }
2438 }
2439 }
2440 }
2441
2442 if (amdgpu_sriov_vf(adev))
2443 amdgpu_virt_init_data_exchange(adev);
2444
2445 r = amdgpu_ib_pool_init(adev);
2446 if (r) {
2447 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2448 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2449 goto init_failed;
2450 }
2451
2452 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
2453 if (r)
2454 goto init_failed;
2455
2456 r = amdgpu_device_ip_hw_init_phase1(adev);
2457 if (r)
2458 goto init_failed;
2459
2460 r = amdgpu_device_fw_loading(adev);
2461 if (r)
2462 goto init_failed;
2463
2464 r = amdgpu_device_ip_hw_init_phase2(adev);
2465 if (r)
2466 goto init_failed;
2467
2468 /*
2469 * retired pages will be loaded from eeprom and reserved here;
2470 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2471 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2472 * functional for I2C communication, which is only true at this point.
2473 *
2474 * amdgpu_ras_recovery_init may fail, but the caller only cares about
2475 * failures caused by a bad gpu state and stops the amdgpu init process
2476 * accordingly. For other failures it still releases all the
2477 * resources and prints an error message, rather than returning a
2478 * negative value to the upper level.
2479 *
2480 * Note: theoretically, this should be called before all vram allocations
2481 * to protect retired pages from being reused
2482 */
2483 r = amdgpu_ras_recovery_init(adev);
2484 if (r)
2485 goto init_failed;
2486
2487 /*
2488 * In case of XGMI, grab an extra reference on the reset domain for this device
2489 */
2490 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2491 if (amdgpu_xgmi_add_device(adev) == 0) {
2492 if (!amdgpu_sriov_vf(adev)) {
2493 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2494
2495 if (WARN_ON(!hive)) {
2496 r = -ENOENT;
2497 goto init_failed;
2498 }
2499
2500 if (!hive->reset_domain ||
2501 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2502 r = -ENOENT;
2503 amdgpu_put_xgmi_hive(hive);
2504 goto init_failed;
2505 }
2506
2507 /* Drop the early temporary reset domain we created for device */
2508 amdgpu_reset_put_reset_domain(adev->reset_domain);
2509 adev->reset_domain = hive->reset_domain;
2510 amdgpu_put_xgmi_hive(hive);
2511 }
2512 }
2513 }
2514
2515 r = amdgpu_device_init_schedulers(adev);
2516 if (r)
2517 goto init_failed;
2518
2519 /* Don't init kfd if whole hive need to be reset during init */
2520 if (!adev->gmc.xgmi.pending_reset)
2521 amdgpu_amdkfd_device_init(adev);
2522
2523 amdgpu_fru_get_product_info(adev);
2524
2525 init_failed:
2526 if (amdgpu_sriov_vf(adev))
2527 amdgpu_virt_release_full_gpu(adev, true);
2528
2529 return r;
2530 }
2531
2532 /**
2533 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2534 *
2535 * @adev: amdgpu_device pointer
2536 *
2537 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2538 * this function before a GPU reset. If the value is retained after a
2539 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2540 */
2541 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2542 {
2543 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2544 }
2545
2546 /**
2547 * amdgpu_device_check_vram_lost - check if vram is valid
2548 *
2549 * @adev: amdgpu_device pointer
2550 *
2551 * Checks the reset magic value written to the gart pointer in VRAM.
2552 * The driver calls this after a GPU reset to see if the contents of
2553 * VRAM have been lost or not.
2554 * Returns true if vram is lost, false if not.
2555 */
2556 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2557 {
2558 if (memcmp(adev->gart.ptr, adev->reset_magic,
2559 AMDGPU_RESET_MAGIC_NUM))
2560 return true;
2561
2562 if (!amdgpu_in_reset(adev))
2563 return false;
2564
2565 /*
2566 * For all ASICs with baco/mode1 reset, the VRAM is
2567 * always assumed to be lost.
2568 */
2569 switch (amdgpu_asic_reset_method(adev)) {
2570 case AMD_RESET_METHOD_BACO:
2571 case AMD_RESET_METHOD_MODE1:
2572 return true;
2573 default:
2574 return false;
2575 }
2576 }
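/*
 * Note: amdgpu_device_fill_reset_magic() and amdgpu_device_check_vram_lost()
 * work as a pair: if the magic no longer matches after a reset, the recovery
 * path treats VRAM as lost and restores buffer contents (e.g. from shadow BOs)
 * instead of assuming they survived.
 */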
2577
2578 /**
2579 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2580 *
2581 * @adev: amdgpu_device pointer
2582 * @state: clockgating state (gate or ungate)
2583 *
2584 * The list of all the hardware IPs that make up the asic is walked and the
2585 * set_clockgating_state callbacks are run.
2586 * During late init this pass enables clockgating for the hardware IPs;
2587 * during fini or suspend it disables clockgating again.
2588 * Returns 0 on success, negative error code on failure.
2589 */
2590
2591 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2592 enum amd_clockgating_state state)
2593 {
2594 int i, j, r;
2595
2596 if (amdgpu_emu_mode == 1)
2597 return 0;
2598
2599 for (j = 0; j < adev->num_ip_blocks; j++) {
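		/*
		 * Gate in IP-block order during late init; ungate in reverse
		 * order on fini/suspend, mirroring the init/teardown sequence.
		 */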
2600 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2601 if (!adev->ip_blocks[i].status.late_initialized)
2602 continue;
2603 /* skip CG for GFX on S0ix */
2604 if (adev->in_s0ix &&
2605 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2606 continue;
2607 /* skip CG for VCE/UVD, it's handled specially */
2608 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2609 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2610 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2611 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2612 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2613 /* enable clockgating to save power */
2614 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2615 state);
2616 if (r) {
2617 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2618 adev->ip_blocks[i].version->funcs->name, r);
2619 return r;
2620 }
2621 }
2622 }
2623
2624 return 0;
2625 }
2626
2627 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2628 enum amd_powergating_state state)
2629 {
2630 int i, j, r;
2631
2632 if (amdgpu_emu_mode == 1)
2633 return 0;
2634
2635 for (j = 0; j < adev->num_ip_blocks; j++) {
2636 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2637 if (!adev->ip_blocks[i].status.late_initialized)
2638 continue;
2639 /* skip PG for GFX on S0ix */
2640 if (adev->in_s0ix &&
2641 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2642 continue;
2643 /* skip PG for VCE/UVD, it's handled specially */
2644 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2645 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2646 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2647 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2648 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2649 /* enable powergating to save power */
2650 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2651 state);
2652 if (r) {
2653 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2654 adev->ip_blocks[i].version->funcs->name, r);
2655 return r;
2656 }
2657 }
2658 }
2659 return 0;
2660 }
2661
2662 static int amdgpu_device_enable_mgpu_fan_boost(void)
2663 {
2664 struct amdgpu_gpu_instance *gpu_ins;
2665 struct amdgpu_device *adev;
2666 int i, ret = 0;
2667
2668 mutex_lock(&mgpu_info.mutex);
2669
2670 /*
2671 * MGPU fan boost feature should be enabled
2672 * only when there are two or more dGPUs in
2673 * the system
2674 */
2675 if (mgpu_info.num_dgpu < 2)
2676 goto out;
2677
2678 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2679 gpu_ins = &(mgpu_info.gpu_ins[i]);
2680 adev = gpu_ins->adev;
2681 if (!(adev->flags & AMD_IS_APU) &&
2682 !gpu_ins->mgpu_fan_enabled) {
2683 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2684 if (ret)
2685 break;
2686
2687 gpu_ins->mgpu_fan_enabled = 1;
2688 }
2689 }
2690
2691 out:
2692 mutex_unlock(&mgpu_info.mutex);
2693
2694 return ret;
2695 }
2696
2697 /**
2698 * amdgpu_device_ip_late_init - run late init for hardware IPs
2699 *
2700 * @adev: amdgpu_device pointer
2701 *
2702 * Late initialization pass for hardware IPs. The list of all the hardware
2703 * IPs that make up the asic is walked and the late_init callbacks are run.
2704 * late_init covers any special initialization that an IP requires
2705 * after all of the IPs have been initialized or something that needs to happen
2706 * late in the init process.
2707 * Returns 0 on success, negative error code on failure.
2708 */
2709 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2710 {
2711 struct amdgpu_gpu_instance *gpu_instance;
2712 int i = 0, r;
2713
2714 for (i = 0; i < adev->num_ip_blocks; i++) {
2715 if (!adev->ip_blocks[i].status.hw)
2716 continue;
2717 if (adev->ip_blocks[i].version->funcs->late_init) {
2718 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2719 if (r) {
2720 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2721 adev->ip_blocks[i].version->funcs->name, r);
2722 return r;
2723 }
2724 }
2725 adev->ip_blocks[i].status.late_initialized = true;
2726 }
2727
2728 r = amdgpu_ras_late_init(adev);
2729 if (r) {
2730 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2731 return r;
2732 }
2733
2734 amdgpu_ras_set_error_query_ready(adev, true);
2735
2736 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2737 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2738
2739 amdgpu_device_fill_reset_magic(adev);
2740
2741 r = amdgpu_device_enable_mgpu_fan_boost();
2742 if (r)
2743 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2744
2745 /* For passthrough configuration on arcturus and aldebaran, enable special handling of SBR */
2746 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2747 adev->asic_type == CHIP_ALDEBARAN))
2748 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2749
2750 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2751 mutex_lock(&mgpu_info.mutex);
2752
2753 /*
2754 * Reset device p-state to low as this was booted with high.
2755 *
2756 * This should be performed only after all devices from the same
2757 * hive get initialized.
2758 *
2759 * However, the number of devices in the hive is not known in advance;
2760 * it is counted one by one as each device initializes.
2761 *
2762 * So, we wait until all XGMI interlinked devices are initialized.
2763 * This may bring some delays as those devices may come from
2764 * different hives. But that should be OK.
2765 */
2766 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2767 for (i = 0; i < mgpu_info.num_gpu; i++) {
2768 gpu_instance = &(mgpu_info.gpu_ins[i]);
2769 if (gpu_instance->adev->flags & AMD_IS_APU)
2770 continue;
2771
2772 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2773 AMDGPU_XGMI_PSTATE_MIN);
2774 if (r) {
2775 DRM_ERROR("pstate setting failed (%d).\n", r);
2776 break;
2777 }
2778 }
2779 }
2780
2781 mutex_unlock(&mgpu_info.mutex);
2782 }
2783
2784 return 0;
2785 }
2786
2787 /**
2788 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2789 *
2790 * @adev: amdgpu_device pointer
2791 *
2792 * For ASICs that need to disable the SMC first
2793 */
2794 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2795 {
2796 int i, r;
2797
2798 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2799 return;
2800
2801 for (i = 0; i < adev->num_ip_blocks; i++) {
2802 if (!adev->ip_blocks[i].status.hw)
2803 continue;
2804 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2805 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2806 /* XXX handle errors */
2807 if (r) {
2808 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2809 adev->ip_blocks[i].version->funcs->name, r);
2810 }
2811 adev->ip_blocks[i].status.hw = false;
2812 break;
2813 }
2814 }
2815 }
2816
2817 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2818 {
2819 int i, r;
2820
2821 for (i = 0; i < adev->num_ip_blocks; i++) {
2822 if (!adev->ip_blocks[i].version->funcs->early_fini)
2823 continue;
2824
2825 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2826 if (r) {
2827 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2828 adev->ip_blocks[i].version->funcs->name, r);
2829 }
2830 }
2831
2832 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2833 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2834
2835 amdgpu_amdkfd_suspend(adev, false);
2836
2837 /* Workaround for ASICs that need to disable the SMC first */
2838 amdgpu_device_smu_fini_early(adev);
2839
2840 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2841 if (!adev->ip_blocks[i].status.hw)
2842 continue;
2843
2844 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2845 /* XXX handle errors */
2846 if (r) {
2847 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2848 adev->ip_blocks[i].version->funcs->name, r);
2849 }
2850
2851 adev->ip_blocks[i].status.hw = false;
2852 }
2853
2854 if (amdgpu_sriov_vf(adev)) {
2855 if (amdgpu_virt_release_full_gpu(adev, false))
2856 DRM_ERROR("failed to release exclusive mode on fini\n");
2857 }
2858
2859 return 0;
2860 }
2861
2862 /**
2863 * amdgpu_device_ip_fini - run fini for hardware IPs
2864 *
2865 * @adev: amdgpu_device pointer
2866 *
2867 * Main teardown pass for hardware IPs. The list of all the hardware
2868 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2869 * are run. hw_fini tears down the hardware associated with each IP
2870 * and sw_fini tears down any software state associated with each IP.
2871 * Returns 0 on success, negative error code on failure.
2872 */
2873 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2874 {
2875 int i, r;
2876
2877 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2878 amdgpu_virt_release_ras_err_handler_data(adev);
2879
2880 if (adev->gmc.xgmi.num_physical_nodes > 1)
2881 amdgpu_xgmi_remove_device(adev);
2882
2883 amdgpu_amdkfd_device_fini_sw(adev);
2884
2885 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2886 if (!adev->ip_blocks[i].status.sw)
2887 continue;
2888
2889 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2890 amdgpu_ucode_free_bo(adev);
2891 amdgpu_free_static_csa(&adev->virt.csa_obj);
2892 amdgpu_device_wb_fini(adev);
2893 amdgpu_device_vram_scratch_fini(adev);
2894 amdgpu_ib_pool_fini(adev);
2895 }
2896
2897 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2898 /* XXX handle errors */
2899 if (r) {
2900 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2901 adev->ip_blocks[i].version->funcs->name, r);
2902 }
2903 adev->ip_blocks[i].status.sw = false;
2904 adev->ip_blocks[i].status.valid = false;
2905 }
2906
2907 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2908 if (!adev->ip_blocks[i].status.late_initialized)
2909 continue;
2910 if (adev->ip_blocks[i].version->funcs->late_fini)
2911 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2912 adev->ip_blocks[i].status.late_initialized = false;
2913 }
2914
2915 amdgpu_ras_fini(adev);
2916
2917 return 0;
2918 }
2919
2920 /**
2921 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2922 *
2923 * @work: work_struct.
2924 */
2925 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2926 {
2927 struct amdgpu_device *adev =
2928 container_of(work, struct amdgpu_device, delayed_init_work.work);
2929 int r;
2930
2931 r = amdgpu_ib_ring_tests(adev);
2932 if (r)
2933 DRM_ERROR("ib ring test failed (%d).\n", r);
2934 }
2935
2936 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2937 {
2938 struct amdgpu_device *adev =
2939 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2940
2941 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2942 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2943
2944 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2945 adev->gfx.gfx_off_state = true;
2946 }
2947
2948 /**
2949 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2950 *
2951 * @adev: amdgpu_device pointer
2952 *
2953 * Main suspend function for hardware IPs. The list of all the hardware
2954 * IPs that make up the asic is walked, clockgating is disabled and the
2955 * suspend callbacks are run. suspend puts the hardware and software state
2956 * in each IP into a state suitable for suspend.
2957 * Returns 0 on success, negative error code on failure.
2958 */
2959 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2960 {
2961 int i, r;
2962
2963 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2964 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2965
2966 /*
2967 * Per the PMFW team's suggestion, the driver needs to handle disabling
2968 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
2969 * scenario. Add the missing df cstate disablement here.
2970 */
2971 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2972 dev_warn(adev->dev, "Failed to disallow df cstate");
2973
2974 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2975 if (!adev->ip_blocks[i].status.valid)
2976 continue;
2977
2978 /* displays are handled separately */
2979 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2980 continue;
2981
2982 /* XXX handle errors */
2983 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2984 /* XXX handle errors */
2985 if (r) {
2986 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2987 adev->ip_blocks[i].version->funcs->name, r);
2988 return r;
2989 }
2990
2991 adev->ip_blocks[i].status.hw = false;
2992 }
2993
2994 return 0;
2995 }
2996
2997 /**
2998 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2999 *
3000 * @adev: amdgpu_device pointer
3001 *
3002 * Main suspend function for hardware IPs. The list of all the hardware
3003 * IPs that make up the asic is walked, clockgating is disabled and the
3004 * suspend callbacks are run. suspend puts the hardware and software state
3005 * in each IP into a state suitable for suspend.
3006 * Returns 0 on success, negative error code on failure.
3007 */
3008 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3009 {
3010 int i, r;
3011
3012 if (adev->in_s0ix)
3013 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3014
3015 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3016 if (!adev->ip_blocks[i].status.valid)
3017 continue;
3018 /* displays are handled in phase1 */
3019 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3020 continue;
3021 /* PSP lost connection when err_event_athub occurs */
3022 if (amdgpu_ras_intr_triggered() &&
3023 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3024 adev->ip_blocks[i].status.hw = false;
3025 continue;
3026 }
3027
3028 /* skip unnecessary suspend if we have not initialized them yet */
3029 if (adev->gmc.xgmi.pending_reset &&
3030 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3031 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3032 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3033 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3034 adev->ip_blocks[i].status.hw = false;
3035 continue;
3036 }
3037
3038 /* skip suspend of gfx/mes and psp for S0ix
3039 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3040 * like at runtime. PSP is also part of the always on hardware
3041 * so no need to suspend it.
3042 */
3043 if (adev->in_s0ix &&
3044 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3045 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3046 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3047 continue;
3048
3049 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3050 if (adev->in_s0ix &&
3051 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3052 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3053 continue;
3054
3055 /* During cold boot, swPSP provides the IMU and RLC FW binaries to the TOS.
3056 * These live in the TMR and are expected to be reused by the PSP-TOS to
3057 * reload from that location; RLC autoload is also loaded from there based
3058 * on the PMFW -> PSP message during the re-init sequence.
3059 * Therefore, psp suspend & resume should be skipped to avoid destroying
3060 * the TMR and reloading the FWs again for IMU-enabled APU ASICs.
3061 */
3062 if (amdgpu_in_reset(adev) &&
3063 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3064 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3065 continue;
3066
3067 /* XXX handle errors */
3068 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3069 /* XXX handle errors */
3070 if (r) {
3071 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3072 adev->ip_blocks[i].version->funcs->name, r);
3073 }
3074 adev->ip_blocks[i].status.hw = false;
3075 /* handle putting the SMC in the appropriate state */
3076 if (!amdgpu_sriov_vf(adev)) {
3077 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3078 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3079 if (r) {
3080 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3081 adev->mp1_state, r);
3082 return r;
3083 }
3084 }
3085 }
3086 }
3087
3088 return 0;
3089 }
3090
3091 /**
3092 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3093 *
3094 * @adev: amdgpu_device pointer
3095 *
3096 * Main suspend function for hardware IPs. The list of all the hardware
3097 * IPs that make up the asic is walked, clockgating is disabled and the
3098 * suspend callbacks are run. suspend puts the hardware and software state
3099 * in each IP into a state suitable for suspend.
3100 * Returns 0 on success, negative error code on failure.
3101 */
3102 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3103 {
3104 int r;
3105
3106 if (amdgpu_sriov_vf(adev)) {
3107 amdgpu_virt_fini_data_exchange(adev);
3108 amdgpu_virt_request_full_gpu(adev, false);
3109 }
3110
3111 r = amdgpu_device_ip_suspend_phase1(adev);
3112 if (r)
3113 return r;
3114 r = amdgpu_device_ip_suspend_phase2(adev);
3115
3116 if (amdgpu_sriov_vf(adev))
3117 amdgpu_virt_release_full_gpu(adev, false);
3118
3119 return r;
3120 }
3121
3122 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3123 {
3124 int i, r;
3125
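	/*
	 * Minimal bring-up order when re-initializing a VF after reset: the
	 * common, GMC, PSP and IH blocks must be back up before anything else
	 * touches the hardware; the remaining blocks are handled later by
	 * amdgpu_device_ip_reinit_late_sriov().
	 */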
3126 static enum amd_ip_block_type ip_order[] = {
3127 AMD_IP_BLOCK_TYPE_COMMON,
3128 AMD_IP_BLOCK_TYPE_GMC,
3129 AMD_IP_BLOCK_TYPE_PSP,
3130 AMD_IP_BLOCK_TYPE_IH,
3131 };
3132
3133 for (i = 0; i < adev->num_ip_blocks; i++) {
3134 int j;
3135 struct amdgpu_ip_block *block;
3136
3137 block = &adev->ip_blocks[i];
3138 block->status.hw = false;
3139
3140 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3141
3142 if (block->version->type != ip_order[j] ||
3143 !block->status.valid)
3144 continue;
3145
3146 r = block->version->funcs->hw_init(adev);
3147 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3148 if (r)
3149 return r;
3150 block->status.hw = true;
3151 }
3152 }
3153
3154 return 0;
3155 }
3156
3157 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3158 {
3159 int i, r;
3160
3161 static enum amd_ip_block_type ip_order[] = {
3162 AMD_IP_BLOCK_TYPE_SMC,
3163 AMD_IP_BLOCK_TYPE_DCE,
3164 AMD_IP_BLOCK_TYPE_GFX,
3165 AMD_IP_BLOCK_TYPE_SDMA,
3166 AMD_IP_BLOCK_TYPE_UVD,
3167 AMD_IP_BLOCK_TYPE_VCE,
3168 AMD_IP_BLOCK_TYPE_VCN
3169 };
3170
3171 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3172 int j;
3173 struct amdgpu_ip_block *block;
3174
3175 for (j = 0; j < adev->num_ip_blocks; j++) {
3176 block = &adev->ip_blocks[j];
3177
3178 if (block->version->type != ip_order[i] ||
3179 !block->status.valid ||
3180 block->status.hw)
3181 continue;
3182
3183 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3184 r = block->version->funcs->resume(adev);
3185 else
3186 r = block->version->funcs->hw_init(adev);
3187
3188 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3189 if (r)
3190 return r;
3191 block->status.hw = true;
3192 }
3193 }
3194
3195 return 0;
3196 }
3197
3198 /**
3199 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3200 *
3201 * @adev: amdgpu_device pointer
3202 *
3203 * First resume function for hardware IPs. The list of all the hardware
3204 * IPs that make up the asic is walked and the resume callbacks are run for
3205 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3206 * after a suspend and updates the software state as necessary. This
3207 * function is also used for restoring the GPU after a GPU reset.
3208 * Returns 0 on success, negative error code on failure.
3209 */
3210 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3211 {
3212 int i, r;
3213
3214 for (i = 0; i < adev->num_ip_blocks; i++) {
3215 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3216 continue;
3217 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3218 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3219 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3220 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3221
3222 r = adev->ip_blocks[i].version->funcs->resume(adev);
3223 if (r) {
3224 DRM_ERROR("resume of IP block <%s> failed %d\n",
3225 adev->ip_blocks[i].version->funcs->name, r);
3226 return r;
3227 }
3228 adev->ip_blocks[i].status.hw = true;
3229 }
3230 }
3231
3232 return 0;
3233 }
3234
3235 /**
3236 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3237 *
3238 * @adev: amdgpu_device pointer
3239 *
3240 * Second resume function for hardware IPs. The list of all the hardware
3241 * IPs that make up the asic is walked and the resume callbacks are run for
3242 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3243 * functional state after a suspend and updates the software state as
3244 * necessary. This function is also used for restoring the GPU after a GPU
3245 * reset.
3246 * Returns 0 on success, negative error code on failure.
3247 */
3248 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3249 {
3250 int i, r;
3251
3252 for (i = 0; i < adev->num_ip_blocks; i++) {
3253 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3254 continue;
3255 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3256 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3257 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3258 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3259 continue;
3260 r = adev->ip_blocks[i].version->funcs->resume(adev);
3261 if (r) {
3262 DRM_ERROR("resume of IP block <%s> failed %d\n",
3263 adev->ip_blocks[i].version->funcs->name, r);
3264 return r;
3265 }
3266 adev->ip_blocks[i].status.hw = true;
3267
3268 if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3269 /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
3270 * amdgpu_device_resume() after IP resume.
3271 */
3272 amdgpu_gfx_off_ctrl(adev, false);
3273 DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
3274 }
3275
3276 }
3277
3278 return 0;
3279 }
3280
3281 /**
3282 * amdgpu_device_ip_resume - run resume for hardware IPs
3283 *
3284 * @adev: amdgpu_device pointer
3285 *
3286 * Main resume function for hardware IPs. The hardware IPs
3287 * are split into two resume functions because they
3288 * are also used in recovering from a GPU reset and some additional
3289 * steps need to be taken between them. In this case (S3/S4) they are
3290 * run sequentially.
3291 * Returns 0 on success, negative error code on failure.
3292 */
3293 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3294 {
3295 int r;
3296
3297 r = amdgpu_amdkfd_resume_iommu(adev);
3298 if (r)
3299 return r;
3300
3301 r = amdgpu_device_ip_resume_phase1(adev);
3302 if (r)
3303 return r;
3304
3305 r = amdgpu_device_fw_loading(adev);
3306 if (r)
3307 return r;
3308
3309 r = amdgpu_device_ip_resume_phase2(adev);
3310
3311 return r;
3312 }
3313
3314 /**
3315 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3316 *
3317 * @adev: amdgpu_device pointer
3318 *
3319 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3320 */
3321 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3322 {
3323 if (amdgpu_sriov_vf(adev)) {
3324 if (adev->is_atom_fw) {
3325 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3326 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3327 } else {
3328 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3329 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3330 }
3331
3332 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3333 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3334 }
3335 }
3336
3337 /**
3338 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3339 *
3340 * @asic_type: AMD asic type
3341 *
3342 * Check if there is DC (new modesetting infrastructure) support for an asic.
3343 * returns true if DC has support, false if not.
3344 */
3345 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3346 {
3347 switch (asic_type) {
3348 #ifdef CONFIG_DRM_AMDGPU_SI
3349 case CHIP_HAINAN:
3350 #endif
3351 case CHIP_TOPAZ:
3352 /* chips with no display hardware */
3353 return false;
3354 #if defined(CONFIG_DRM_AMD_DC)
3355 case CHIP_TAHITI:
3356 case CHIP_PITCAIRN:
3357 case CHIP_VERDE:
3358 case CHIP_OLAND:
3359 /*
3360 * We have systems in the wild with these ASICs that require
3361 * LVDS and VGA support which is not supported with DC.
3362 *
3363 * Fallback to the non-DC driver here by default so as not to
3364 * cause regressions.
3365 */
3366 #if defined(CONFIG_DRM_AMD_DC_SI)
3367 return amdgpu_dc > 0;
3368 #else
3369 return false;
3370 #endif
3371 case CHIP_BONAIRE:
3372 case CHIP_KAVERI:
3373 case CHIP_KABINI:
3374 case CHIP_MULLINS:
3375 /*
3376 * We have systems in the wild with these ASICs that require
3377 * VGA support which is not supported with DC.
3378 *
3379 * Fallback to the non-DC driver here by default so as not to
3380 * cause regressions.
3381 */
3382 return amdgpu_dc > 0;
3383 default:
3384 return amdgpu_dc != 0;
3385 #else
3386 default:
3387 if (amdgpu_dc > 0)
3388 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3389 "but isn't supported by ASIC, ignoring\n");
3390 return false;
3391 #endif
3392 }
3393 }
3394
3395 /**
3396 * amdgpu_device_has_dc_support - check if dc is supported
3397 *
3398 * @adev: amdgpu_device pointer
3399 *
3400 * Returns true for supported, false for not supported
3401 */
3402 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3403 {
3404 if (adev->enable_virtual_display ||
3405 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3406 return false;
3407
3408 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3409 }
3410
3411 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3412 {
3413 struct amdgpu_device *adev =
3414 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3415 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3416
3417 /* It's a bug to not have a hive within this function */
3418 if (WARN_ON(!hive))
3419 return;
3420
3421 /*
3422 * Use task barrier to synchronize all xgmi reset works across the
3423 * hive. task_barrier_enter and task_barrier_exit will block
3424 * until all the threads running the xgmi reset works reach
3425 * those points. task_barrier_full will do both blocks.
3426 */
3427 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3428
3429 task_barrier_enter(&hive->tb);
3430 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3431
3432 if (adev->asic_reset_res)
3433 goto fail;
3434
3435 task_barrier_exit(&hive->tb);
3436 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3437
3438 if (adev->asic_reset_res)
3439 goto fail;
3440
3441 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3442 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3443 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3444 } else {
3445
3446 task_barrier_full(&hive->tb);
3447 adev->asic_reset_res = amdgpu_asic_reset(adev);
3448 }
3449
3450 fail:
3451 if (adev->asic_reset_res)
3452 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3453 adev->asic_reset_res, adev_to_drm(adev)->unique);
3454 amdgpu_put_xgmi_hive(hive);
3455 }
3456
3457 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3458 {
3459 char *input = amdgpu_lockup_timeout;
3460 char *timeout_setting = NULL;
3461 int index = 0;
3462 long timeout;
3463 int ret = 0;
3464
3465 /*
3466 * By default the timeout for non-compute jobs is 10000
3467 * and 60000 for compute jobs.
3468 * In SR-IOV or passthrough mode, the timeout for compute
3469 * jobs is 60000 by default.
3470 */
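	/*
	 * Illustrative lockup_timeout values (in ms), parsed below in the
	 * order GFX, compute, SDMA, video:
	 *
	 *	amdgpu.lockup_timeout=10000,60000,10000,10000
	 *
	 * A single value applies to all non-compute jobs, 0 keeps the default
	 * and a negative value disables the timeout.
	 */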
3471 adev->gfx_timeout = msecs_to_jiffies(10000);
3472 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3473 if (amdgpu_sriov_vf(adev))
3474 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3475 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3476 else
3477 adev->compute_timeout = msecs_to_jiffies(60000);
3478
3479 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3480 while ((timeout_setting = strsep(&input, ",")) &&
3481 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3482 ret = kstrtol(timeout_setting, 0, &timeout);
3483 if (ret)
3484 return ret;
3485
3486 if (timeout == 0) {
3487 index++;
3488 continue;
3489 } else if (timeout < 0) {
3490 timeout = MAX_SCHEDULE_TIMEOUT;
3491 dev_warn(adev->dev, "lockup timeout disabled");
3492 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3493 } else {
3494 timeout = msecs_to_jiffies(timeout);
3495 }
3496
3497 switch (index++) {
3498 case 0:
3499 adev->gfx_timeout = timeout;
3500 break;
3501 case 1:
3502 adev->compute_timeout = timeout;
3503 break;
3504 case 2:
3505 adev->sdma_timeout = timeout;
3506 break;
3507 case 3:
3508 adev->video_timeout = timeout;
3509 break;
3510 default:
3511 break;
3512 }
3513 }
3514 /*
3515 * There is only one value specified and
3516 * it should apply to all non-compute jobs.
3517 */
3518 if (index == 1) {
3519 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3520 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3521 adev->compute_timeout = adev->gfx_timeout;
3522 }
3523 }
3524
3525 return ret;
3526 }
3527
3528 /**
3529 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3530 *
3531 * @adev: amdgpu_device pointer
3532 *
3533 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3534 */
3535 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3536 {
3537 struct iommu_domain *domain;
3538
3539 domain = iommu_get_domain_for_dev(adev->dev);
3540 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3541 adev->ram_is_direct_mapped = true;
3542 }
3543
3544 static const struct attribute *amdgpu_dev_attributes[] = {
3545 &dev_attr_product_name.attr,
3546 &dev_attr_product_number.attr,
3547 &dev_attr_serial_number.attr,
3548 &dev_attr_pcie_replay_count.attr,
3549 NULL
3550 };
3551
3552 /**
3553 * amdgpu_device_init - initialize the driver
3554 *
3555 * @adev: amdgpu_device pointer
3556 * @flags: driver flags
3557 *
3558 * Initializes the driver info and hw (all asics).
3559 * Returns 0 for success or an error on failure.
3560 * Called at driver startup.
3561 */
3562 int amdgpu_device_init(struct amdgpu_device *adev,
3563 uint32_t flags)
3564 {
3565 struct drm_device *ddev = adev_to_drm(adev);
3566 struct pci_dev *pdev = adev->pdev;
3567 int r, i;
3568 bool px = false;
3569 u32 max_MBps;
3570
3571 adev->shutdown = false;
3572 adev->flags = flags;
3573
3574 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3575 adev->asic_type = amdgpu_force_asic_type;
3576 else
3577 adev->asic_type = flags & AMD_ASIC_MASK;
3578
3579 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3580 if (amdgpu_emu_mode == 1)
3581 adev->usec_timeout *= 10;
3582 adev->gmc.gart_size = 512 * 1024 * 1024;
3583 adev->accel_working = false;
3584 adev->num_rings = 0;
3585 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3586 adev->mman.buffer_funcs = NULL;
3587 adev->mman.buffer_funcs_ring = NULL;
3588 adev->vm_manager.vm_pte_funcs = NULL;
3589 adev->vm_manager.vm_pte_num_scheds = 0;
3590 adev->gmc.gmc_funcs = NULL;
3591 adev->harvest_ip_mask = 0x0;
3592 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3593 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3594
3595 adev->smc_rreg = &amdgpu_invalid_rreg;
3596 adev->smc_wreg = &amdgpu_invalid_wreg;
3597 adev->pcie_rreg = &amdgpu_invalid_rreg;
3598 adev->pcie_wreg = &amdgpu_invalid_wreg;
3599 adev->pciep_rreg = &amdgpu_invalid_rreg;
3600 adev->pciep_wreg = &amdgpu_invalid_wreg;
3601 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3602 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3603 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3604 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3605 adev->didt_rreg = &amdgpu_invalid_rreg;
3606 adev->didt_wreg = &amdgpu_invalid_wreg;
3607 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3608 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3609 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3610 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3611
3612 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3613 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3614 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3615
3616 /* mutex initialization is all done here so we
3617 * can recall functions without having locking issues */
3618 mutex_init(&adev->firmware.mutex);
3619 mutex_init(&adev->pm.mutex);
3620 mutex_init(&adev->gfx.gpu_clock_mutex);
3621 mutex_init(&adev->srbm_mutex);
3622 mutex_init(&adev->gfx.pipe_reserve_mutex);
3623 mutex_init(&adev->gfx.gfx_off_mutex);
3624 mutex_init(&adev->grbm_idx_mutex);
3625 mutex_init(&adev->mn_lock);
3626 mutex_init(&adev->virt.vf_errors.lock);
3627 hash_init(adev->mn_hash);
3628 mutex_init(&adev->psp.mutex);
3629 mutex_init(&adev->notifier_lock);
3630 mutex_init(&adev->pm.stable_pstate_ctx_lock);
3631 mutex_init(&adev->benchmark_mutex);
3632
3633 amdgpu_device_init_apu_flags(adev);
3634
3635 r = amdgpu_device_check_arguments(adev);
3636 if (r)
3637 return r;
3638
3639 spin_lock_init(&adev->mmio_idx_lock);
3640 spin_lock_init(&adev->smc_idx_lock);
3641 spin_lock_init(&adev->pcie_idx_lock);
3642 spin_lock_init(&adev->uvd_ctx_idx_lock);
3643 spin_lock_init(&adev->didt_idx_lock);
3644 spin_lock_init(&adev->gc_cac_idx_lock);
3645 spin_lock_init(&adev->se_cac_idx_lock);
3646 spin_lock_init(&adev->audio_endpt_idx_lock);
3647 spin_lock_init(&adev->mm_stats.lock);
3648
3649 INIT_LIST_HEAD(&adev->shadow_list);
3650 mutex_init(&adev->shadow_list_lock);
3651
3652 INIT_LIST_HEAD(&adev->reset_list);
3653
3654 INIT_LIST_HEAD(&adev->ras_list);
3655
3656 INIT_DELAYED_WORK(&adev->delayed_init_work,
3657 amdgpu_device_delayed_init_work_handler);
3658 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3659 amdgpu_device_delay_enable_gfx_off);
3660
3661 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3662
3663 adev->gfx.gfx_off_req_count = 1;
3664 adev->gfx.gfx_off_residency = 0;
3665 adev->gfx.gfx_off_entrycount = 0;
3666 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3667
3668 atomic_set(&adev->throttling_logging_enabled, 1);
3669 /*
3670 * If throttling continues, logging will be performed every minute
3671 * to avoid log flooding. "-1" is subtracted since the thermal
3672 * throttling interrupt comes every second. Thus, the total logging
3673 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3674 * for throttling interrupt) = 60 seconds.
3675 */
3676 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3677 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3678
3679 /* Registers mapping */
3680 /* TODO: block userspace mapping of io register */
3681 if (adev->asic_type >= CHIP_BONAIRE) {
3682 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3683 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3684 } else {
3685 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3686 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3687 }
3688
3689 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3690 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3691
3692 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3693 if (adev->rmmio == NULL) {
3694 return -ENOMEM;
3695 }
3696 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3697 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3698
3699 amdgpu_device_get_pcie_info(adev);
3700
3701 if (amdgpu_mcbp)
3702 DRM_INFO("MCBP is enabled\n");
3703
3704 /*
3705 * The reset domain needs to be present early, before the XGMI hive is
3706 * discovered (if any) and initialized, so that the reset sem and in_gpu_reset
3707 * flag can be used early during init and before calling RREG32.
3708 */
3709 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3710 if (!adev->reset_domain)
3711 return -ENOMEM;
3712
3713 /* detect hw virtualization here */
3714 amdgpu_detect_virtualization(adev);
3715
3716 r = amdgpu_device_get_job_timeout_settings(adev);
3717 if (r) {
3718 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3719 return r;
3720 }
3721
3722 /* early init functions */
3723 r = amdgpu_device_ip_early_init(adev);
3724 if (r)
3725 return r;
3726
3727 /* Get rid of things like offb */
3728 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3729 if (r)
3730 return r;
3731
3732 /* Enable TMZ based on IP_VERSION */
3733 amdgpu_gmc_tmz_set(adev);
3734
3735 amdgpu_gmc_noretry_set(adev);
3736 /* Need to get xgmi info early to decide the reset behavior */
3737 if (adev->gmc.xgmi.supported) {
3738 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3739 if (r)
3740 return r;
3741 }
3742
3743 /* enable PCIE atomic ops */
3744 if (amdgpu_sriov_vf(adev))
3745 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3746 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3747 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3748 else
3749 adev->have_atomics_support =
3750 !pci_enable_atomic_ops_to_root(adev->pdev,
3751 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3752 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3753 if (!adev->have_atomics_support)
3754 dev_info(adev->dev, "PCIE atomic ops are not supported\n");
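
/*
 * A minimal sketch of the bare-metal path above, with an arbitrary
 * pci_dev: pci_enable_atomic_ops_to_root() returns 0 only when the path
 * up to the root port can route (and the root port can complete) the
 * requested AtomicOp widths, so a nonzero return simply means the device
 * must not issue those atomics.
 *
 *	u32 caps = PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
 *		   PCI_EXP_DEVCAP2_ATOMIC_COMP64;
 *
 *	if (pci_enable_atomic_ops_to_root(pdev, caps))
 *		dev_info(&pdev->dev, "PCIe atomics not available\n");
 */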
3755
3756 /* doorbell bar mapping and doorbell index init */
3757 amdgpu_device_doorbell_init(adev);
3758
3759 if (amdgpu_emu_mode == 1) {
3760 /* post the asic in emulation mode */
3761 emu_soc_asic_init(adev);
3762 goto fence_driver_init;
3763 }
3764
3765 amdgpu_reset_init(adev);
3766
3767 /* detect whether we have an SRIOV vbios */
3768 amdgpu_device_detect_sriov_bios(adev);
3769
3770 /* check if we need to reset the asic
3771 * E.g., driver was not cleanly unloaded previously, etc.
3772 */
3773 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3774 if (adev->gmc.xgmi.num_physical_nodes) {
3775 dev_info(adev->dev, "Pending hive reset.\n");
3776 adev->gmc.xgmi.pending_reset = true;
3777 /* Only need to init the necessary blocks for SMU to handle the reset */
3778 for (i = 0; i < adev->num_ip_blocks; i++) {
3779 if (!adev->ip_blocks[i].status.valid)
3780 continue;
3781 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3782 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3783 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3784 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3785 DRM_DEBUG("IP %s disabled for hw_init.\n",
3786 adev->ip_blocks[i].version->funcs->name);
3787 adev->ip_blocks[i].status.hw = true;
3788 }
3789 }
3790 } else {
3791 r = amdgpu_asic_reset(adev);
3792 if (r) {
3793 dev_err(adev->dev, "asic reset on init failed\n");
3794 goto failed;
3795 }
3796 }
3797 }
3798
3799 pci_enable_pcie_error_reporting(adev->pdev);
3800
3801 /* Post card if necessary */
3802 if (amdgpu_device_need_post(adev)) {
3803 if (!adev->bios) {
3804 dev_err(adev->dev, "no vBIOS found\n");
3805 r = -EINVAL;
3806 goto failed;
3807 }
3808 DRM_INFO("GPU posting now...\n");
3809 r = amdgpu_device_asic_init(adev);
3810 if (r) {
3811 dev_err(adev->dev, "gpu post error!\n");
3812 goto failed;
3813 }
3814 }
3815
3816 if (adev->is_atom_fw) {
3817 /* Initialize clocks */
3818 r = amdgpu_atomfirmware_get_clock_info(adev);
3819 if (r) {
3820 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3821 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3822 goto failed;
3823 }
3824 } else {
3825 /* Initialize clocks */
3826 r = amdgpu_atombios_get_clock_info(adev);
3827 if (r) {
3828 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3829 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3830 goto failed;
3831 }
3832 /* init i2c buses */
3833 if (!amdgpu_device_has_dc_support(adev))
3834 amdgpu_atombios_i2c_init(adev);
3835 }
3836
3837 fence_driver_init:
3838 /* Fence driver */
3839 r = amdgpu_fence_driver_sw_init(adev);
3840 if (r) {
3841 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3842 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3843 goto failed;
3844 }
3845
3846 /* init the mode config */
3847 drm_mode_config_init(adev_to_drm(adev));
3848
3849 r = amdgpu_device_ip_init(adev);
3850 if (r) {
3851 /* failed in exclusive mode due to timeout */
3852 if (amdgpu_sriov_vf(adev) &&
3853 !amdgpu_sriov_runtime(adev) &&
3854 amdgpu_virt_mmio_blocked(adev) &&
3855 !amdgpu_virt_wait_reset(adev)) {
3856 dev_err(adev->dev, "VF exclusive mode timeout\n");
3857 /* Don't send request since VF is inactive. */
3858 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3859 adev->virt.ops = NULL;
3860 r = -EAGAIN;
3861 goto release_ras_con;
3862 }
3863 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3864 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3865 goto release_ras_con;
3866 }
3867
3868 amdgpu_fence_driver_hw_init(adev);
3869
3870 dev_info(adev->dev,
3871 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3872 adev->gfx.config.max_shader_engines,
3873 adev->gfx.config.max_sh_per_se,
3874 adev->gfx.config.max_cu_per_sh,
3875 adev->gfx.cu_info.number);
3876
3877 adev->accel_working = true;
3878
3879 amdgpu_vm_check_compute_bug(adev);
3880
3881 /* Initialize the buffer migration limit. */
3882 if (amdgpu_moverate >= 0)
3883 max_MBps = amdgpu_moverate;
3884 else
3885 max_MBps = 8; /* Allow 8 MB/s. */
3886 /* Get a log2 for easy divisions. */
3887 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
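
/*
 * Worked example of the log2 trick above, with the default
 * amdgpu_moverate of 8 MB/s: log2_max_MBps = ilog2(8) = 3, so later
 * accounting can replace a divide or multiply by the rate with a shift
 * (at the cost of rounding the rate down to a power of two), e.g.
 *
 *	approx_time  = bytes >> adev->mm_stats.log2_max_MBps;
 *	approx_bytes = time  << adev->mm_stats.log2_max_MBps;
 */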
3888
3889 r = amdgpu_pm_sysfs_init(adev);
3890 if (r) {
3891 adev->pm_sysfs_en = false;
3892 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3893 } else
3894 adev->pm_sysfs_en = true;
3895
3896 r = amdgpu_ucode_sysfs_init(adev);
3897 if (r) {
3898 adev->ucode_sysfs_en = false;
3899 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3900 } else
3901 adev->ucode_sysfs_en = true;
3902
3903 r = amdgpu_psp_sysfs_init(adev);
3904 if (r) {
3905 adev->psp_sysfs_en = false;
3906 if (!amdgpu_sriov_vf(adev))
3907 DRM_ERROR("Creating psp sysfs failed\n");
3908 } else
3909 adev->psp_sysfs_en = true;
3910
3911 /*
3912 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3913 * Otherwise the mgpu fan boost feature will be skipped because the
3914 * gpu instance count would be too low.
3915 */
3916 amdgpu_register_gpu_instance(adev);
3917
3918 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3919 * explicit gating rather than handling it automatically.
3920 */
3921 if (!adev->gmc.xgmi.pending_reset) {
3922 r = amdgpu_device_ip_late_init(adev);
3923 if (r) {
3924 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3925 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3926 goto release_ras_con;
3927 }
3928 /* must succeed. */
3929 amdgpu_ras_resume(adev);
3930 queue_delayed_work(system_wq, &adev->delayed_init_work,
3931 msecs_to_jiffies(AMDGPU_RESUME_MS));
3932 }
3933
3934 if (amdgpu_sriov_vf(adev))
3935 flush_delayed_work(&adev->delayed_init_work);
3936
3937 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3938 if (r)
3939 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3940
3941 if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
3942 r = amdgpu_pmu_init(adev);
3943 if (r)
3944 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3945 }

3946 /* Keep the stored PCI config space at hand for restore on sudden PCI error */
3947 if (amdgpu_device_cache_pci_state(adev->pdev))
3948 pci_restore_state(pdev);
3949
3950 /* if we have more than one VGA card, disable the amdgpu VGA resources */
3951 /* this will fail for cards that aren't VGA class devices, just
3952 * ignore it */
3953 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3954 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3955
3956 px = amdgpu_device_supports_px(ddev);
3957
3958 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3959 apple_gmux_detect(NULL, NULL)))
3960 vga_switcheroo_register_client(adev->pdev,
3961 &amdgpu_switcheroo_ops, px);
3962
3963 if (px)
3964 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3965
3966 if (adev->gmc.xgmi.pending_reset)
3967 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3968 msecs_to_jiffies(AMDGPU_RESUME_MS));
3969
3970 amdgpu_device_check_iommu_direct_map(adev);
3971
3972 return 0;
3973
3974 release_ras_con:
3975 amdgpu_release_ras_context(adev);
3976
3977 failed:
3978 amdgpu_vf_error_trans_all(adev);
3979
3980 return r;
3981 }
3982
3983 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3984 {
3985
3986 /* Clear all CPU mappings pointing to this device */
3987 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3988
3989 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3990 amdgpu_device_doorbell_fini(adev);
3991
3992 iounmap(adev->rmmio);
3993 adev->rmmio = NULL;
3994 if (adev->mman.aper_base_kaddr)
3995 iounmap(adev->mman.aper_base_kaddr);
3996 adev->mman.aper_base_kaddr = NULL;
3997
3998 /* Memory manager related */
3999 if (!adev->gmc.xgmi.connected_to_cpu) {
4000 arch_phys_wc_del(adev->gmc.vram_mtrr);
4001 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4002 }
4003 }
4004
4005 /**
4006 * amdgpu_device_fini_hw - tear down the driver
4007 *
4008 * @adev: amdgpu_device pointer
4009 *
4010 * Tear down the driver info (all asics).
4011 * Called at driver shutdown.
4012 */
4013 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4014 {
4015 dev_info(adev->dev, "amdgpu: finishing device.\n");
4016 flush_delayed_work(&adev->delayed_init_work);
4017 adev->shutdown = true;
4018
4019 /* make sure the IB test finished before entering exclusive mode
4020 * to avoid preemption on the IB test
4021 */
4022 if (amdgpu_sriov_vf(adev)) {
4023 amdgpu_virt_request_full_gpu(adev, false);
4024 amdgpu_virt_fini_data_exchange(adev);
4025 }
4026
4027 /* disable all interrupts */
4028 amdgpu_irq_disable_all(adev);
4029 if (adev->mode_info.mode_config_initialized) {
4030 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4031 drm_helper_force_disable_all(adev_to_drm(adev));
4032 else
4033 drm_atomic_helper_shutdown(adev_to_drm(adev));
4034 }
4035 amdgpu_fence_driver_hw_fini(adev);
4036
4037 if (adev->mman.initialized) {
4038 flush_delayed_work(&adev->mman.bdev.wq);
4039 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
4040 }
4041
4042 if (adev->pm_sysfs_en)
4043 amdgpu_pm_sysfs_fini(adev);
4044 if (adev->ucode_sysfs_en)
4045 amdgpu_ucode_sysfs_fini(adev);
4046 if (adev->psp_sysfs_en)
4047 amdgpu_psp_sysfs_fini(adev);
4048 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4049
4050 /* ras features must be disabled before hw fini */
4051 amdgpu_ras_pre_fini(adev);
4052
4053 amdgpu_device_ip_fini_early(adev);
4054
4055 amdgpu_irq_fini_hw(adev);
4056
4057 if (adev->mman.initialized)
4058 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4059
4060 amdgpu_gart_dummy_page_fini(adev);
4061
4062 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4063 amdgpu_device_unmap_mmio(adev);
4064
4065 }
4066
4067 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4068 {
4069 int idx;
4070 bool px;
4071
4072 amdgpu_fence_driver_sw_fini(adev);
4073 amdgpu_device_ip_fini(adev);
4074 release_firmware(adev->firmware.gpu_info_fw);
4075 adev->firmware.gpu_info_fw = NULL;
4076 adev->accel_working = false;
4077 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4078
4079 amdgpu_reset_fini(adev);
4080
4081 /* free i2c buses */
4082 if (!amdgpu_device_has_dc_support(adev))
4083 amdgpu_i2c_fini(adev);
4084
4085 if (amdgpu_emu_mode != 1)
4086 amdgpu_atombios_fini(adev);
4087
4088 kfree(adev->bios);
4089 adev->bios = NULL;
4090
4091 px = amdgpu_device_supports_px(adev_to_drm(adev));
4092
4093 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4094 apple_gmux_detect(NULL, NULL)))
4095 vga_switcheroo_unregister_client(adev->pdev);
4096
4097 if (px)
4098 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4099
4100 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4101 vga_client_unregister(adev->pdev);
4102
4103 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4104
4105 iounmap(adev->rmmio);
4106 adev->rmmio = NULL;
4107 amdgpu_device_doorbell_fini(adev);
4108 drm_dev_exit(idx);
4109 }
4110
4111 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4112 amdgpu_pmu_fini(adev);
4113 if (adev->mman.discovery_bin)
4114 amdgpu_discovery_fini(adev);
4115
4116 amdgpu_reset_put_reset_domain(adev->reset_domain);
4117 adev->reset_domain = NULL;
4118
4119 kfree(adev->pci_state);
4120
4121 }
4122
4123 /**
4124 * amdgpu_device_evict_resources - evict device resources
4125 * @adev: amdgpu device object
4126 *
4127 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4128 * of the vram memory type. Mainly used for evicting device resources
4129 * at suspend time.
4130 *
4131 */
4132 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4133 {
4134 int ret;
4135
4136 /* No need to evict vram on APUs for suspend to ram or s2idle */
4137 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4138 return 0;
4139
4140 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4141 if (ret)
4142 DRM_WARN("evicting device resources failed\n");
4143 return ret;
4144 }
4145
4146 /*
4147 * Suspend & resume.
4148 */
4149 /**
4150 * amdgpu_device_suspend - initiate device suspend
4151 *
4152 * @dev: drm dev pointer
4153 * @fbcon: notify the fbdev of suspend
4154 *
4155 * Puts the hw in the suspend state (all asics).
4156 * Returns 0 for success or an error on failure.
4157 * Called at driver suspend.
4158 */
4159 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4160 {
4161 struct amdgpu_device *adev = drm_to_adev(dev);
4162 int r = 0;
4163
4164 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4165 return 0;
4166
4167 adev->in_suspend = true;
4168
4169 /* Evict the majority of BOs before grabbing the full access */
4170 r = amdgpu_device_evict_resources(adev);
4171 if (r)
4172 return r;
4173
4174 if (amdgpu_sriov_vf(adev)) {
4175 amdgpu_virt_fini_data_exchange(adev);
4176 r = amdgpu_virt_request_full_gpu(adev, false);
4177 if (r)
4178 return r;
4179 }
4180
4181 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4182 DRM_WARN("smart shift update failed\n");
4183
4184 drm_kms_helper_poll_disable(dev);
4185
4186 if (fbcon)
4187 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4188
4189 cancel_delayed_work_sync(&adev->delayed_init_work);
4190
4191 amdgpu_ras_suspend(adev);
4192
4193 amdgpu_device_ip_suspend_phase1(adev);
4194
4195 if (!adev->in_s0ix)
4196 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4197
4198 r = amdgpu_device_evict_resources(adev);
4199 if (r)
4200 return r;
4201
4202 amdgpu_fence_driver_hw_fini(adev);
4203
4204 amdgpu_device_ip_suspend_phase2(adev);
4205
4206 if (amdgpu_sriov_vf(adev))
4207 amdgpu_virt_release_full_gpu(adev, false);
4208
4209 return 0;
4210 }
4211
4212 /**
4213 * amdgpu_device_resume - initiate device resume
4214 *
4215 * @dev: drm dev pointer
4216 * @fbcon: notify the fbdev of resume
4217 *
4218 * Bring the hw back to operating state (all asics).
4219 * Returns 0 for success or an error on failure.
4220 * Called at driver resume.
4221 */
4222 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4223 {
4224 struct amdgpu_device *adev = drm_to_adev(dev);
4225 int r = 0;
4226
4227 if (amdgpu_sriov_vf(adev)) {
4228 r = amdgpu_virt_request_full_gpu(adev, true);
4229 if (r)
4230 return r;
4231 }
4232
4233 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4234 return 0;
4235
4236 if (adev->in_s0ix)
4237 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4238
4239 /* post card */
4240 if (amdgpu_device_need_post(adev)) {
4241 r = amdgpu_device_asic_init(adev);
4242 if (r)
4243 dev_err(adev->dev, "amdgpu asic init failed\n");
4244 }
4245
4246 r = amdgpu_device_ip_resume(adev);
4247
4248 if (r) {
4249 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4250 goto exit;
4251 }
4252 amdgpu_fence_driver_hw_init(adev);
4253
4254 r = amdgpu_device_ip_late_init(adev);
4255 if (r)
4256 goto exit;
4257
4258 queue_delayed_work(system_wq, &adev->delayed_init_work,
4259 msecs_to_jiffies(AMDGPU_RESUME_MS));
4260
4261 if (!adev->in_s0ix) {
4262 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4263 if (r)
4264 goto exit;
4265 }
4266
4267 exit:
4268 if (amdgpu_sriov_vf(adev)) {
4269 amdgpu_virt_init_data_exchange(adev);
4270 amdgpu_virt_release_full_gpu(adev, true);
4271 }
4272
4273 if (r)
4274 return r;
4275
4276 /* Make sure IB tests flushed */
4277 flush_delayed_work(&adev->delayed_init_work);
4278
4279 if (adev->in_s0ix) {
4280 /* re-enable gfxoff after IP resume; it was disabled for IP resume
4281 * in amdgpu_device_ip_resume_phase2().
4282 */
4283 amdgpu_gfx_off_ctrl(adev, true);
4284 DRM_DEBUG("will enable gfxoff for the mission mode\n");
4285 }
4286 if (fbcon)
4287 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4288
4289 drm_kms_helper_poll_enable(dev);
4290
4291 amdgpu_ras_resume(adev);
4292
4293 if (adev->mode_info.num_crtc) {
4294 /*
4295 * Most of the connector probing functions try to acquire runtime pm
4296 * refs to ensure that the GPU is powered on when connector polling is
4297 * performed. Since we're calling this from a runtime PM callback,
4298 * trying to acquire rpm refs will cause us to deadlock.
4299 *
4300 * Since we're guaranteed to be holding the rpm lock, it's safe to
4301 * temporarily disable the rpm helpers so this doesn't deadlock us.
4302 */
4303 #ifdef CONFIG_PM
4304 dev->dev->power.disable_depth++;
4305 #endif
4306 if (!adev->dc_enabled)
4307 drm_helper_hpd_irq_event(dev);
4308 else
4309 drm_kms_helper_hotplug_event(dev);
4310 #ifdef CONFIG_PM
4311 dev->dev->power.disable_depth--;
4312 #endif
4313 }
4314 adev->in_suspend = false;
4315
4316 if (adev->enable_mes)
4317 amdgpu_mes_self_test(adev);
4318
4319 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4320 DRM_WARN("smart shift update failed\n");
4321
4322 return 0;
4323 }
4324
4325 /**
4326 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4327 *
4328 * @adev: amdgpu_device pointer
4329 *
4330 * The list of all the hardware IPs that make up the asic is walked and
4331 * the check_soft_reset callbacks are run. check_soft_reset determines
4332 * if the asic is still hung or not.
4333 * Returns true if any of the IPs are still in a hung state, false if not.
4334 */
4335 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4336 {
4337 int i;
4338 bool asic_hang = false;
4339
4340 if (amdgpu_sriov_vf(adev))
4341 return true;
4342
4343 if (amdgpu_asic_need_full_reset(adev))
4344 return true;
4345
4346 for (i = 0; i < adev->num_ip_blocks; i++) {
4347 if (!adev->ip_blocks[i].status.valid)
4348 continue;
4349 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4350 adev->ip_blocks[i].status.hang =
4351 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4352 if (adev->ip_blocks[i].status.hang) {
4353 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4354 asic_hang = true;
4355 }
4356 }
4357 return asic_hang;
4358 }
4359
4360 /**
4361 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4362 *
4363 * @adev: amdgpu_device pointer
4364 *
4365 * The list of all the hardware IPs that make up the asic is walked and the
4366 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4367 * handles any IP specific hardware or software state changes that are
4368 * necessary for a soft reset to succeed.
4369 * Returns 0 on success, negative error code on failure.
4370 */
4371 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4372 {
4373 int i, r = 0;
4374
4375 for (i = 0; i < adev->num_ip_blocks; i++) {
4376 if (!adev->ip_blocks[i].status.valid)
4377 continue;
4378 if (adev->ip_blocks[i].status.hang &&
4379 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4380 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4381 if (r)
4382 return r;
4383 }
4384 }
4385
4386 return 0;
4387 }
4388
4389 /**
4390 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4391 *
4392 * @adev: amdgpu_device pointer
4393 *
4394 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4395 * reset is necessary to recover.
4396 * Returns true if a full asic reset is required, false if not.
4397 */
4398 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4399 {
4400 int i;
4401
4402 if (amdgpu_asic_need_full_reset(adev))
4403 return true;
4404
4405 for (i = 0; i < adev->num_ip_blocks; i++) {
4406 if (!adev->ip_blocks[i].status.valid)
4407 continue;
4408 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4409 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4410 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4411 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4413 if (adev->ip_blocks[i].status.hang) {
4414 dev_info(adev->dev, "Some block needs a full reset!\n");
4415 return true;
4416 }
4417 }
4418 }
4419 return false;
4420 }
4421
4422 /**
4423 * amdgpu_device_ip_soft_reset - do a soft reset
4424 *
4425 * @adev: amdgpu_device pointer
4426 *
4427 * The list of all the hardware IPs that make up the asic is walked and the
4428 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4429 * IP specific hardware or software state changes that are necessary to soft
4430 * reset the IP.
4431 * Returns 0 on success, negative error code on failure.
4432 */
4433 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4434 {
4435 int i, r = 0;
4436
4437 for (i = 0; i < adev->num_ip_blocks; i++) {
4438 if (!adev->ip_blocks[i].status.valid)
4439 continue;
4440 if (adev->ip_blocks[i].status.hang &&
4441 adev->ip_blocks[i].version->funcs->soft_reset) {
4442 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4443 if (r)
4444 return r;
4445 }
4446 }
4447
4448 return 0;
4449 }
4450
4451 /**
4452 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4453 *
4454 * @adev: amdgpu_device pointer
4455 *
4456 * The list of all the hardware IPs that make up the asic is walked and the
4457 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4458 * handles any IP specific hardware or software state changes that are
4459 * necessary after the IP has been soft reset.
4460 * Returns 0 on success, negative error code on failure.
4461 */
4462 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4463 {
4464 int i, r = 0;
4465
4466 for (i = 0; i < adev->num_ip_blocks; i++) {
4467 if (!adev->ip_blocks[i].status.valid)
4468 continue;
4469 if (adev->ip_blocks[i].status.hang &&
4470 adev->ip_blocks[i].version->funcs->post_soft_reset)
4471 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4472 if (r)
4473 return r;
4474 }
4475
4476 return 0;
4477 }
4478
4479 /**
4480 * amdgpu_device_recover_vram - Recover some VRAM contents
4481 *
4482 * @adev: amdgpu_device pointer
4483 *
4484 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4485 * restore things like GPUVM page tables after a GPU reset where
4486 * the contents of VRAM might be lost.
4487 *
4488 * Returns:
4489 * 0 on success, negative error code on failure.
4490 */
4491 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4492 {
4493 struct dma_fence *fence = NULL, *next = NULL;
4494 struct amdgpu_bo *shadow;
4495 struct amdgpu_bo_vm *vmbo;
4496 long r = 1, tmo;
4497
4498 if (amdgpu_sriov_runtime(adev))
4499 tmo = msecs_to_jiffies(8000);
4500 else
4501 tmo = msecs_to_jiffies(100);
4502
4503 dev_info(adev->dev, "recover vram bo from shadow start\n");
4504 mutex_lock(&adev->shadow_list_lock);
4505 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4506 /* If vm is compute context or adev is APU, shadow will be NULL */
4507 if (!vmbo->shadow)
4508 continue;
4509 shadow = vmbo->shadow;
4510
4511 /* No need to recover an evicted BO */
4512 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4513 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4514 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4515 continue;
4516
4517 r = amdgpu_bo_restore_shadow(shadow, &next);
4518 if (r)
4519 break;
4520
4521 if (fence) {
4522 tmo = dma_fence_wait_timeout(fence, false, tmo);
4523 dma_fence_put(fence);
4524 fence = next;
4525 if (tmo == 0) {
4526 r = -ETIMEDOUT;
4527 break;
4528 } else if (tmo < 0) {
4529 r = tmo;
4530 break;
4531 }
4532 } else {
4533 fence = next;
4534 }
4535 }
4536 mutex_unlock(&adev->shadow_list_lock);
4537
4538 if (fence)
4539 tmo = dma_fence_wait_timeout(fence, false, tmo);
4540 dma_fence_put(fence);
4541
4542 if (r < 0 || tmo <= 0) {
4543 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4544 return -EIO;
4545 }
4546
4547 dev_info(adev->dev, "recover vram bo from shadow done\n");
4548 return 0;
4549 }
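
/*
 * A sketch of the fence pipelining used above, with illustrative helper
 * names: each iteration issues the next shadow-to-VRAM copy before
 * waiting on the previous one, so the wait for copy N overlaps with the
 * submission of copy N+1 and the remaining timeout budget is carried
 * from one wait to the next.
 *
 *	struct dma_fence *prev = NULL, *next = NULL;
 *
 *	for_each_shadow_bo(shadow) {		// hypothetical iterator
 *		issue_restore(shadow, &next);	// e.g. amdgpu_bo_restore_shadow()
 *		if (prev) {
 *			tmo = dma_fence_wait_timeout(prev, false, tmo);
 *			dma_fence_put(prev);
 *		}
 *		prev = next;
 *	}
 *	if (prev)
 *		tmo = dma_fence_wait_timeout(prev, false, tmo);
 *	dma_fence_put(prev);
 */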
4550
4551
4552 /**
4553 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4554 *
4555 * @adev: amdgpu_device pointer
4556 * @from_hypervisor: request from hypervisor
4557 *
4558 * Do a VF FLR and reinitialize the ASIC.
4559 * Returns 0 on success, negative error code on failure.
4560 */
4561 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4562 bool from_hypervisor)
4563 {
4564 int r;
4565 struct amdgpu_hive_info *hive = NULL;
4566 int retry_limit = 0;
4567
4568 retry:
4569 amdgpu_amdkfd_pre_reset(adev);
4570
4571 if (from_hypervisor)
4572 r = amdgpu_virt_request_full_gpu(adev, true);
4573 else
4574 r = amdgpu_virt_reset_gpu(adev);
4575 if (r)
4576 return r;
4577
4578 /* Resume IP prior to SMC */
4579 r = amdgpu_device_ip_reinit_early_sriov(adev);
4580 if (r)
4581 goto error;
4582
4583 amdgpu_virt_init_data_exchange(adev);
4584
4585 r = amdgpu_device_fw_loading(adev);
4586 if (r)
4587 return r;
4588
4589 /* now we are okay to resume SMC/CP/SDMA */
4590 r = amdgpu_device_ip_reinit_late_sriov(adev);
4591 if (r)
4592 goto error;
4593
4594 hive = amdgpu_get_xgmi_hive(adev);
4595 /* Update PSP FW topology after reset */
4596 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4597 r = amdgpu_xgmi_update_topology(hive, adev);
4598
4599 if (hive)
4600 amdgpu_put_xgmi_hive(hive);
4601
4602 if (!r) {
4603 amdgpu_irq_gpu_reset_resume_helper(adev);
4604 r = amdgpu_ib_ring_tests(adev);
4605
4606 amdgpu_amdkfd_post_reset(adev);
4607 }
4608
4609 error:
4610 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4611 amdgpu_inc_vram_lost(adev);
4612 r = amdgpu_device_recover_vram(adev);
4613 }
4614 amdgpu_virt_release_full_gpu(adev, true);
4615
4616 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4617 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4618 retry_limit++;
4619 goto retry;
4620 } else
4621 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4622 }
4623
4624 return r;
4625 }
4626
4627 /**
4628 * amdgpu_device_has_job_running - check if there is any job in the pending list
4629 *
4630 * @adev: amdgpu_device pointer
4631 *
4632 * check if there is any job in the scheduler pending list
4633 */
4634 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4635 {
4636 int i;
4637 struct drm_sched_job *job;
4638
4639 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4640 struct amdgpu_ring *ring = adev->rings[i];
4641
4642 if (!ring || !ring->sched.thread)
4643 continue;
4644
4645 spin_lock(&ring->sched.job_list_lock);
4646 job = list_first_entry_or_null(&ring->sched.pending_list,
4647 struct drm_sched_job, list);
4648 spin_unlock(&ring->sched.job_list_lock);
4649 if (job)
4650 return true;
4651 }
4652 return false;
4653 }
4654
4655 /**
4656 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4657 *
4658 * @adev: amdgpu_device pointer
4659 *
4660 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4661 * a hung GPU.
4662 */
4663 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4664 {
4665
4666 if (amdgpu_gpu_recovery == 0)
4667 goto disabled;
4668
4669 /* Skip soft reset check in fatal error mode */
4670 if (!amdgpu_ras_is_poison_mode_supported(adev))
4671 return true;
4672
4673 if (!amdgpu_device_ip_check_soft_reset(adev)) {
4674 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
4675 return false;
4676 }
4677
4678 if (amdgpu_sriov_vf(adev))
4679 return true;
4680
4681 if (amdgpu_gpu_recovery == -1) {
4682 switch (adev->asic_type) {
4683 #ifdef CONFIG_DRM_AMDGPU_SI
4684 case CHIP_VERDE:
4685 case CHIP_TAHITI:
4686 case CHIP_PITCAIRN:
4687 case CHIP_OLAND:
4688 case CHIP_HAINAN:
4689 #endif
4690 #ifdef CONFIG_DRM_AMDGPU_CIK
4691 case CHIP_KAVERI:
4692 case CHIP_KABINI:
4693 case CHIP_MULLINS:
4694 #endif
4695 case CHIP_CARRIZO:
4696 case CHIP_STONEY:
4697 case CHIP_CYAN_SKILLFISH:
4698 goto disabled;
4699 default:
4700 break;
4701 }
4702 }
4703
4704 return true;
4705
4706 disabled:
4707 dev_info(adev->dev, "GPU recovery disabled.\n");
4708 return false;
4709 }
4710
4711 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4712 {
4713 u32 i;
4714 int ret = 0;
4715
4716 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4717
4718 dev_info(adev->dev, "GPU mode1 reset\n");
4719
4720 /* disable BM */
4721 pci_clear_master(adev->pdev);
4722
4723 amdgpu_device_cache_pci_state(adev->pdev);
4724
4725 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4726 dev_info(adev->dev, "GPU smu mode1 reset\n");
4727 ret = amdgpu_dpm_mode1_reset(adev);
4728 } else {
4729 dev_info(adev->dev, "GPU psp mode1 reset\n");
4730 ret = psp_gpu_reset(adev);
4731 }
4732
4733 if (ret)
4734 dev_err(adev->dev, "GPU mode1 reset failed\n");
4735
4736 amdgpu_device_load_pci_state(adev->pdev);
4737
4738 /* wait for asic to come out of reset */
4739 for (i = 0; i < adev->usec_timeout; i++) {
4740 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4741
4742 if (memsize != 0xffffffff)
4743 break;
4744 udelay(1);
4745 }
4746
4747 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4748 return ret;
4749 }
4750
4751 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4752 struct amdgpu_reset_context *reset_context)
4753 {
4754 int i, r = 0;
4755 struct amdgpu_job *job = NULL;
4756 bool need_full_reset =
4757 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4758
4759 if (reset_context->reset_req_dev == adev)
4760 job = reset_context->job;
4761
4762 if (amdgpu_sriov_vf(adev)) {
4763 /* stop the data exchange thread */
4764 amdgpu_virt_fini_data_exchange(adev);
4765 }
4766
4767 amdgpu_fence_driver_isr_toggle(adev, true);
4768
4769 /* block all schedulers and reset given job's ring */
4770 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4771 struct amdgpu_ring *ring = adev->rings[i];
4772
4773 if (!ring || !ring->sched.thread)
4774 continue;
4775
4776 /* clear job fences from the fence drv to avoid force_completion
4777 * leaving NULL and vm flush fences in the fence drv */
4778 amdgpu_fence_driver_clear_job_fences(ring);
4779
4780 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4781 amdgpu_fence_driver_force_completion(ring);
4782 }
4783
4784 amdgpu_fence_driver_isr_toggle(adev, false);
4785
4786 if (job && job->vm)
4787 drm_sched_increase_karma(&job->base);
4788
4789 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4790 /* If reset handler not implemented, continue; otherwise return */
4791 if (r == -ENOSYS)
4792 r = 0;
4793 else
4794 return r;
4795
4796 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4797 if (!amdgpu_sriov_vf(adev)) {
4798
4799 if (!need_full_reset)
4800 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4801
4802 if (!need_full_reset && amdgpu_gpu_recovery) {
4803 amdgpu_device_ip_pre_soft_reset(adev);
4804 r = amdgpu_device_ip_soft_reset(adev);
4805 amdgpu_device_ip_post_soft_reset(adev);
4806 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4807 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4808 need_full_reset = true;
4809 }
4810 }
4811
4812 if (need_full_reset)
4813 r = amdgpu_device_ip_suspend(adev);
4814 if (need_full_reset)
4815 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4816 else
4817 clear_bit(AMDGPU_NEED_FULL_RESET,
4818 &reset_context->flags);
4819 }
4820
4821 return r;
4822 }
4823
4824 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4825 {
4826 int i;
4827
4828 lockdep_assert_held(&adev->reset_domain->sem);
4829
4830 for (i = 0; i < adev->num_regs; i++) {
4831 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4832 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4833 adev->reset_dump_reg_value[i]);
4834 }
4835
4836 return 0;
4837 }
4838
4839 #ifdef CONFIG_DEV_COREDUMP
4840 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4841 size_t count, void *data, size_t datalen)
4842 {
4843 struct drm_printer p;
4844 struct amdgpu_device *adev = data;
4845 struct drm_print_iterator iter;
4846 int i;
4847
4848 iter.data = buffer;
4849 iter.offset = 0;
4850 iter.start = offset;
4851 iter.remain = count;
4852
4853 p = drm_coredump_printer(&iter);
4854
4855 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4856 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4857 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4858 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4859 if (adev->reset_task_info.pid)
4860 drm_printf(&p, "process_name: %s PID: %d\n",
4861 adev->reset_task_info.process_name,
4862 adev->reset_task_info.pid);
4863
4864 if (adev->reset_vram_lost)
4865 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4866 if (adev->num_regs) {
4867 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4868
4869 for (i = 0; i < adev->num_regs; i++)
4870 drm_printf(&p, "0x%08x: 0x%08x\n",
4871 adev->reset_dump_reg_list[i],
4872 adev->reset_dump_reg_value[i]);
4873 }
4874
4875 return count - iter.remain;
4876 }
4877
4878 static void amdgpu_devcoredump_free(void *data)
4879 {
4880 }
4881
4882 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4883 {
4884 struct drm_device *dev = adev_to_drm(adev);
4885
4886 ktime_get_ts64(&adev->reset_time);
4887 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4888 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4889 }
4890 #endif
4891
4892 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4893 struct amdgpu_reset_context *reset_context)
4894 {
4895 struct amdgpu_device *tmp_adev = NULL;
4896 bool need_full_reset, skip_hw_reset, vram_lost = false;
4897 int r = 0;
4898 bool gpu_reset_for_dev_remove = 0;
4899
4900 /* Try reset handler method first */
4901 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4902 reset_list);
4903 amdgpu_reset_reg_dumps(tmp_adev);
4904
4905 reset_context->reset_device_list = device_list_handle;
4906 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4907 /* If reset handler not implemented, continue; otherwise return */
4908 if (r == -ENOSYS)
4909 r = 0;
4910 else
4911 return r;
4912
4913 /* Reset handler not implemented, use the default method */
4914 need_full_reset =
4915 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4916 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4917
4918 gpu_reset_for_dev_remove =
4919 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4920 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4921
4922 /*
4923 * ASIC reset has to be done on all XGMI hive nodes ASAP
4924 * to allow proper link negotiation in FW (within 1 sec)
4925 */
4926 if (!skip_hw_reset && need_full_reset) {
4927 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4928 /* For XGMI run all resets in parallel to speed up the process */
4929 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4930 tmp_adev->gmc.xgmi.pending_reset = false;
4931 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4932 r = -EALREADY;
4933 } else
4934 r = amdgpu_asic_reset(tmp_adev);
4935
4936 if (r) {
4937 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4938 r, adev_to_drm(tmp_adev)->unique);
4939 break;
4940 }
4941 }
4942
4943 /* For XGMI wait for all resets to complete before proceeding */
4944 if (!r) {
4945 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4946 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4947 flush_work(&tmp_adev->xgmi_reset_work);
4948 r = tmp_adev->asic_reset_res;
4949 if (r)
4950 break;
4951 }
4952 }
4953 }
4954 }
4955
4956 if (!r && amdgpu_ras_intr_triggered()) {
4957 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4958 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4959 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4960 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4961 }
4962
4963 amdgpu_ras_intr_cleared();
4964 }
4965
4966 /* Since the mode1 reset affects base ip blocks, the
4967 * phase1 ip blocks need to be resumed. Otherwise there
4968 * will be a BIOS signature error and the psp bootloader
4969 * can't load kdb on the next amdgpu install.
4970 */
4971 if (gpu_reset_for_dev_remove) {
4972 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4973 amdgpu_device_ip_resume_phase1(tmp_adev);
4974
4975 goto end;
4976 }
4977
4978 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4979 if (need_full_reset) {
4980 /* post card */
4981 r = amdgpu_device_asic_init(tmp_adev);
4982 if (r) {
4983 dev_warn(tmp_adev->dev, "asic atom init failed!");
4984 } else {
4985 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4986 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4987 if (r)
4988 goto out;
4989
4990 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4991 if (r)
4992 goto out;
4993
4994 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4995 #ifdef CONFIG_DEV_COREDUMP
4996 tmp_adev->reset_vram_lost = vram_lost;
4997 memset(&tmp_adev->reset_task_info, 0,
4998 sizeof(tmp_adev->reset_task_info));
4999 if (reset_context->job && reset_context->job->vm)
5000 tmp_adev->reset_task_info =
5001 reset_context->job->vm->task_info;
5002 amdgpu_reset_capture_coredumpm(tmp_adev);
5003 #endif
5004 if (vram_lost) {
5005 DRM_INFO("VRAM is lost due to GPU reset!\n");
5006 amdgpu_inc_vram_lost(tmp_adev);
5007 }
5008
5009 r = amdgpu_device_fw_loading(tmp_adev);
5010 if (r)
5011 return r;
5012
5013 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5014 if (r)
5015 goto out;
5016
5017 if (vram_lost)
5018 amdgpu_device_fill_reset_magic(tmp_adev);
5019
5020 /*
5021 * Add this ASIC back as tracked since the reset already
5022 * completed successfully.
5023 */
5024 amdgpu_register_gpu_instance(tmp_adev);
5025
5026 if (!reset_context->hive &&
5027 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5028 amdgpu_xgmi_add_device(tmp_adev);
5029
5030 r = amdgpu_device_ip_late_init(tmp_adev);
5031 if (r)
5032 goto out;
5033
5034 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5035
5036 /*
5037 * The GPU enters a bad state once the number of faulty pages
5038 * retired by ECC has reached the threshold, and ras
5039 * recovery is scheduled next. So add a check
5040 * here to break recovery if the bad page threshold is
5041 * indeed exceeded, and remind the user to
5042 * retire this GPU or set a bigger
5043 * bad_page_threshold value to fix this when
5044 * probing the driver again.
5045 */
5046 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5047 /* must succeed. */
5048 amdgpu_ras_resume(tmp_adev);
5049 } else {
5050 r = -EINVAL;
5051 goto out;
5052 }
5053
5054 /* Update PSP FW topology after reset */
5055 if (reset_context->hive &&
5056 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5057 r = amdgpu_xgmi_update_topology(
5058 reset_context->hive, tmp_adev);
5059 }
5060 }
5061
5062 out:
5063 if (!r) {
5064 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5065 r = amdgpu_ib_ring_tests(tmp_adev);
5066 if (r) {
5067 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5068 need_full_reset = true;
5069 r = -EAGAIN;
5070 goto end;
5071 }
5072 }
5073
5074 if (!r)
5075 r = amdgpu_device_recover_vram(tmp_adev);
5076 else
5077 tmp_adev->asic_reset_res = r;
5078 }
5079
5080 end:
5081 if (need_full_reset)
5082 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5083 else
5084 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5085 return r;
5086 }
5087
5088 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5089 {
5090
5091 switch (amdgpu_asic_reset_method(adev)) {
5092 case AMD_RESET_METHOD_MODE1:
5093 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5094 break;
5095 case AMD_RESET_METHOD_MODE2:
5096 adev->mp1_state = PP_MP1_STATE_RESET;
5097 break;
5098 default:
5099 adev->mp1_state = PP_MP1_STATE_NONE;
5100 break;
5101 }
5102 }
5103
5104 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5105 {
5106 amdgpu_vf_error_trans_all(adev);
5107 adev->mp1_state = PP_MP1_STATE_NONE;
5108 }
5109
5110 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5111 {
5112 struct pci_dev *p = NULL;
5113
5114 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5115 adev->pdev->bus->number, 1);
5116 if (p) {
5117 pm_runtime_enable(&(p->dev));
5118 pm_runtime_resume(&(p->dev));
5119 }
5120
5121 pci_dev_put(p);
5122 }
5123
5124 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5125 {
5126 enum amd_reset_method reset_method;
5127 struct pci_dev *p = NULL;
5128 u64 expires;
5129
5130 /*
5131 * For now, only BACO and mode1 reset are confirmed
5132 * to suffer from the audio issue if not properly suspended.
5133 */
5134 reset_method = amdgpu_asic_reset_method(adev);
5135 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5136 (reset_method != AMD_RESET_METHOD_MODE1))
5137 return -EINVAL;
5138
5139 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5140 adev->pdev->bus->number, 1);
5141 if (!p)
5142 return -ENODEV;
5143
5144 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5145 if (!expires)
5146 /*
5147 * If we cannot get the audio device autosuspend delay,
5148 * a fixed 4s interval will be used. Since 3s is the
5149 * audio controller's default autosuspend delay setting,
5150 * the 4s used here is guaranteed to cover that.
5151 */
5152 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5153
5154 while (!pm_runtime_status_suspended(&(p->dev))) {
5155 if (!pm_runtime_suspend(&(p->dev)))
5156 break;
5157
5158 if (expires < ktime_get_mono_fast_ns()) {
5159 dev_warn(adev->dev, "failed to suspend display audio\n");
5160 pci_dev_put(p);
5161 /* TODO: abort the succeeding gpu reset? */
5162 return -ETIMEDOUT;
5163 }
5164 }
5165
5166 pm_runtime_disable(&(p->dev));
5167
5168 pci_dev_put(p);
5169 return 0;
5170 }
5171
5172 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5173 {
5174 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5175
5176 #if defined(CONFIG_DEBUG_FS)
5177 if (!amdgpu_sriov_vf(adev))
5178 cancel_work(&adev->reset_work);
5179 #endif
5180
5181 if (adev->kfd.dev)
5182 cancel_work(&adev->kfd.reset_work);
5183
5184 if (amdgpu_sriov_vf(adev))
5185 cancel_work(&adev->virt.flr_work);
5186
5187 if (con && adev->ras_enabled)
5188 cancel_work(&con->recovery_work);
5189
5190 }
5191
5192 /**
5193 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5194 *
5195 * @adev: amdgpu_device pointer
5196 * @job: which job triggered the hang
5197 *
5198 * Attempt to reset the GPU if it has hung (all asics).
5199 * Attempt a soft reset or full reset and reinitialize the ASIC.
5200 * Returns 0 for success or an error on failure.
5201 */
5202
5203 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5204 struct amdgpu_job *job,
5205 struct amdgpu_reset_context *reset_context)
5206 {
5207 struct list_head device_list, *device_list_handle = NULL;
5208 bool job_signaled = false;
5209 struct amdgpu_hive_info *hive = NULL;
5210 struct amdgpu_device *tmp_adev = NULL;
5211 int i, r = 0;
5212 bool need_emergency_restart = false;
5213 bool audio_suspended = false;
5214 bool gpu_reset_for_dev_remove = false;
5215
5216 gpu_reset_for_dev_remove =
5217 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5218 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5219
5220 /*
5221 * Special case: RAS triggered and full reset isn't supported
5222 */
5223 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5224
5225 /*
5226 * Flush RAM to disk so that after reboot
5227 * the user can read the log and see why the system rebooted.
5228 */
5229 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5230 DRM_WARN("Emergency reboot.");
5231
5232 ksys_sync_helper();
5233 emergency_restart();
5234 }
5235
5236 dev_info(adev->dev, "GPU %s begin!\n",
5237 need_emergency_restart ? "jobs stop":"reset");
5238
5239 if (!amdgpu_sriov_vf(adev))
5240 hive = amdgpu_get_xgmi_hive(adev);
5241 if (hive)
5242 mutex_lock(&hive->hive_lock);
5243
5244 reset_context->job = job;
5245 reset_context->hive = hive;
5246 /*
5247 * Build list of devices to reset.
5248 * In case we are in XGMI hive mode, re-sort the device list
5249 * to put adev in the 1st position.
5250 */
5251 INIT_LIST_HEAD(&device_list);
5252 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5253 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5254 list_add_tail(&tmp_adev->reset_list, &device_list);
5255 if (gpu_reset_for_dev_remove && adev->shutdown)
5256 tmp_adev->shutdown = true;
5257 }
5258 if (!list_is_first(&adev->reset_list, &device_list))
5259 list_rotate_to_front(&adev->reset_list, &device_list);
5260 device_list_handle = &device_list;
5261 } else {
5262 list_add_tail(&adev->reset_list, &device_list);
5263 device_list_handle = &device_list;
5264 }
5265
5266 /* We need to lock the reset domain only once, for both XGMI and single device */
5267 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5268 reset_list);
5269 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5270
5271 /* block all schedulers and reset given job's ring */
5272 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5273
5274 amdgpu_device_set_mp1_state(tmp_adev);
5275
5276 /*
5277 * Try to put the audio codec into suspend state
5278 * before the gpu reset starts.
5279 *
5280 * The power domain of the graphics device is shared
5281 * with the AZ power domain. Without this, we may
5282 * change the audio hardware from behind the audio
5283 * driver's back, which will trigger some audio
5284 * codec errors.
5285 */
5286 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5287 audio_suspended = true;
5288
5289 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5290
5291 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5292
5293 if (!amdgpu_sriov_vf(tmp_adev))
5294 amdgpu_amdkfd_pre_reset(tmp_adev);
5295
5296 /*
5297 * Mark these ASICs to be reset as untracked first
5298 * and add them back after the reset completes
5299 */
5300 amdgpu_unregister_gpu_instance(tmp_adev);
5301
5302 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5303
5304 /* disable ras on ALL IPs */
5305 if (!need_emergency_restart &&
5306 amdgpu_device_ip_need_full_reset(tmp_adev))
5307 amdgpu_ras_suspend(tmp_adev);
5308
5309 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5310 struct amdgpu_ring *ring = tmp_adev->rings[i];
5311
5312 if (!ring || !ring->sched.thread)
5313 continue;
5314
5315 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5316
5317 if (need_emergency_restart)
5318 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5319 }
5320 atomic_inc(&tmp_adev->gpu_reset_counter);
5321 }
5322
5323 if (need_emergency_restart)
5324 goto skip_sched_resume;
5325
5326 /*
5327 * Must check guilty signal here since after this point all old
5328 * HW fences are force signaled.
5329 *
5330 * job->base holds a reference to parent fence
5331 */
5332 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5333 job_signaled = true;
5334 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5335 goto skip_hw_reset;
5336 }
5337
5338 retry: /* Rest of adevs pre asic reset from XGMI hive. */
5339 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5340 if (gpu_reset_for_dev_remove) {
5341 /* Workaround for ASICs that need to disable SMC first */
5342 amdgpu_device_smu_fini_early(tmp_adev);
5343 }
5344 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5345 /* TODO: Should we stop? */
5346 if (r) {
5347 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5348 r, adev_to_drm(tmp_adev)->unique);
5349 tmp_adev->asic_reset_res = r;
5350 }
5351
5352 /*
5353 * Drop all pending non-scheduler resets. Scheduler resets
5354 * were already dropped during drm_sched_stop.
5355 */
5356 amdgpu_device_stop_pending_resets(tmp_adev);
5357 }
5358
5359 /* Actual ASIC resets if needed.*/
5360 /* Host driver will handle XGMI hive reset for SRIOV */
5361 if (amdgpu_sriov_vf(adev)) {
5362 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5363 if (r)
5364 adev->asic_reset_res = r;
5365
5366 /* Aldebaran supports ras in SRIOV, so we need to resume ras during reset */
5367 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5368 amdgpu_ras_resume(adev);
5369 } else {
5370 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5371 if (r && r == -EAGAIN)
5372 goto retry;
5373
5374 if (!r && gpu_reset_for_dev_remove)
5375 goto recover_end;
5376 }
5377
5378 skip_hw_reset:
5379
5380 /* Post ASIC reset for all devs. */
5381 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5382
5383 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5384 struct amdgpu_ring *ring = tmp_adev->rings[i];
5385
5386 if (!ring || !ring->sched.thread)
5387 continue;
5388
5389 drm_sched_start(&ring->sched, true);
5390 }
5391
5392 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5393 amdgpu_mes_self_test(tmp_adev);
5394
5395 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5396 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5397 }
5398
5399 if (tmp_adev->asic_reset_res)
5400 r = tmp_adev->asic_reset_res;
5401
5402 tmp_adev->asic_reset_res = 0;
5403
5404 if (r) {
5405 /* bad news, how to tell it to userspace? */
5406 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5407 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5408 } else {
5409 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5410 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5411 DRM_WARN("smart shift update failed\n");
5412 }
5413 }
5414
5415 skip_sched_resume:
5416 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5417 /* unlock kfd: SRIOV would do it separately */
5418 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5419 amdgpu_amdkfd_post_reset(tmp_adev);
5420
5421 /* kfd_post_reset will do nothing if kfd device is not initialized,
5422 * need to bring up kfd here if it wasn't initialized before
5423 */
5424 if (!adev->kfd.init_complete)
5425 amdgpu_amdkfd_device_init(adev);
5426
5427 if (audio_suspended)
5428 amdgpu_device_resume_display_audio(tmp_adev);
5429
5430 amdgpu_device_unset_mp1_state(tmp_adev);
5431
5432 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5433 }
5434
5435 recover_end:
5436 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5437 reset_list);
5438 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5439
5440 if (hive) {
5441 mutex_unlock(&hive->hive_lock);
5442 amdgpu_put_xgmi_hive(hive);
5443 }
5444
5445 if (r)
5446 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5447
5448 atomic_set(&adev->reset_domain->reset_res, r);
5449 return r;
5450 }
5451
5452 /**
5453 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5454 *
5455 * @adev: amdgpu_device pointer
5456 *
5457 * Fetches and stores in the driver the PCIE capabilities (gen speed
5458 * and lanes) of the slot the device is in. Handles APUs and
5459 * virtualized environments where PCIE config space may not be available.
5460 */
5461 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5462 {
5463 struct pci_dev *pdev;
5464 enum pci_bus_speed speed_cap, platform_speed_cap;
5465 enum pcie_link_width platform_link_width;
5466
5467 if (amdgpu_pcie_gen_cap)
5468 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5469
5470 if (amdgpu_pcie_lane_cap)
5471 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5472
5473 /* covers APUs as well */
5474 if (pci_is_root_bus(adev->pdev->bus)) {
5475 if (adev->pm.pcie_gen_mask == 0)
5476 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5477 if (adev->pm.pcie_mlw_mask == 0)
5478 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5479 return;
5480 }
5481
5482 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5483 return;
5484
5485 pcie_bandwidth_available(adev->pdev, NULL,
5486 &platform_speed_cap, &platform_link_width);
5487
5488 if (adev->pm.pcie_gen_mask == 0) {
5489 /* asic caps */
5490 pdev = adev->pdev;
5491 speed_cap = pcie_get_speed_cap(pdev);
5492 if (speed_cap == PCI_SPEED_UNKNOWN) {
5493 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5494 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5495 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5496 } else {
5497 if (speed_cap == PCIE_SPEED_32_0GT)
5498 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5499 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5500 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5501 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5502 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5503 else if (speed_cap == PCIE_SPEED_16_0GT)
5504 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5505 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5506 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5507 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5508 else if (speed_cap == PCIE_SPEED_8_0GT)
5509 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5510 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5511 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5512 else if (speed_cap == PCIE_SPEED_5_0GT)
5513 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5514 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5515 else
5516 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5517 }
5518 /* platform caps */
5519 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5520 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5521 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5522 } else {
5523 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5524 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5525 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5526 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5527 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5528 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5529 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5530 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5531 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5532 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5533 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5534 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5535 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5536 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5537 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5538 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5539 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5540 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5541 else
5542 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5543
5544 }
5545 }
5546 if (adev->pm.pcie_mlw_mask == 0) {
5547 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5548 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5549 } else {
5550 switch (platform_link_width) {
5551 case PCIE_LNK_X32:
5552 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5553 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5554 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5555 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5558 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5559 break;
5560 case PCIE_LNK_X16:
5561 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5562 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5566 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5567 break;
5568 case PCIE_LNK_X12:
5569 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5572 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5573 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5574 break;
5575 case PCIE_LNK_X8:
5576 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5577 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5578 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5579 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5580 break;
5581 case PCIE_LNK_X4:
5582 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5583 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5584 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5585 break;
5586 case PCIE_LNK_X2:
5587 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5588 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5589 break;
5590 case PCIE_LNK_X1:
5591 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5592 break;
5593 default:
5594 break;
5595 }
5596 }
5597 }
5598 }
5599
5600 /**
5601 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5602 *
5603 * @adev: amdgpu_device pointer
5604 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5605 *
5606 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5607 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5608 * @peer_adev.
5609 */
5610 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5611 struct amdgpu_device *peer_adev)
5612 {
5613 #ifdef CONFIG_HSA_AMD_P2P
5614 uint64_t address_mask = peer_adev->dev->dma_mask ?
5615 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5616 resource_size_t aper_limit =
5617 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5618 bool p2p_access =
5619 !adev->gmc.xgmi.connected_to_cpu &&
5620 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5621
5622 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5623 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5624 !(adev->gmc.aper_base & address_mask ||
5625 aper_limit & address_mask));
5626 #else
5627 return false;
5628 #endif
5629 }
5630
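/**
 * amdgpu_device_baco_enter - enter the BACO (Bus Active, Chip Off) state
 *
 * @dev: drm_device pointer
 *
 * Disables the RAS doorbell interrupt when RAS is enabled and asks the DPM
 * code to put the device into BACO.
 *
 * Return: 0 on success, -ENOTSUPP if the device doesn't support BACO, or
 * the error code returned by amdgpu_dpm_baco_enter().
 */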
5631 int amdgpu_device_baco_enter(struct drm_device *dev)
5632 {
5633 struct amdgpu_device *adev = drm_to_adev(dev);
5634 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5635
5636 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5637 return -ENOTSUPP;
5638
5639 if (ras && adev->ras_enabled &&
5640 adev->nbio.funcs->enable_doorbell_interrupt)
5641 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5642
5643 return amdgpu_dpm_baco_enter(adev);
5644 }
5645
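/**
 * amdgpu_device_baco_exit - exit the BACO (Bus Active, Chip Off) state
 *
 * @dev: drm_device pointer
 *
 * Asks the DPM code to bring the device out of BACO, re-enables the RAS
 * doorbell interrupt when RAS is enabled and, for passthrough devices,
 * clears any pending doorbell interrupt.
 *
 * Return: 0 on success, -ENOTSUPP if the device doesn't support BACO, or
 * the error code returned by amdgpu_dpm_baco_exit().
 */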
5646 int amdgpu_device_baco_exit(struct drm_device *dev)
5647 {
5648 struct amdgpu_device *adev = drm_to_adev(dev);
5649 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5650 int ret = 0;
5651
5652 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5653 return -ENOTSUPP;
5654
5655 ret = amdgpu_dpm_baco_exit(adev);
5656 if (ret)
5657 return ret;
5658
5659 if (ras && adev->ras_enabled &&
5660 adev->nbio.funcs->enable_doorbell_interrupt)
5661 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5662
5663 if (amdgpu_passthrough(adev) &&
5664 adev->nbio.funcs->clear_doorbell_interrupt)
5665 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5666
5667 return 0;
5668 }
5669
5670 /**
5671 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5672 * @pdev: PCI device struct
5673 * @state: PCI channel state
5674 *
5675 * Description: Called when a PCI error is detected.
5676 *
5677 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5678 */
5679 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5680 {
5681 struct drm_device *dev = pci_get_drvdata(pdev);
5682 struct amdgpu_device *adev = drm_to_adev(dev);
5683 int i;
5684
5685 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5686
5687 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5688 DRM_WARN("No support for XGMI hive yet...");
5689 return PCI_ERS_RESULT_DISCONNECT;
5690 }
5691
5692 adev->pci_channel_state = state;
5693
5694 switch (state) {
5695 case pci_channel_io_normal:
5696 return PCI_ERS_RESULT_CAN_RECOVER;
5697 /* Fatal error, prepare for slot reset */
5698 case pci_channel_io_frozen:
5699 /*
5700 * Locking adev->reset_domain->sem will prevent any external access
5701 * to GPU during PCI error recovery
5702 */
5703 amdgpu_device_lock_reset_domain(adev->reset_domain);
5704 amdgpu_device_set_mp1_state(adev);
5705
5706 /*
5707 * Block any work scheduling as we do for regular GPU reset
5708 * for the duration of the recovery
5709 */
5710 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5711 struct amdgpu_ring *ring = adev->rings[i];
5712
5713 if (!ring || !ring->sched.thread)
5714 continue;
5715
5716 drm_sched_stop(&ring->sched, NULL);
5717 }
5718 atomic_inc(&adev->gpu_reset_counter);
5719 return PCI_ERS_RESULT_NEED_RESET;
5720 case pci_channel_io_perm_failure:
5721 /* Permanent error, prepare for device removal */
5722 return PCI_ERS_RESULT_DISCONNECT;
5723 }
5724
5725 return PCI_ERS_RESULT_NEED_RESET;
5726 }
5727
5728 /**
5729 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5730 * @pdev: pointer to PCI device
5731 */
5732 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5733 {
5735 DRM_INFO("PCI error: mmio enabled callback!!\n");
5736
5737 /* TODO - dump whatever for debugging purposes */
5738
5739 /* This is called only if amdgpu_pci_error_detected returns
5740 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5741 * works, no need to reset slot.
5742 */
5743
5744 return PCI_ERS_RESULT_RECOVERED;
5745 }
5746
5747 /**
5748 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5749 * @pdev: PCI device struct
5750 *
5751 * Description: This routine is called by the PCI error recovery
5752 * code after the PCI slot has been reset, just before we
5753 * should resume normal operations.
5754 */
5755 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5756 {
5757 struct drm_device *dev = pci_get_drvdata(pdev);
5758 struct amdgpu_device *adev = drm_to_adev(dev);
5759 int r, i;
5760 struct amdgpu_reset_context reset_context;
5761 u32 memsize;
5762 struct list_head device_list;
5763
5764 DRM_INFO("PCI error: slot reset callback!!\n");
5765
5766 memset(&reset_context, 0, sizeof(reset_context));
5767
5768 INIT_LIST_HEAD(&device_list);
5769 list_add_tail(&adev->reset_list, &device_list);
5770
5771 /* wait for asic to come out of reset */
5772 msleep(500);
5773
5774 /* Restore PCI confspace */
5775 amdgpu_device_load_pci_state(pdev);
5776
5777 /* confirm ASIC came out of reset */
5778 for (i = 0; i < adev->usec_timeout; i++) {
5779 memsize = amdgpu_asic_get_config_memsize(adev);
5780
5781 if (memsize != 0xffffffff)
5782 break;
5783 udelay(1);
5784 }
5785 if (memsize == 0xffffffff) {
5786 r = -ETIME;
5787 goto out;
5788 }
5789
5790 reset_context.method = AMD_RESET_METHOD_NONE;
5791 reset_context.reset_req_dev = adev;
5792 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5793 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5794
5795 adev->no_hw_access = true;
5796 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5797 adev->no_hw_access = false;
5798 if (r)
5799 goto out;
5800
5801 r = amdgpu_do_asic_reset(&device_list, &reset_context);
5802
5803 out:
5804 if (!r) {
5805 if (amdgpu_device_cache_pci_state(adev->pdev))
5806 pci_restore_state(adev->pdev);
5807
5808 DRM_INFO("PCIe error recovery succeeded\n");
5809 } else {
5810 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5811 amdgpu_device_unset_mp1_state(adev);
5812 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5813 }
5814
5815 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5816 }
5817
5818 /**
5819 * amdgpu_pci_resume() - resume normal ops after PCI reset
5820 * @pdev: pointer to PCI device
5821 *
5822 * Called when the error recovery driver tells us that it's
5823 * OK to resume normal operation.
5824 */
5825 void amdgpu_pci_resume(struct pci_dev *pdev)
5826 {
5827 struct drm_device *dev = pci_get_drvdata(pdev);
5828 struct amdgpu_device *adev = drm_to_adev(dev);
5829 int i;
5830
5832 DRM_INFO("PCI error: resume callback!!\n");
5833
5834 /* Only continue execution for the case of pci_channel_io_frozen */
5835 if (adev->pci_channel_state != pci_channel_io_frozen)
5836 return;
5837
5838 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5839 struct amdgpu_ring *ring = adev->rings[i];
5840
5841 if (!ring || !ring->sched.thread)
5842 continue;
5843
5844 drm_sched_start(&ring->sched, true);
5845 }
5846
5847 amdgpu_device_unset_mp1_state(adev);
5848 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5849 }
5850
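/**
 * amdgpu_device_cache_pci_state - save and cache the PCI config space
 *
 * @pdev: PCI device struct
 *
 * Saves the PCI configuration space and keeps a private copy in
 * adev->pci_state so it can be restored later, e.g. after a GPU reset.
 *
 * Return: true on success, false if the state couldn't be saved or stored.
 */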
5851 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5852 {
5853 struct drm_device *dev = pci_get_drvdata(pdev);
5854 struct amdgpu_device *adev = drm_to_adev(dev);
5855 int r;
5856
5857 r = pci_save_state(pdev);
5858 if (!r) {
5859 kfree(adev->pci_state);
5860
5861 adev->pci_state = pci_store_saved_state(pdev);
5862
5863 if (!adev->pci_state) {
5864 DRM_ERROR("Failed to store PCI saved state");
5865 return false;
5866 }
5867 } else {
5868 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5869 return false;
5870 }
5871
5872 return true;
5873 }
5874
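/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 *
 * @pdev: PCI device struct
 *
 * Loads the PCI configuration space previously cached by
 * amdgpu_device_cache_pci_state() and writes it back to the device.
 *
 * Return: true on success, false if there is no cached state or loading fails.
 */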
5875 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5876 {
5877 struct drm_device *dev = pci_get_drvdata(pdev);
5878 struct amdgpu_device *adev = drm_to_adev(dev);
5879 int r;
5880
5881 if (!adev->pci_state)
5882 return false;
5883
5884 r = pci_load_saved_state(pdev, adev->pci_state);
5885
5886 if (!r) {
5887 pci_restore_state(pdev);
5888 } else {
5889 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5890 return false;
5891 }
5892
5893 return true;
5894 }
5895
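/**
 * amdgpu_device_flush_hdp - flush the HDP (Host Data Path) cache
 *
 * @adev: amdgpu_device pointer
 * @ring: optional ring to emit the flush on
 *
 * Flushes the HDP write cache so that CPU writes routed through HDP reach
 * VRAM. The flush is skipped on bare-metal APUs and on devices whose XGMI
 * link is connected to the CPU. If @ring provides an HDP flush packet the
 * flush is emitted on the ring, otherwise the ASIC callback is used.
 */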
5896 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5897 struct amdgpu_ring *ring)
5898 {
5899 #ifdef CONFIG_X86_64
5900 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5901 return;
5902 #endif
5903 if (adev->gmc.xgmi.connected_to_cpu)
5904 return;
5905
5906 if (ring && ring->funcs->emit_hdp_flush)
5907 amdgpu_ring_emit_hdp_flush(ring);
5908 else
5909 amdgpu_asic_flush_hdp(adev, ring);
5910 }
5911
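/**
 * amdgpu_device_invalidate_hdp - invalidate the HDP (Host Data Path) cache
 *
 * @adev: amdgpu_device pointer
 * @ring: optional ring the invalidation is done for
 *
 * Invalidates the HDP cache so that stale data is not read through the host
 * data path. As with amdgpu_device_flush_hdp(), this is skipped on
 * bare-metal APUs and on devices whose XGMI link is connected to the CPU.
 */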
5912 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5913 struct amdgpu_ring *ring)
5914 {
5915 #ifdef CONFIG_X86_64
5916 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5917 return;
5918 #endif
5919 if (adev->gmc.xgmi.connected_to_cpu)
5920 return;
5921
5922 amdgpu_asic_invalidate_hdp(adev, ring);
5923 }
5924
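/**
 * amdgpu_in_reset - check whether a GPU reset is in progress
 *
 * @adev: amdgpu_device pointer
 *
 * Return: non-zero while the device's reset domain is in GPU reset,
 * 0 otherwise.
 */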
5925 int amdgpu_in_reset(struct amdgpu_device *adev)
5926 {
5927 return atomic_read(&adev->reset_domain->in_gpu_reset);
5928 }
5929
5930 /**
5931 * amdgpu_device_halt() - bring hardware to some kind of halt state
5932 *
5933 * @adev: amdgpu_device pointer
5934 *
5935 * Bring the hardware to some kind of halt state so that nothing can touch it
5936 * any more. This helps preserve the error context when an error occurs.
5937 * Compared to a simple hang, the system stays stable at least for SSH
5938 * access, so it should be trivial to inspect the hardware state and
5939 * see what's going on. Implemented as follows:
5940 *
5941 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
5942 * clears all CPU mappings to the device and disallows remappings through page faults
5943 * 2. amdgpu_irq_disable_all() disables all interrupts
5944 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5945 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5946 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5947 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5948 * flush any in flight DMA operations
5949 */
5950 void amdgpu_device_halt(struct amdgpu_device *adev)
5951 {
5952 struct pci_dev *pdev = adev->pdev;
5953 struct drm_device *ddev = adev_to_drm(adev);
5954
5955 drm_dev_unplug(ddev);
5956
5957 amdgpu_irq_disable_all(adev);
5958
5959 amdgpu_fence_driver_hw_fini(adev);
5960
5961 adev->no_hw_access = true;
5962
5963 amdgpu_device_unmap_mmio(adev);
5964
5965 pci_disable_device(pdev);
5966 pci_wait_for_pending_transaction(pdev);
5967 }
5968
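/**
 * amdgpu_device_pcie_port_rreg - read a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the register
 *
 * Reads an indirect PCIe port register through the NBIO index/data pair
 * while holding the PCIe index lock.
 *
 * Return: the 32 bit register value.
 */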
5969 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5970 u32 reg)
5971 {
5972 unsigned long flags, address, data;
5973 u32 r;
5974
5975 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5976 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5977
5978 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5979 WREG32(address, reg * 4);
5980 (void)RREG32(address);
5981 r = RREG32(data);
5982 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5983 return r;
5984 }
5985
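/**
 * amdgpu_device_pcie_port_wreg - write a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the register
 * @v: value to write
 *
 * Writes an indirect PCIe port register through the NBIO index/data pair
 * while holding the PCIe index lock; the data register is read back to
 * post the write.
 */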
5986 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5987 u32 reg, u32 v)
5988 {
5989 unsigned long flags, address, data;
5990
5991 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5992 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5993
5994 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5995 WREG32(address, reg * 4);
5996 (void)RREG32(address);
5997 WREG32(data, v);
5998 (void)RREG32(data);
5999 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6000 }
6001
6002 /**
6003 * amdgpu_device_switch_gang - switch to a new gang
6004 * @adev: amdgpu_device pointer
6005 * @gang: the gang to switch to
6006 *
6007 * Try to switch to a new gang.
6008 * Returns: NULL if we switched to the new gang or a reference to the current
6009 * gang leader.
6010 */
6011 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6012 struct dma_fence *gang)
6013 {
6014 struct dma_fence *old = NULL;
6015
6016 do {
6017 dma_fence_put(old);
6018 rcu_read_lock();
6019 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6020 rcu_read_unlock();
6021
6022 if (old == gang)
6023 break;
6024
6025 if (!dma_fence_is_signaled(old))
6026 return old;
6027
6028 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6029 old, gang) != old);
6030
6031 dma_fence_put(old);
6032 return NULL;
6033 }
6034
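/**
 * amdgpu_device_has_display_hardware - check whether the ASIC has display IP
 *
 * @adev: amdgpu_device pointer
 *
 * Return: true if the ASIC has usable display hardware, false for
 * display-less chips such as Hainan and Topaz, when no DCE IP version was
 * discovered, or when the DMU IP block has been harvested.
 */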
6035 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6036 {
6037 switch (adev->asic_type) {
6038 #ifdef CONFIG_DRM_AMDGPU_SI
6039 case CHIP_HAINAN:
6040 #endif
6041 case CHIP_TOPAZ:
6042 /* chips with no display hardware */
6043 return false;
6044 #ifdef CONFIG_DRM_AMDGPU_SI
6045 case CHIP_TAHITI:
6046 case CHIP_PITCAIRN:
6047 case CHIP_VERDE:
6048 case CHIP_OLAND:
6049 #endif
6050 #ifdef CONFIG_DRM_AMDGPU_CIK
6051 case CHIP_BONAIRE:
6052 case CHIP_HAWAII:
6053 case CHIP_KAVERI:
6054 case CHIP_KABINI:
6055 case CHIP_MULLINS:
6056 #endif
6057 case CHIP_TONGA:
6058 case CHIP_FIJI:
6059 case CHIP_POLARIS10:
6060 case CHIP_POLARIS11:
6061 case CHIP_POLARIS12:
6062 case CHIP_VEGAM:
6063 case CHIP_CARRIZO:
6064 case CHIP_STONEY:
6065 /* chips with display hardware */
6066 return true;
6067 default:
6068 /* IP discovery */
6069 if (!adev->ip_versions[DCE_HWIP][0] ||
6070 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6071 return false;
6072 return true;
6073 }
6074 }