0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #include <linux/power_supply.h>
0029 #include <linux/kthread.h>
0030 #include <linux/module.h>
0031 #include <linux/console.h>
0032 #include <linux/slab.h>
0033 #include <linux/iommu.h>
0034 #include <linux/pci.h>
0035 #include <linux/devcoredump.h>
0036 #include <generated/utsrelease.h>
0037 #include <linux/pci-p2pdma.h>
0038
0039 #include <drm/drm_atomic_helper.h>
0040 #include <drm/drm_probe_helper.h>
0041 #include <drm/amdgpu_drm.h>
0042 #include <linux/vgaarb.h>
0043 #include <linux/vga_switcheroo.h>
0044 #include <linux/efi.h>
0045 #include "amdgpu.h"
0046 #include "amdgpu_trace.h"
0047 #include "amdgpu_i2c.h"
0048 #include "atom.h"
0049 #include "amdgpu_atombios.h"
0050 #include "amdgpu_atomfirmware.h"
0051 #include "amd_pcie.h"
0052 #ifdef CONFIG_DRM_AMDGPU_SI
0053 #include "si.h"
0054 #endif
0055 #ifdef CONFIG_DRM_AMDGPU_CIK
0056 #include "cik.h"
0057 #endif
0058 #include "vi.h"
0059 #include "soc15.h"
0060 #include "nv.h"
0061 #include "bif/bif_4_1_d.h"
0062 #include <linux/firmware.h>
0063 #include "amdgpu_vf_error.h"
0064
0065 #include "amdgpu_amdkfd.h"
0066 #include "amdgpu_pm.h"
0067
0068 #include "amdgpu_xgmi.h"
0069 #include "amdgpu_ras.h"
0070 #include "amdgpu_pmu.h"
0071 #include "amdgpu_fru_eeprom.h"
0072 #include "amdgpu_reset.h"
0073
0074 #include <linux/suspend.h>
0075 #include <drm/task_barrier.h>
0076 #include <linux/pm_runtime.h>
0077
0078 #include <drm/drm_drv.h>
0079
0080 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
0081 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
0082 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
0083 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
0084 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
0085 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
0086 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
0087
0088 #define AMDGPU_RESUME_MS 2000
0089 #define AMDGPU_MAX_RETRY_LIMIT 2
0090 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
0091
0092 const char *amdgpu_asic_name[] = {
0093 "TAHITI",
0094 "PITCAIRN",
0095 "VERDE",
0096 "OLAND",
0097 "HAINAN",
0098 "BONAIRE",
0099 "KAVERI",
0100 "KABINI",
0101 "HAWAII",
0102 "MULLINS",
0103 "TOPAZ",
0104 "TONGA",
0105 "FIJI",
0106 "CARRIZO",
0107 "STONEY",
0108 "POLARIS10",
0109 "POLARIS11",
0110 "POLARIS12",
0111 "VEGAM",
0112 "VEGA10",
0113 "VEGA12",
0114 "VEGA20",
0115 "RAVEN",
0116 "ARCTURUS",
0117 "RENOIR",
0118 "ALDEBARAN",
0119 "NAVI10",
0120 "CYAN_SKILLFISH",
0121 "NAVI14",
0122 "NAVI12",
0123 "SIENNA_CICHLID",
0124 "NAVY_FLOUNDER",
0125 "VANGOGH",
0126 "DIMGREY_CAVEFISH",
0127 "BEIGE_GOBY",
0128 "YELLOW_CARP",
0129 "IP DISCOVERY",
0130 "LAST",
0131 };
0132
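/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs) as reported by the ASIC (a sum of the NAKs
 * generated and NAKs received).  Userspace can typically read it via
 * something like /sys/class/drm/card0/device/pcie_replay_count.
 */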
0142 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
0143 struct device_attribute *attr, char *buf)
0144 {
0145 struct drm_device *ddev = dev_get_drvdata(dev);
0146 struct amdgpu_device *adev = drm_to_adev(ddev);
0147 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
0148
0149 return sysfs_emit(buf, "%llu\n", cnt);
0150 }
0151
0152 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
0153 amdgpu_device_get_pcie_replay_count, NULL);
0154
0155 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
0156
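/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * of the device.  The file product_name returns the name read from the
 * FRU EEPROM, where available.
 */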
0167 static ssize_t amdgpu_device_get_product_name(struct device *dev,
0168 struct device_attribute *attr, char *buf)
0169 {
0170 struct drm_device *ddev = dev_get_drvdata(dev);
0171 struct amdgpu_device *adev = drm_to_adev(ddev);
0172
0173 return sysfs_emit(buf, "%s\n", adev->product_name);
0174 }
0175
0176 static DEVICE_ATTR(product_name, S_IRUGO,
0177 amdgpu_device_get_product_name, NULL);
0178
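/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * of the device.  The file product_number returns the part number read
 * from the FRU EEPROM, where available.
 */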
0189 static ssize_t amdgpu_device_get_product_number(struct device *dev,
0190 struct device_attribute *attr, char *buf)
0191 {
0192 struct drm_device *ddev = dev_get_drvdata(dev);
0193 struct amdgpu_device *adev = drm_to_adev(ddev);
0194
0195 return sysfs_emit(buf, "%s\n", adev->product_number);
0196 }
0197
0198 static DEVICE_ATTR(product_number, S_IRUGO,
0199 amdgpu_device_get_product_number, NULL);
0200
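/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * of the device.  The file serial_number returns the serial number read
 * from the FRU EEPROM, where available.
 */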
0211 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
0212 struct device_attribute *attr, char *buf)
0213 {
0214 struct drm_device *ddev = dev_get_drvdata(dev);
0215 struct amdgpu_device *adev = drm_to_adev(ddev);
0216
0217 return sysfs_emit(buf, "%s\n", adev->serial);
0218 }
0219
0220 static DEVICE_ATTR(serial_number, S_IRUGO,
0221 amdgpu_device_get_serial_number, NULL);
0222
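/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Return true if the device is a dGPU with ATPX (PX) power control,
 * false otherwise.
 */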
0231 bool amdgpu_device_supports_px(struct drm_device *dev)
0232 {
0233 struct amdgpu_device *adev = drm_to_adev(dev);
0234
0235 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
0236 return true;
0237 return false;
0238 }
0239
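/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Return true if the device is a dGPU with ACPI power control
 * (PR3 or hybrid ATPX), false otherwise.
 */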
0248 bool amdgpu_device_supports_boco(struct drm_device *dev)
0249 {
0250 struct amdgpu_device *adev = drm_to_adev(dev);
0251
0252 if (adev->has_pr3 ||
0253 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
0254 return true;
0255 return false;
0256 }
0257
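/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return true if the ASIC supports BACO (Bus Active, Chip Off),
 * false otherwise.
 */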
0266 bool amdgpu_device_supports_baco(struct drm_device *dev)
0267 {
0268 struct amdgpu_device *adev = drm_to_adev(dev);
0269
0270 return amdgpu_asic_supports_baco(adev);
0271 }
0272
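/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * Smart Shift support
 *
 * @dev: drm_device pointer
 *
 * Return true if the device is a dGPU with Smart Shift support,
 * false otherwise.
 */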
0282 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
0283 {
0284 return (amdgpu_device_supports_boco(dev) &&
0285 amdgpu_acpi_is_power_shift_control_supported());
0286 }
0287
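/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram, must be dword aligned
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, must be dword aligned
 * @write: true - write to vram, otherwise - read from vram
 */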
0301 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
0302 void *buf, size_t size, bool write)
0303 {
0304 unsigned long flags;
0305 uint32_t hi = ~0, tmp = 0;
0306 uint32_t *data = buf;
0307 uint64_t last;
0308 int idx;
0309
0310 if (!drm_dev_enter(adev_to_drm(adev), &idx))
0311 return;
0312
0313 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
0314
0315 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
0316 for (last = pos + size; pos < last; pos += 4) {
0317 tmp = pos >> 31;
0318
0319 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
0320 if (tmp != hi) {
0321 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
0322 hi = tmp;
0323 }
0324 if (write)
0325 WREG32_NO_KIQ(mmMM_DATA, *data++);
0326 else
0327 *data++ = RREG32_NO_KIQ(mmMM_DATA);
0328 }
0329
0330 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
0331 drm_dev_exit(idx);
0332 }
0333
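/**
 * amdgpu_device_aper_access - access vram by the CPU-visible aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes actually transferred through the
 * CPU-visible VRAM aperture (may be less than @size).
 */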
0345 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
0346 void *buf, size_t size, bool write)
0347 {
0348 #ifdef CONFIG_64BIT
0349 void __iomem *addr;
0350 size_t count = 0;
0351 uint64_t last;
0352
0353 if (!adev->mman.aper_base_kaddr)
0354 return 0;
0355
0356 last = min(pos + size, adev->gmc.visible_vram_size);
0357 if (last > pos) {
0358 addr = adev->mman.aper_base_kaddr + pos;
0359 count = last - pos;
0360
0361 if (write) {
0362 memcpy_toio(addr, buf, count);
0363 mb();
0364 amdgpu_device_flush_hdp(adev, NULL);
0365 } else {
0366 amdgpu_device_invalidate_hdp(adev, NULL);
0367 mb();
0368 memcpy_fromio(buf, addr, count);
0369 }
0370
0371 }
0372
0373 return count;
0374 #else
0375 return 0;
0376 #endif
0377 }
0378
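/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Uses the VRAM aperture where possible and falls back to the
 * MM_INDEX/MM_DATA path for the remainder.
 */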
0388 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
0389 void *buf, size_t size, bool write)
0390 {
0391 size_t count;
0392
0393
0394 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
0395 size -= count;
0396 if (size) {
0397
0398 pos += count;
0399 buf += count;
0400 amdgpu_device_mm_access(adev, pos, buf, size, write);
0401 }
0402 }
0403
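/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */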
0409 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
0410 {
0411 if (adev->no_hw_access)
0412 return true;
0413
0414 #ifdef CONFIG_LOCKDEP
/*
 * What we assert here is that no GPU reset is running in parallel with
 * this register access: either the reset domain semaphore can briefly
 * be taken for read (no reset in flight), or lockdep must show that the
 * caller already holds it (i.e. we are on the reset path itself).
 * Only task context is checked.
 */
0426 if (in_task()) {
0427 if (down_read_trylock(&adev->reset_domain->sem))
0428 up_read(&adev->reset_domain->sem);
0429 else
0430 lockdep_assert_held(&adev->reset_domain->sem);
0431 }
0432 #endif
0433 return false;
0434 }
0435
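/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */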
0445 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
0446 uint32_t reg, uint32_t acc_flags)
0447 {
0448 uint32_t ret;
0449
0450 if (amdgpu_device_skip_hw_access(adev))
0451 return 0;
0452
0453 if ((reg * 4) < adev->rmmio_size) {
0454 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
0455 amdgpu_sriov_runtime(adev) &&
0456 down_read_trylock(&adev->reset_domain->sem)) {
0457 ret = amdgpu_kiq_rreg(adev, reg);
0458 up_read(&adev->reset_domain->sem);
0459 } else {
0460 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
0461 }
0462 } else {
0463 ret = adev->pcie_rreg(adev, reg * 4);
0464 }
0465
0466 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
0467
0468 return ret;
0469 }
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
0485 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
0486 {
0487 if (amdgpu_device_skip_hw_access(adev))
0488 return 0;
0489
0490 if (offset < adev->rmmio_size)
0491 return (readb(adev->rmmio + offset));
0492 BUG();
0493 }
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508
0509
0510 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
0511 {
0512 if (amdgpu_device_skip_hw_access(adev))
0513 return;
0514
0515 if (offset < adev->rmmio_size)
0516 writeb(value, adev->rmmio + offset);
0517 else
0518 BUG();
0519 }
0520
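/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */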
0531 void amdgpu_device_wreg(struct amdgpu_device *adev,
0532 uint32_t reg, uint32_t v,
0533 uint32_t acc_flags)
0534 {
0535 if (amdgpu_device_skip_hw_access(adev))
0536 return;
0537
0538 if ((reg * 4) < adev->rmmio_size) {
0539 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
0540 amdgpu_sriov_runtime(adev) &&
0541 down_read_trylock(&adev->reset_domain->sem)) {
0542 amdgpu_kiq_wreg(adev, reg, v);
0543 up_read(&adev->reset_domain->sem);
0544 } else {
0545 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
0546 }
0547 } else {
0548 adev->pcie_wreg(adev, reg * 4, v);
0549 }
0550
0551 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
0552 }
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562
0563 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
0564 uint32_t reg, uint32_t v)
0565 {
0566 if (amdgpu_device_skip_hw_access(adev))
0567 return;
0568
0569 if (amdgpu_sriov_fullaccess(adev) &&
0570 adev->gfx.rlc.funcs &&
0571 adev->gfx.rlc.funcs->is_rlcg_access_range) {
0572 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
0573 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
0574 } else if ((reg * 4) >= adev->rmmio_size) {
0575 adev->pcie_wreg(adev, reg * 4, v);
0576 } else {
0577 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
0578 }
0579 }
0580
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
0591 {
0592 if (amdgpu_device_skip_hw_access(adev))
0593 return 0;
0594
0595 if (index < adev->doorbell.num_doorbells) {
0596 return readl(adev->doorbell.ptr + index);
0597 } else {
0598 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
0599 return 0;
0600 }
0601 }
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
0614 {
0615 if (amdgpu_device_skip_hw_access(adev))
0616 return;
0617
0618 if (index < adev->doorbell.num_doorbells) {
0619 writel(v, adev->doorbell.ptr + index);
0620 } else {
0621 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
0622 }
0623 }
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633
0634 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
0635 {
0636 if (amdgpu_device_skip_hw_access(adev))
0637 return 0;
0638
0639 if (index < adev->doorbell.num_doorbells) {
0640 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
0641 } else {
0642 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
0643 return 0;
0644 }
0645 }
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
0658 {
0659 if (amdgpu_device_skip_hw_access(adev))
0660 return;
0661
0662 if (index < adev->doorbell.num_doorbells) {
0663 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
0664 } else {
0665 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
0666 }
0667 }
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
0680 u32 pcie_index, u32 pcie_data,
0681 u32 reg_addr)
0682 {
0683 unsigned long flags;
0684 u32 r;
0685 void __iomem *pcie_index_offset;
0686 void __iomem *pcie_data_offset;
0687
0688 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
0689 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
0690 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
0691
0692 writel(reg_addr, pcie_index_offset);
0693 readl(pcie_index_offset);
0694 r = readl(pcie_data_offset);
0695 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
0696
0697 return r;
0698 }
0699
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709
0710 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
0711 u32 pcie_index, u32 pcie_data,
0712 u32 reg_addr)
0713 {
0714 unsigned long flags;
0715 u64 r;
0716 void __iomem *pcie_index_offset;
0717 void __iomem *pcie_data_offset;
0718
0719 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
0720 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
0721 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
0722
0723
0724 writel(reg_addr, pcie_index_offset);
0725 readl(pcie_index_offset);
0726 r = readl(pcie_data_offset);
0727
0728 writel(reg_addr + 4, pcie_index_offset);
0729 readl(pcie_index_offset);
0730 r |= ((u64)readl(pcie_data_offset) << 32);
0731 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
0732
0733 return r;
0734 }
0735
0736
0737
0738
0739
0740
0741
0742
0743
0744
0745
0746 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
0747 u32 pcie_index, u32 pcie_data,
0748 u32 reg_addr, u32 reg_data)
0749 {
0750 unsigned long flags;
0751 void __iomem *pcie_index_offset;
0752 void __iomem *pcie_data_offset;
0753
0754 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
0755 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
0756 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
0757
0758 writel(reg_addr, pcie_index_offset);
0759 readl(pcie_index_offset);
0760 writel(reg_data, pcie_data_offset);
0761 readl(pcie_data_offset);
0762 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
0763 }
0764
0765
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
0776 u32 pcie_index, u32 pcie_data,
0777 u32 reg_addr, u64 reg_data)
0778 {
0779 unsigned long flags;
0780 void __iomem *pcie_index_offset;
0781 void __iomem *pcie_data_offset;
0782
0783 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
0784 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
0785 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
0786
0787
0788 writel(reg_addr, pcie_index_offset);
0789 readl(pcie_index_offset);
0790 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
0791 readl(pcie_data_offset);
0792
0793 writel(reg_addr + 4, pcie_index_offset);
0794 readl(pcie_index_offset);
0795 writel((u32)(reg_data >> 32), pcie_data_offset);
0796 readl(pcie_data_offset);
0797 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
0798 }
0799
0800
0801
0802
0803
0804
0805
0806
0807
0808
0809
0810 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
0811 {
0812 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
0813 BUG();
0814 return 0;
0815 }
0816
0817
0818
0819
0820
0821
0822
0823
0824
0825
0826
0827 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
0828 {
0829 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
0830 reg, v);
0831 BUG();
0832 }
0833
0834
0835
0836
0837
0838
0839
0840
0841
0842
0843
0844 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
0845 {
0846 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
0847 BUG();
0848 return 0;
0849 }
0850
0851
0852
0853
0854
0855
0856
0857
0858
0859
0860
0861 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
0862 {
0863 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
0864 reg, v);
0865 BUG();
0866 }
0867
0868
0869
0870
0871
0872
0873
0874
0875
0876
0877
0878
0879 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
0880 uint32_t block, uint32_t reg)
0881 {
0882 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
0883 reg, block);
0884 BUG();
0885 return 0;
0886 }
0887
0888
0889
0890
0891
0892
0893
0894
0895
0896
0897
0898
0899 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
0900 uint32_t block,
0901 uint32_t reg, uint32_t v)
0902 {
0903 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
0904 reg, block, v);
0905 BUG();
0906 }
0907
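/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then executes the ASIC init sequence
 * from the VBIOS (atomfirmware on GC 11+, atombios otherwise).
 */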
0915 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
0916 {
0917 amdgpu_asic_pre_asic_init(adev);
0918
0919 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
0920 return amdgpu_atomfirmware_asic_init(adev, true);
0921 else
0922 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
0923 }
0924
0925
0926
0927
0928
0929
0930
0931
0932
0933 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
0934 {
0935 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
0936 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
0937 &adev->vram_scratch.robj,
0938 &adev->vram_scratch.gpu_addr,
0939 (void **)&adev->vram_scratch.ptr);
0940 }
0941
0942
0943
0944
0945
0946
0947
0948
0949 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
0950 {
0951 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
0952 }
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
0965 const u32 *registers,
0966 const u32 array_size)
0967 {
0968 u32 tmp, reg, and_mask, or_mask;
0969 int i;
0970
0971 if (array_size % 3)
0972 return;
0973
for (i = 0; i < array_size; i += 3) {
0975 reg = registers[i + 0];
0976 and_mask = registers[i + 1];
0977 or_mask = registers[i + 2];
0978
0979 if (and_mask == 0xffffffff) {
0980 tmp = or_mask;
0981 } else {
0982 tmp = RREG32(reg);
0983 tmp &= ~and_mask;
0984 if (adev->family >= AMDGPU_FAMILY_AI)
0985 tmp |= (or_mask & and_mask);
0986 else
0987 tmp |= or_mask;
0988 }
0989 WREG32(reg, tmp);
0990 }
0991 }
0992
0993
0994
0995
0996
0997
0998
0999
1000
1001 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1002 {
1003 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1004 }
1005
1006
1007
1008
1009
1010
1011
1012
1013 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1014 {
1015 return pci_reset_function(adev->pdev);
1016 }
1017
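/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Map the doorbell BAR (PCI resource 2) and work out how many doorbell
 * slots are usable.  Returns 0 on success or a negative error code.
 */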
1029 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1030 {
/* No doorbell aperture on SI and older hardware generations */
1033 if (adev->asic_type < CHIP_BONAIRE) {
1034 adev->doorbell.base = 0;
1035 adev->doorbell.size = 0;
1036 adev->doorbell.num_doorbells = 0;
1037 adev->doorbell.ptr = NULL;
1038 return 0;
1039 }
1040
1041 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1042 return -EINVAL;
1043
1044 amdgpu_asic_init_doorbell_index(adev);
1045
1046
1047 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1048 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1049
1050 if (adev->enable_mes) {
1051 adev->doorbell.num_doorbells =
1052 adev->doorbell.size / sizeof(u32);
1053 } else {
1054 adev->doorbell.num_doorbells =
1055 min_t(u32, adev->doorbell.size / sizeof(u32),
1056 adev->doorbell_index.max_assignment+1);
1057 if (adev->doorbell.num_doorbells == 0)
1058 return -EINVAL;
1059
/*
 * For Vega and newer, reserve an extra page of doorbells (the 0x400
 * dword entries below) beyond the assigned maximum, used e.g. for the
 * SDMA paging queue doorbell which lives in the second page.
 */
1066 if (adev->asic_type >= CHIP_VEGA10)
1067 adev->doorbell.num_doorbells += 0x400;
1068 }
1069
1070 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1071 adev->doorbell.num_doorbells *
1072 sizeof(u32));
1073 if (adev->doorbell.ptr == NULL)
1074 return -ENOMEM;
1075
1076 return 0;
1077 }
1078
1079
1080
1081
1082
1083
1084
1085
1086 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1087 {
1088 iounmap(adev->doorbell.ptr);
1089 adev->doorbell.ptr = NULL;
1090 }
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1109 {
1110 if (adev->wb.wb_obj) {
1111 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1112 &adev->wb.gpu_addr,
1113 (void **)&adev->wb.wb);
1114 adev->wb.wb_obj = NULL;
1115 }
1116 }
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1128 {
1129 int r;
1130
1131 if (adev->wb.wb_obj == NULL) {
1132
1133 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1134 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1135 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1136 (void **)&adev->wb.wb);
1137 if (r) {
1138 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1139 return r;
1140 }
1141
1142 adev->wb.num_wb = AMDGPU_MAX_WB;
1143 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1144
1145
1146 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1147 }
1148
1149 return 0;
1150 }
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1162 {
1163 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1164
1165 if (offset < adev->wb.num_wb) {
1166 __set_bit(offset, adev->wb.used);
1167 *wb = offset << 3;
1168 return 0;
1169 } else {
1170 return -EINVAL;
1171 }
1172 }
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1183 {
1184 wb >>= 3;
1185 if (wb < adev->wb.num_wb)
1186 __clear_bit(wb, adev->wb.used);
1187 }
1188
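/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize the FB BAR so that all of VRAM is CPU accessible.
 * Returns 0 on success; not being able to resize is not considered
 * a failure.
 */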
1198 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1199 {
1200 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1201 struct pci_bus *root;
1202 struct resource *res;
1203 unsigned i;
1204 u16 cmd;
1205 int r;
1206
1207
1208 if (amdgpu_sriov_vf(adev))
1209 return 0;
1210
1211
1212 if (adev->gmc.real_vram_size &&
1213 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1214 return 0;
1215
1216
1217 root = adev->pdev->bus;
1218 while (root->parent)
1219 root = root->parent;
1220
1221 pci_bus_for_each_resource(root, res, i) {
1222 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1223 res->start > 0x100000000ull)
1224 break;
1225 }
1226
1227
1228 if (!res)
1229 return 0;
1230
1231
1232 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1233 rbar_size);
1234
1235
1236 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1237 pci_write_config_word(adev->pdev, PCI_COMMAND,
1238 cmd & ~PCI_COMMAND_MEMORY);
1239
1240
1241 amdgpu_device_doorbell_fini(adev);
1242 if (adev->asic_type >= CHIP_BONAIRE)
1243 pci_release_resource(adev->pdev, 2);
1244
1245 pci_release_resource(adev->pdev, 0);
1246
1247 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1248 if (r == -ENOSPC)
1249 DRM_INFO("Not enough PCI address space for a large BAR.");
1250 else if (r && r != -ENOTSUPP)
1251 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1252
1253 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1254
/*
 * When the doorbell or FB BAR isn't available we have no chance of
 * using the device.
 */
1258 r = amdgpu_device_doorbell_init(adev);
1259 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1260 return -ENODEV;
1261
1262 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1263
1264 return 0;
1265 }
1266
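/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check whether the VBIOS needs to be posted (executed) to initialize
 * the ASIC, e.g. during passthrough or after a reset.
 * Returns true if posting is needed, false otherwise.
 */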
1279 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1280 {
1281 uint32_t reg;
1282
1283 if (amdgpu_sriov_vf(adev))
1284 return false;
1285
1286 if (amdgpu_passthrough(adev)) {
/*
 * For Fiji in whole-GPU passthrough, some old SMC firmware versions
 * still require the driver to run vPost after a VM reboot, so force
 * posting when firmware older than the known-good version is found.
 */
1292 if (adev->asic_type == CHIP_FIJI) {
1293 int err;
1294 uint32_t fw_ver;
1295 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1296
1297 if (err)
1298 return true;
1299
1300 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1301 if (fw_ver < 0x00160e00)
1302 return true;
1303 }
1304 }
1305
1306
1307 if (adev->gmc.xgmi.pending_reset)
1308 return false;
1309
1310 if (adev->has_hw_reset) {
1311 adev->has_hw_reset = false;
1312 return true;
1313 }
1314
1315
1316 if (adev->asic_type >= CHIP_BONAIRE)
1317 return amdgpu_atombios_scratch_need_asic_init(adev);
1318
1319
1320 reg = amdgpu_asic_get_config_memsize(adev);
1321
1322 if ((reg != 0) && (reg != 0xffffffff))
1323 return false;
1324
1325 return true;
1326 }
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1339 {
1340 switch (amdgpu_aspm) {
1341 case -1:
1342 break;
1343 case 0:
1344 return false;
1345 case 1:
1346 return true;
1347 default:
1348 return false;
1349 }
1350 return pcie_aspm_enabled(adev->pdev);
1351 }
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1364 bool state)
1365 {
1366 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1367 amdgpu_asic_set_vga_state(adev, state);
1368 if (state)
1369 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1370 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1371 else
1372 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1373 }
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1386 {
1387
1388
1389
1390 if (amdgpu_vm_block_size == -1)
1391 return;
1392
1393 if (amdgpu_vm_block_size < 9) {
1394 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1395 amdgpu_vm_block_size);
1396 amdgpu_vm_block_size = -1;
1397 }
1398 }
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1409 {
1410
1411 if (amdgpu_vm_size == -1)
1412 return;
1413
1414 if (amdgpu_vm_size < 1) {
1415 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1416 amdgpu_vm_size);
1417 amdgpu_vm_size = -1;
1418 }
1419 }
1420
1421 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1422 {
1423 struct sysinfo si;
1424 bool is_os_64 = (sizeof(void *) == 8);
1425 uint64_t total_memory;
1426 uint64_t dram_size_seven_GB = 0x1B8000000;
1427 uint64_t dram_size_three_GB = 0xB8000000;
1428
1429 if (amdgpu_smu_memory_pool_size == 0)
1430 return;
1431
1432 if (!is_os_64) {
1433 DRM_WARN("Not 64-bit OS, feature not supported\n");
1434 goto def_value;
1435 }
1436 si_meminfo(&si);
1437 total_memory = (uint64_t)si.totalram * si.mem_unit;
1438
1439 if ((amdgpu_smu_memory_pool_size == 1) ||
1440 (amdgpu_smu_memory_pool_size == 2)) {
1441 if (total_memory < dram_size_three_GB)
1442 goto def_value1;
1443 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1444 (amdgpu_smu_memory_pool_size == 8)) {
1445 if (total_memory < dram_size_seven_GB)
1446 goto def_value1;
1447 } else {
1448 DRM_WARN("Smu memory pool size not supported\n");
1449 goto def_value;
1450 }
1451 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1452
1453 return;
1454
1455 def_value1:
DRM_WARN("Not enough system memory\n");
1457 def_value:
1458 adev->pm.smu_prv_buffer_size = 0;
1459 }
1460
1461 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1462 {
1463 if (!(adev->flags & AMD_IS_APU) ||
1464 adev->asic_type < CHIP_RAVEN)
1465 return 0;
1466
1467 switch (adev->asic_type) {
1468 case CHIP_RAVEN:
1469 if (adev->pdev->device == 0x15dd)
1470 adev->apu_flags |= AMD_APU_IS_RAVEN;
1471 if (adev->pdev->device == 0x15d8)
1472 adev->apu_flags |= AMD_APU_IS_PICASSO;
1473 break;
1474 case CHIP_RENOIR:
1475 if ((adev->pdev->device == 0x1636) ||
1476 (adev->pdev->device == 0x164c))
1477 adev->apu_flags |= AMD_APU_IS_RENOIR;
1478 else
1479 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1480 break;
1481 case CHIP_VANGOGH:
1482 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1483 break;
1484 case CHIP_YELLOW_CARP:
1485 break;
1486 case CHIP_CYAN_SKILLFISH:
1487 if ((adev->pdev->device == 0x13FE) ||
1488 (adev->pdev->device == 0x143F))
1489 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1490 break;
1491 default:
1492 break;
1493 }
1494
1495 return 0;
1496 }
1497
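/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */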
1506 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1507 {
1508 if (amdgpu_sched_jobs < 4) {
1509 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1510 amdgpu_sched_jobs);
1511 amdgpu_sched_jobs = 4;
} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1513 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1514 amdgpu_sched_jobs);
1515 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1516 }
1517
1518 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1519
1520 dev_warn(adev->dev, "gart size (%d) too small\n",
1521 amdgpu_gart_size);
1522 amdgpu_gart_size = -1;
1523 }
1524
1525 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1526
1527 dev_warn(adev->dev, "gtt size (%d) too small\n",
1528 amdgpu_gtt_size);
1529 amdgpu_gtt_size = -1;
1530 }
1531
1532
1533 if (amdgpu_vm_fragment_size != -1 &&
1534 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
dev_warn(adev->dev, "valid range for vm fragment size is between 4 and 9\n");
1536 amdgpu_vm_fragment_size = -1;
1537 }
1538
1539 if (amdgpu_sched_hw_submission < 2) {
1540 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1541 amdgpu_sched_hw_submission);
1542 amdgpu_sched_hw_submission = 2;
1543 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1544 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1545 amdgpu_sched_hw_submission);
1546 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1547 }
1548
1549 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1550 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1551 amdgpu_reset_method = -1;
1552 }
1553
1554 amdgpu_device_check_smu_prv_buffer_size(adev);
1555
1556 amdgpu_device_check_vm_size(adev);
1557
1558 amdgpu_device_check_block_size(adev);
1559
1560 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1561
1562 return 0;
1563 }
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1575 enum vga_switcheroo_state state)
1576 {
1577 struct drm_device *dev = pci_get_drvdata(pdev);
1578 int r;
1579
1580 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1581 return;
1582
1583 if (state == VGA_SWITCHEROO_ON) {
1584 pr_info("switched on\n");
1585
1586 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1587
1588 pci_set_power_state(pdev, PCI_D0);
1589 amdgpu_device_load_pci_state(pdev);
1590 r = pci_enable_device(pdev);
1591 if (r)
1592 DRM_WARN("pci_enable_device failed (%d)\n", r);
1593 amdgpu_device_resume(dev, true);
1594
1595 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1596 } else {
1597 pr_info("switched off\n");
1598 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1599 amdgpu_device_suspend(dev, true);
1600 amdgpu_device_cache_pci_state(pdev);
1601
1602 pci_disable_device(pdev);
1603 pci_set_power_state(pdev, PCI_D3cold);
1604 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1605 }
1606 }
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1618 {
1619 struct drm_device *dev = pci_get_drvdata(pdev);
1620
1621
1622
1623
1624
1625
1626 return atomic_read(&dev->open_count) == 0;
1627 }
1628
1629 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1630 .set_gpu_state = amdgpu_switcheroo_set_state,
1631 .reprobe = NULL,
1632 .can_switch = amdgpu_switcheroo_can_switch,
1633 };
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646 int amdgpu_device_ip_set_clockgating_state(void *dev,
1647 enum amd_ip_block_type block_type,
1648 enum amd_clockgating_state state)
1649 {
1650 struct amdgpu_device *adev = dev;
1651 int i, r = 0;
1652
1653 for (i = 0; i < adev->num_ip_blocks; i++) {
1654 if (!adev->ip_blocks[i].status.valid)
1655 continue;
1656 if (adev->ip_blocks[i].version->type != block_type)
1657 continue;
1658 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1659 continue;
1660 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1661 (void *)adev, state);
1662 if (r)
1663 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1664 adev->ip_blocks[i].version->funcs->name, r);
1665 }
1666 return r;
1667 }
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680 int amdgpu_device_ip_set_powergating_state(void *dev,
1681 enum amd_ip_block_type block_type,
1682 enum amd_powergating_state state)
1683 {
1684 struct amdgpu_device *adev = dev;
1685 int i, r = 0;
1686
1687 for (i = 0; i < adev->num_ip_blocks; i++) {
1688 if (!adev->ip_blocks[i].status.valid)
1689 continue;
1690 if (adev->ip_blocks[i].version->type != block_type)
1691 continue;
1692 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1693 continue;
1694 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1695 (void *)adev, state);
1696 if (r)
1697 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1698 adev->ip_blocks[i].version->funcs->name, r);
1699 }
1700 return r;
1701 }
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1715 u64 *flags)
1716 {
1717 int i;
1718
1719 for (i = 0; i < adev->num_ip_blocks; i++) {
1720 if (!adev->ip_blocks[i].status.valid)
1721 continue;
1722 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1723 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1724 }
1725 }
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1737 enum amd_ip_block_type block_type)
1738 {
1739 int i, r;
1740
1741 for (i = 0; i < adev->num_ip_blocks; i++) {
1742 if (!adev->ip_blocks[i].status.valid)
1743 continue;
1744 if (adev->ip_blocks[i].version->type == block_type) {
1745 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1746 if (r)
1747 return r;
1748 break;
1749 }
1750 }
1751 return 0;
1752
1753 }
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1765 enum amd_ip_block_type block_type)
1766 {
1767 int i;
1768
1769 for (i = 0; i < adev->num_ip_blocks; i++) {
1770 if (!adev->ip_blocks[i].status.valid)
1771 continue;
1772 if (adev->ip_blocks[i].version->type == block_type)
1773 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1774 }
1775 return true;
1776
1777 }
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788 struct amdgpu_ip_block *
1789 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1790 enum amd_ip_block_type type)
1791 {
1792 int i;
1793
1794 for (i = 0; i < adev->num_ip_blocks; i++)
1795 if (adev->ip_blocks[i].version->type == type)
1796 return &adev->ip_blocks[i];
1797
1798 return NULL;
1799 }
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1813 enum amd_ip_block_type type,
1814 u32 major, u32 minor)
1815 {
1816 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1817
1818 if (ip_block && ((ip_block->version->major > major) ||
1819 ((ip_block->version->major == major) &&
1820 (ip_block->version->minor >= minor))))
1821 return 0;
1822
1823 return 1;
1824 }
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1836 const struct amdgpu_ip_block_version *ip_block_version)
1837 {
1838 if (!ip_block_version)
1839 return -EINVAL;
1840
1841 switch (ip_block_version->type) {
1842 case AMD_IP_BLOCK_TYPE_VCN:
1843 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1844 return 0;
1845 break;
1846 case AMD_IP_BLOCK_TYPE_JPEG:
1847 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1848 return 0;
1849 break;
1850 default:
1851 break;
1852 }
1853
1854 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1855 ip_block_version->funcs->name);
1856
1857 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1858
1859 return 0;
1860 }
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1875 {
1876 adev->enable_virtual_display = false;
1877
1878 if (amdgpu_virtual_display) {
1879 const char *pci_address_name = pci_name(adev->pdev);
1880 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1881
1882 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1883 pciaddstr_tmp = pciaddstr;
1884 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1885 pciaddname = strsep(&pciaddname_tmp, ",");
1886 if (!strcmp("all", pciaddname)
1887 || !strcmp(pci_address_name, pciaddname)) {
1888 long num_crtc;
1889 int res = -1;
1890
1891 adev->enable_virtual_display = true;
1892
1893 if (pciaddname_tmp)
1894 res = kstrtol(pciaddname_tmp, 10,
1895 &num_crtc);
1896
1897 if (!res) {
1898 if (num_crtc < 1)
1899 num_crtc = 1;
1900 if (num_crtc > 6)
1901 num_crtc = 6;
1902 adev->mode_info.num_crtc = num_crtc;
1903 } else {
1904 adev->mode_info.num_crtc = 1;
1905 }
1906 break;
1907 }
1908 }
1909
1910 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1911 amdgpu_virtual_display, pci_address_name,
1912 adev->enable_virtual_display, adev->mode_info.num_crtc);
1913
1914 kfree(pciaddstr);
1915 }
1916 }
1917
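/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver (all asics).
 * Returns 0 on success, -EINVAL on failure.
 */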
1928 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1929 {
1930 const char *chip_name;
1931 char fw_name[40];
1932 int err;
1933 const struct gpu_info_firmware_header_v1_0 *hdr;
1934
1935 adev->firmware.gpu_info_fw = NULL;
1936
1937 if (adev->mman.discovery_bin) {
1938
1939
1940
1941
1942
1943 if (adev->asic_type != CHIP_NAVI12)
1944 return 0;
1945 }
1946
1947 switch (adev->asic_type) {
1948 default:
1949 return 0;
1950 case CHIP_VEGA10:
1951 chip_name = "vega10";
1952 break;
1953 case CHIP_VEGA12:
1954 chip_name = "vega12";
1955 break;
1956 case CHIP_RAVEN:
1957 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1958 chip_name = "raven2";
1959 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1960 chip_name = "picasso";
1961 else
1962 chip_name = "raven";
1963 break;
1964 case CHIP_ARCTURUS:
1965 chip_name = "arcturus";
1966 break;
1967 case CHIP_NAVI12:
1968 chip_name = "navi12";
1969 break;
1970 }
1971
1972 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1973 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1974 if (err) {
1975 dev_err(adev->dev,
1976 "Failed to load gpu_info firmware \"%s\"\n",
1977 fw_name);
1978 goto out;
1979 }
1980 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1981 if (err) {
1982 dev_err(adev->dev,
1983 "Failed to validate gpu_info firmware \"%s\"\n",
1984 fw_name);
1985 goto out;
1986 }
1987
1988 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1989 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1990
1991 switch (hdr->version_major) {
1992 case 1:
1993 {
1994 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1995 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1996 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1997
1998
1999
2000
2001 if (adev->asic_type == CHIP_NAVI12)
2002 goto parse_soc_bounding_box;
2003
2004 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2005 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2006 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2007 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2008 adev->gfx.config.max_texture_channel_caches =
2009 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2010 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2011 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2012 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2013 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2014 adev->gfx.config.double_offchip_lds_buf =
2015 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2016 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2017 adev->gfx.cu_info.max_waves_per_simd =
2018 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2019 adev->gfx.cu_info.max_scratch_slots_per_cu =
2020 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2021 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2022 if (hdr->version_minor >= 1) {
2023 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2024 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2025 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2026 adev->gfx.config.num_sc_per_sh =
2027 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2028 adev->gfx.config.num_packer_per_sc =
2029 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2030 }
2031
2032 parse_soc_bounding_box:
2033
2034
2035
2036
2037 if (hdr->version_minor == 2) {
2038 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2039 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2040 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2041 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2042 }
2043 break;
2044 }
2045 default:
2046 dev_err(adev->dev,
2047 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2048 err = -EINVAL;
2049 goto out;
2050 }
2051 out:
2052 return err;
2053 }
2054
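/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Determines the ASIC family, sets up the IP block list and calls the
 * early_init callbacks of all IP blocks.  Also loads the VBIOS and
 * initializes atombios along the way.  Returns 0 on success, negative
 * error code on failure.
 */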
2065 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2066 {
2067 struct drm_device *dev = adev_to_drm(adev);
2068 struct pci_dev *parent;
2069 int i, r;
2070
2071 amdgpu_device_enable_virtual_display(adev);
2072
2073 if (amdgpu_sriov_vf(adev)) {
2074 r = amdgpu_virt_request_full_gpu(adev, true);
2075 if (r)
2076 return r;
2077 }
2078
2079 switch (adev->asic_type) {
2080 #ifdef CONFIG_DRM_AMDGPU_SI
2081 case CHIP_VERDE:
2082 case CHIP_TAHITI:
2083 case CHIP_PITCAIRN:
2084 case CHIP_OLAND:
2085 case CHIP_HAINAN:
2086 adev->family = AMDGPU_FAMILY_SI;
2087 r = si_set_ip_blocks(adev);
2088 if (r)
2089 return r;
2090 break;
2091 #endif
2092 #ifdef CONFIG_DRM_AMDGPU_CIK
2093 case CHIP_BONAIRE:
2094 case CHIP_HAWAII:
2095 case CHIP_KAVERI:
2096 case CHIP_KABINI:
2097 case CHIP_MULLINS:
2098 if (adev->flags & AMD_IS_APU)
2099 adev->family = AMDGPU_FAMILY_KV;
2100 else
2101 adev->family = AMDGPU_FAMILY_CI;
2102
2103 r = cik_set_ip_blocks(adev);
2104 if (r)
2105 return r;
2106 break;
2107 #endif
2108 case CHIP_TOPAZ:
2109 case CHIP_TONGA:
2110 case CHIP_FIJI:
2111 case CHIP_POLARIS10:
2112 case CHIP_POLARIS11:
2113 case CHIP_POLARIS12:
2114 case CHIP_VEGAM:
2115 case CHIP_CARRIZO:
2116 case CHIP_STONEY:
2117 if (adev->flags & AMD_IS_APU)
2118 adev->family = AMDGPU_FAMILY_CZ;
2119 else
2120 adev->family = AMDGPU_FAMILY_VI;
2121
2122 r = vi_set_ip_blocks(adev);
2123 if (r)
2124 return r;
2125 break;
2126 default:
2127 r = amdgpu_discovery_set_ip_blocks(adev);
2128 if (r)
2129 return r;
2130 break;
2131 }
2132
2133 if (amdgpu_has_atpx() &&
2134 (amdgpu_is_atpx_hybrid() ||
2135 amdgpu_has_atpx_dgpu_power_cntl()) &&
2136 ((adev->flags & AMD_IS_APU) == 0) &&
2137 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2138 adev->flags |= AMD_IS_PX;
2139
2140 if (!(adev->flags & AMD_IS_APU)) {
2141 parent = pci_upstream_bridge(adev->pdev);
2142 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2143 }
2144
2145 amdgpu_amdkfd_device_probe(adev);
2146
2147 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2148 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2149 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2150 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2151 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2152
2153 for (i = 0; i < adev->num_ip_blocks; i++) {
2154 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2155 DRM_ERROR("disabled ip block: %d <%s>\n",
2156 i, adev->ip_blocks[i].version->funcs->name);
2157 adev->ip_blocks[i].status.valid = false;
2158 } else {
2159 if (adev->ip_blocks[i].version->funcs->early_init) {
2160 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2161 if (r == -ENOENT) {
2162 adev->ip_blocks[i].status.valid = false;
2163 } else if (r) {
2164 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2165 adev->ip_blocks[i].version->funcs->name, r);
2166 return r;
2167 } else {
2168 adev->ip_blocks[i].status.valid = true;
2169 }
2170 } else {
2171 adev->ip_blocks[i].status.valid = true;
2172 }
2173 }
2174
2175 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2176 r = amdgpu_device_parse_gpu_info_fw(adev);
2177 if (r)
2178 return r;
2179
2180
2181 if (!amdgpu_get_bios(adev))
2182 return -EINVAL;
2183
2184 r = amdgpu_atombios_init(adev);
2185 if (r) {
2186 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2187 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2188 return r;
2189 }
2190
2191
2192 if (amdgpu_sriov_vf(adev))
2193 amdgpu_virt_init_data_exchange(adev);
2194
2195 }
2196 }
2197
2198 adev->cg_flags &= amdgpu_cg_mask;
2199 adev->pg_flags &= amdgpu_pg_mask;
2200
2201 return 0;
2202 }
2203
2204 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2205 {
2206 int i, r;
2207
2208 for (i = 0; i < adev->num_ip_blocks; i++) {
2209 if (!adev->ip_blocks[i].status.sw)
2210 continue;
2211 if (adev->ip_blocks[i].status.hw)
2212 continue;
2213 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2214 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2216 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2217 if (r) {
2218 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2219 adev->ip_blocks[i].version->funcs->name, r);
2220 return r;
2221 }
2222 adev->ip_blocks[i].status.hw = true;
2223 }
2224 }
2225
2226 return 0;
2227 }
2228
2229 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2230 {
2231 int i, r;
2232
2233 for (i = 0; i < adev->num_ip_blocks; i++) {
2234 if (!adev->ip_blocks[i].status.sw)
2235 continue;
2236 if (adev->ip_blocks[i].status.hw)
2237 continue;
2238 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2239 if (r) {
2240 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2241 adev->ip_blocks[i].version->funcs->name, r);
2242 return r;
2243 }
2244 adev->ip_blocks[i].status.hw = true;
2245 }
2246
2247 return 0;
2248 }
2249
2250 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2251 {
2252 int r = 0;
2253 int i;
2254 uint32_t smu_version;
2255
2256 if (adev->asic_type >= CHIP_VEGA10) {
2257 for (i = 0; i < adev->num_ip_blocks; i++) {
2258 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2259 continue;
2260
2261 if (!adev->ip_blocks[i].status.sw)
2262 continue;
2263
/* no need to do the fw loading again if already done */
2265 if (adev->ip_blocks[i].status.hw == true)
2266 break;
2267
2268 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2269 r = adev->ip_blocks[i].version->funcs->resume(adev);
2270 if (r) {
2271 DRM_ERROR("resume of IP block <%s> failed %d\n",
2272 adev->ip_blocks[i].version->funcs->name, r);
2273 return r;
2274 }
2275 } else {
2276 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2277 if (r) {
2278 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2279 adev->ip_blocks[i].version->funcs->name, r);
2280 return r;
2281 }
2282 }
2283
2284 adev->ip_blocks[i].status.hw = true;
2285 break;
2286 }
2287 }
2288
2289 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2290 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2291
2292 return r;
2293 }
2294
2295 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2296 {
2297 long timeout;
2298 int r, i;
2299
2300 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2301 struct amdgpu_ring *ring = adev->rings[i];
2302
2303
2304 if (!ring || ring->no_scheduler)
2305 continue;
2306
2307 switch (ring->funcs->type) {
2308 case AMDGPU_RING_TYPE_GFX:
2309 timeout = adev->gfx_timeout;
2310 break;
2311 case AMDGPU_RING_TYPE_COMPUTE:
2312 timeout = adev->compute_timeout;
2313 break;
2314 case AMDGPU_RING_TYPE_SDMA:
2315 timeout = adev->sdma_timeout;
2316 break;
2317 default:
2318 timeout = adev->video_timeout;
2319 break;
2320 }
2321
2322 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2323 ring->num_hw_submission, amdgpu_job_hang_limit,
2324 timeout, adev->reset_domain->wq,
2325 ring->sched_score, ring->name,
2326 adev->dev);
2327 if (r) {
2328 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2329 ring->name);
2330 return r;
2331 }
2332 }
2333
2334 return 0;
2335 }
2336
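/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run.  sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */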
2349 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2350 {
2351 int i, r;
2352
2353 r = amdgpu_ras_init(adev);
2354 if (r)
2355 return r;
2356
2357 for (i = 0; i < adev->num_ip_blocks; i++) {
2358 if (!adev->ip_blocks[i].status.valid)
2359 continue;
2360 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2361 if (r) {
2362 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2363 adev->ip_blocks[i].version->funcs->name, r);
2364 goto init_failed;
2365 }
2366 adev->ip_blocks[i].status.sw = true;
2367
2368 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2369
2370 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2371 if (r) {
2372 DRM_ERROR("hw_init %d failed %d\n", i, r);
2373 goto init_failed;
2374 }
2375 adev->ip_blocks[i].status.hw = true;
2376 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2377
2378
2379 if (amdgpu_sriov_vf(adev))
2380 amdgpu_virt_exchange_data(adev);
2381
2382 r = amdgpu_device_vram_scratch_init(adev);
2383 if (r) {
2384 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2385 goto init_failed;
2386 }
2387 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2388 if (r) {
2389 DRM_ERROR("hw_init %d failed %d\n", i, r);
2390 goto init_failed;
2391 }
2392 r = amdgpu_device_wb_init(adev);
2393 if (r) {
2394 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2395 goto init_failed;
2396 }
2397 adev->ip_blocks[i].status.hw = true;
2398
2399
2400 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2401 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2402 AMDGPU_GEM_DOMAIN_VRAM,
2403 AMDGPU_CSA_SIZE);
2404 if (r) {
2405 DRM_ERROR("allocate CSA failed %d\n", r);
2406 goto init_failed;
2407 }
2408 }
2409 }
2410 }
2411
2412 if (amdgpu_sriov_vf(adev))
2413 amdgpu_virt_init_data_exchange(adev);
2414
2415 r = amdgpu_ib_pool_init(adev);
2416 if (r) {
2417 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2418 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2419 goto init_failed;
2420 }
2421
2422 r = amdgpu_ucode_create_bo(adev);
2423 if (r)
2424 goto init_failed;
2425
2426 r = amdgpu_device_ip_hw_init_phase1(adev);
2427 if (r)
2428 goto init_failed;
2429
2430 r = amdgpu_device_fw_loading(adev);
2431 if (r)
2432 goto init_failed;
2433
2434 r = amdgpu_device_ip_hw_init_phase2(adev);
2435 if (r)
2436 goto init_failed;
2437
/*
 * Retired pages are loaded from the RAS EEPROM and reserved here.  This
 * must happen after hw_init phase 2 since on some ASICs the EEPROM access
 * relies on SMU firmware for I2C.  amdgpu_ras_recovery_init() only fails
 * init for the "bad GPU" case; other failures release their resources and
 * log an error without aborting device init.
 */
2453 r = amdgpu_ras_recovery_init(adev);
2454 if (r)
2455 goto init_failed;
2456
2457
2458
2459
2460 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2461 if (amdgpu_xgmi_add_device(adev) == 0) {
2462 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2463
2464 if (!hive->reset_domain ||
2465 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2466 r = -ENOENT;
2467 amdgpu_put_xgmi_hive(hive);
2468 goto init_failed;
2469 }
2470
2471
2472 amdgpu_reset_put_reset_domain(adev->reset_domain);
2473 adev->reset_domain = hive->reset_domain;
2474 amdgpu_put_xgmi_hive(hive);
2475 }
2476 }
2477
2478 r = amdgpu_device_init_schedulers(adev);
2479 if (r)
2480 goto init_failed;
2481
2482
2483 if (!adev->gmc.xgmi.pending_reset)
2484 amdgpu_amdkfd_device_init(adev);
2485
2486 amdgpu_fru_get_product_info(adev);
2487
2488 init_failed:
2489 if (amdgpu_sriov_vf(adev))
2490 amdgpu_virt_release_full_gpu(adev, true);
2491
2492 return r;
2493 }
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2505 {
2506 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2507 }
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2520 {
2521 if (memcmp(adev->gart.ptr, adev->reset_magic,
2522 AMDGPU_RESET_MAGIC_NUM))
2523 return true;
2524
2525 if (!amdgpu_in_reset(adev))
2526 return false;
2527
2528
2529
2530
2531
2532 switch (amdgpu_asic_reset_method(adev)) {
2533 case AMD_RESET_METHOD_BACO:
2534 case AMD_RESET_METHOD_MODE1:
2535 return true;
2536 default:
2537 return false;
2538 }
2539 }
2540
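/**
 * amdgpu_device_set_cg_state - set clockgating for the amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.  Blocks that are handled
 * specially elsewhere (UVD, VCE, VCN, JPEG) are skipped here.
 * Returns 0 on success, negative error code on failure.
 */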
2554 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2555 enum amd_clockgating_state state)
2556 {
2557 int i, j, r;
2558
2559 if (amdgpu_emu_mode == 1)
2560 return 0;
2561
2562 for (j = 0; j < adev->num_ip_blocks; j++) {
2563 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2564 if (!adev->ip_blocks[i].status.late_initialized)
2565 continue;
2566
2567 if (adev->in_s0ix &&
2568 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2569 continue;
2570
2571 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2572 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2573 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2574 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2575 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2576
2577 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2578 state);
2579 if (r) {
2580 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2581 adev->ip_blocks[i].version->funcs->name, r);
2582 return r;
2583 }
2584 }
2585 }
2586
2587 return 0;
2588 }
2589
2590 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2591 enum amd_powergating_state state)
2592 {
2593 int i, j, r;
2594
2595 if (amdgpu_emu_mode == 1)
2596 return 0;
2597
2598 for (j = 0; j < adev->num_ip_blocks; j++) {
2599 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2600 if (!adev->ip_blocks[i].status.late_initialized)
2601 continue;
2602
2603 if (adev->in_s0ix &&
2604 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2605 continue;
2606
2607 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2608 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2609 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2610 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2611 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2612
2613 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2614 state);
2615 if (r) {
2616 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2617 adev->ip_blocks[i].version->funcs->name, r);
2618 return r;
2619 }
2620 }
2621 }
2622 return 0;
2623 }
2624
2625 static int amdgpu_device_enable_mgpu_fan_boost(void)
2626 {
2627 struct amdgpu_gpu_instance *gpu_ins;
2628 struct amdgpu_device *adev;
2629 int i, ret = 0;
2630
2631 mutex_lock(&mgpu_info.mutex);
2632
2633
2634
2635
2636
2637
2638 if (mgpu_info.num_dgpu < 2)
2639 goto out;
2640
2641 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2642 gpu_ins = &(mgpu_info.gpu_ins[i]);
2643 adev = gpu_ins->adev;
2644 if (!(adev->flags & AMD_IS_APU) &&
2645 !gpu_ins->mgpu_fan_enabled) {
2646 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2647 if (ret)
2648 break;
2649
2650 gpu_ins->mgpu_fan_enabled = 1;
2651 }
2652 }
2653
2654 out:
2655 mutex_unlock(&mgpu_info.mutex);
2656
2657 return ret;
2658 }
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
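/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Calls the late_init() callback of every hardware-initialized IP block,
 * sets up RAS, enables clock and power gating, records the reset magic and,
 * for multi-GPU and XGMI configurations, configures fan boost and link
 * p-states.  Returns 0 on success, negative error code on failure.
 */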
2672 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2673 {
2674 struct amdgpu_gpu_instance *gpu_instance;
2675 int i = 0, r;
2676
2677 for (i = 0; i < adev->num_ip_blocks; i++) {
2678 if (!adev->ip_blocks[i].status.hw)
2679 continue;
2680 if (adev->ip_blocks[i].version->funcs->late_init) {
2681 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2682 if (r) {
2683 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2684 adev->ip_blocks[i].version->funcs->name, r);
2685 return r;
2686 }
2687 }
2688 adev->ip_blocks[i].status.late_initialized = true;
2689 }
2690
2691 r = amdgpu_ras_late_init(adev);
2692 if (r) {
2693 DRM_ERROR("amdgpu_ras_late_init failed %d\n", r);
2694 return r;
2695 }
2696
2697 amdgpu_ras_set_error_query_ready(adev, true);
2698
2699 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2700 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2701
2702 amdgpu_device_fill_reset_magic(adev);
2703
2704 r = amdgpu_device_enable_mgpu_fan_boost();
2705 if (r)
2706 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2707
2708
2709 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2710 adev->asic_type == CHIP_ALDEBARAN))
2711 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2712
2713 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2714 mutex_lock(&mgpu_info.mutex);
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
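/*
 * Only lower the XGMI link p-state once every device in the hive has been
 * probed, i.e. when the number of registered dGPUs matches the number of
 * physical nodes in the hive.
 */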
2729 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2730 for (i = 0; i < mgpu_info.num_gpu; i++) {
2731 gpu_instance = &(mgpu_info.gpu_ins[i]);
2732 if (gpu_instance->adev->flags & AMD_IS_APU)
2733 continue;
2734
2735 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2736 AMDGPU_XGMI_PSTATE_MIN);
2737 if (r) {
2738 DRM_ERROR("pstate setting failed (%d).\n", r);
2739 break;
2740 }
2741 }
2742 }
2743
2744 mutex_unlock(&mgpu_info.mutex);
2745 }
2746
2747 return 0;
2748 }
2749
2750
2751
2752
2753
2754
2755
2756
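/*
 * amdgpu_device_smu_fini_early - early teardown of the SMC IP block
 *
 * On ASICs with a GC IP version of 9.0.0 or older, the SMC block needs to be
 * disabled before the remaining IP blocks are torn down.  Run its hw_fini
 * here and mark it done so the main teardown loop skips it.
 */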
2757 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2758 {
2759 int i, r;
2760
2761 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2762 return;
2763
2764 for (i = 0; i < adev->num_ip_blocks; i++) {
2765 if (!adev->ip_blocks[i].status.hw)
2766 continue;
2767 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2768 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2769
2770 if (r) {
2771 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2772 adev->ip_blocks[i].version->funcs->name, r);
2773 }
2774 adev->ip_blocks[i].status.hw = false;
2775 break;
2776 }
2777 }
2778 }
2779
2780 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2781 {
2782 int i, r;
2783
2784 for (i = 0; i < adev->num_ip_blocks; i++) {
2785 if (!adev->ip_blocks[i].version->funcs->early_fini)
2786 continue;
2787
2788 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2789 if (r) {
2790 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2791 adev->ip_blocks[i].version->funcs->name, r);
2792 }
2793 }
2794
2795 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2796 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2797
2798 amdgpu_amdkfd_suspend(adev, false);
2799
2800
2801 amdgpu_device_smu_fini_early(adev);
2802
2803 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2804 if (!adev->ip_blocks[i].status.hw)
2805 continue;
2806
2807 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2808
2809 if (r) {
2810 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2811 adev->ip_blocks[i].version->funcs->name, r);
2812 }
2813
2814 adev->ip_blocks[i].status.hw = false;
2815 }
2816
2817 if (amdgpu_sriov_vf(adev)) {
2818 if (amdgpu_virt_release_full_gpu(adev, false))
2819 DRM_ERROR("failed to release exclusive mode on fini\n");
2820 }
2821
2822 return 0;
2823 }
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
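/**
 * amdgpu_device_ip_fini - run sw_fini/late_fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Tears down the software state of every IP block in reverse init order
 * (sw_fini), then runs the late_fini() callbacks.  Also releases XGMI and
 * KFD state and the buffers owned by the GMC block.  Returns 0 on success.
 */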
2836 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2837 {
2838 int i, r;
2839
2840 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2841 amdgpu_virt_release_ras_err_handler_data(adev);
2842
2843 if (adev->gmc.xgmi.num_physical_nodes > 1)
2844 amdgpu_xgmi_remove_device(adev);
2845
2846 amdgpu_amdkfd_device_fini_sw(adev);
2847
2848 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2849 if (!adev->ip_blocks[i].status.sw)
2850 continue;
2851
2852 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2853 amdgpu_ucode_free_bo(adev);
2854 amdgpu_free_static_csa(&adev->virt.csa_obj);
2855 amdgpu_device_wb_fini(adev);
2856 amdgpu_device_vram_scratch_fini(adev);
2857 amdgpu_ib_pool_fini(adev);
2858 }
2859
2860 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2861
2862 if (r) {
2863 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2864 adev->ip_blocks[i].version->funcs->name, r);
2865 }
2866 adev->ip_blocks[i].status.sw = false;
2867 adev->ip_blocks[i].status.valid = false;
2868 }
2869
2870 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2871 if (!adev->ip_blocks[i].status.late_initialized)
2872 continue;
2873 if (adev->ip_blocks[i].version->funcs->late_fini)
2874 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2875 adev->ip_blocks[i].status.late_initialized = false;
2876 }
2877
2878 amdgpu_ras_fini(adev);
2879
2880 return 0;
2881 }
2882
2883
2884
2885
2886
2887
2888 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2889 {
2890 struct amdgpu_device *adev =
2891 container_of(work, struct amdgpu_device, delayed_init_work.work);
2892 int r;
2893
2894 r = amdgpu_ib_ring_tests(adev);
2895 if (r)
2896 DRM_ERROR("ib ring test failed (%d).\n", r);
2897 }
2898
2899 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2900 {
2901 struct amdgpu_device *adev =
2902 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2903
2904 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2905 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2906
2907 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2908 adev->gfx.gfx_off_state = true;
2909 }
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
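/**
 * amdgpu_device_ip_suspend_phase1 - first pass of IP suspend
 *
 * @adev: amdgpu_device pointer
 *
 * Ungates clock and power gating and suspends only the display (DCE) IP
 * block.  The remaining blocks are handled later in phase 2.  Returns 0 on
 * success, negative error code on failure.
 */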
2922 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2923 {
2924 int i, r;
2925
2926 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2927 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2928
2929 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2930 if (!adev->ip_blocks[i].status.valid)
2931 continue;
2932
2933
2934 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2935 continue;
2936
2937
2938 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2939
2940 if (r) {
2941 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2942 adev->ip_blocks[i].version->funcs->name, r);
2943 return r;
2944 }
2945
2946 adev->ip_blocks[i].status.hw = false;
2947 }
2948
2949 return 0;
2950 }
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
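/**
 * amdgpu_device_ip_suspend_phase2 - second pass of IP suspend
 *
 * @adev: amdgpu_device pointer
 *
 * Suspends all hardware IP blocks except display, which was already handled
 * in phase 1.  Blocks that must stay alive for a pending XGMI hive reset, a
 * triggered RAS interrupt or S0ix entry are skipped.  Returns 0 on success.
 */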
2963 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2964 {
2965 int i, r;
2966
2967 if (adev->in_s0ix)
2968 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
2969
2970 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2971 if (!adev->ip_blocks[i].status.valid)
2972 continue;
2973
2974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2975 continue;
2976
2977 if (amdgpu_ras_intr_triggered() &&
2978 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2979 adev->ip_blocks[i].status.hw = false;
2980 continue;
2981 }
2982
2983
2984 if (adev->gmc.xgmi.pending_reset &&
2985 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2986 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2987 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2988 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2989 adev->ip_blocks[i].status.hw = false;
2990 continue;
2991 }
2992
2993
2994
2995
2996
2997
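/*
 * In S0ix, GFX is expected to be in GFXOFF and PSP is managed by firmware,
 * so their suspend callbacks are skipped here.
 */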
2998 if (adev->in_s0ix &&
2999 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3000 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
3001 continue;
3002
3003
3004 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3005
3006 if (r) {
3007 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3008 adev->ip_blocks[i].version->funcs->name, r);
3009 }
3010 adev->ip_blocks[i].status.hw = false;
3011
3012 if (!amdgpu_sriov_vf(adev)) {
3013 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3014 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3015 if (r) {
3016 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3017 adev->mp1_state, r);
3018 return r;
3019 }
3020 }
3021 }
3022 }
3023
3024 return 0;
3025 }
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
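/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Runs both suspend phases, requesting and releasing full GPU access around
 * them when running as an SR-IOV VF.  Returns 0 on success, negative error
 * code on failure.
 */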
3038 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3039 {
3040 int r;
3041
3042 if (amdgpu_sriov_vf(adev)) {
3043 amdgpu_virt_fini_data_exchange(adev);
3044 amdgpu_virt_request_full_gpu(adev, false);
3045 }
3046
3047 r = amdgpu_device_ip_suspend_phase1(adev);
3048 if (r)
3049 return r;
3050 r = amdgpu_device_ip_suspend_phase2(adev);
3051
3052 if (amdgpu_sriov_vf(adev))
3053 amdgpu_virt_release_full_gpu(adev, false);
3054
3055 return r;
3056 }
3057
3058 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3059 {
3060 int i, r;
3061
3062 static enum amd_ip_block_type ip_order[] = {
3063 AMD_IP_BLOCK_TYPE_COMMON,
3064 AMD_IP_BLOCK_TYPE_GMC,
3065 AMD_IP_BLOCK_TYPE_PSP,
3066 AMD_IP_BLOCK_TYPE_IH,
3067 };
3068
3069 for (i = 0; i < adev->num_ip_blocks; i++) {
3070 int j;
3071 struct amdgpu_ip_block *block;
3072
3073 block = &adev->ip_blocks[i];
3074 block->status.hw = false;
3075
3076 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3077
3078 if (block->version->type != ip_order[j] ||
3079 !block->status.valid)
3080 continue;
3081
3082 r = block->version->funcs->hw_init(adev);
3083 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3084 if (r)
3085 return r;
3086 block->status.hw = true;
3087 }
3088 }
3089
3090 return 0;
3091 }
3092
3093 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3094 {
3095 int i, r;
3096
3097 static enum amd_ip_block_type ip_order[] = {
3098 AMD_IP_BLOCK_TYPE_SMC,
3099 AMD_IP_BLOCK_TYPE_DCE,
3100 AMD_IP_BLOCK_TYPE_GFX,
3101 AMD_IP_BLOCK_TYPE_SDMA,
3102 AMD_IP_BLOCK_TYPE_UVD,
3103 AMD_IP_BLOCK_TYPE_VCE,
3104 AMD_IP_BLOCK_TYPE_VCN
3105 };
3106
3107 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3108 int j;
3109 struct amdgpu_ip_block *block;
3110
3111 for (j = 0; j < adev->num_ip_blocks; j++) {
3112 block = &adev->ip_blocks[j];
3113
3114 if (block->version->type != ip_order[i] ||
3115 !block->status.valid ||
3116 block->status.hw)
3117 continue;
3118
3119 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3120 r = block->version->funcs->resume(adev);
3121 else
3122 r = block->version->funcs->hw_init(adev);
3123
3124 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3125 if (r)
3126 return r;
3127 block->status.hw = true;
3128 }
3129 }
3130
3131 return 0;
3132 }
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
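/**
 * amdgpu_device_ip_resume_phase1 - first pass of IP resume
 *
 * @adev: amdgpu_device pointer
 *
 * Resumes the COMMON, GMC and IH blocks first (plus PSP when running as an
 * SR-IOV VF) since the other blocks depend on them.  Returns 0 on success,
 * negative error code on failure.
 */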
3146 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3147 {
3148 int i, r;
3149
3150 for (i = 0; i < adev->num_ip_blocks; i++) {
3151 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3152 continue;
3153 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3154 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3155 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3156 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3157
3158 r = adev->ip_blocks[i].version->funcs->resume(adev);
3159 if (r) {
3160 DRM_ERROR("resume of IP block <%s> failed %d\n",
3161 adev->ip_blocks[i].version->funcs->name, r);
3162 return r;
3163 }
3164 adev->ip_blocks[i].status.hw = true;
3165 }
3166 }
3167
3168 return 0;
3169 }
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
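/**
 * amdgpu_device_ip_resume_phase2 - second pass of IP resume
 *
 * @adev: amdgpu_device pointer
 *
 * Resumes every remaining IP block that was not handled in phase 1.
 * Returns 0 on success, negative error code on failure.
 */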
3184 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3185 {
3186 int i, r;
3187
3188 for (i = 0; i < adev->num_ip_blocks; i++) {
3189 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3190 continue;
3191 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3192 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3193 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3194 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3195 continue;
3196 r = adev->ip_blocks[i].version->funcs->resume(adev);
3197 if (r) {
3198 DRM_ERROR("resume of IP block <%s> failed %d\n",
3199 adev->ip_blocks[i].version->funcs->name, r);
3200 return r;
3201 }
3202 adev->ip_blocks[i].status.hw = true;
3203 }
3204
3205 return 0;
3206 }
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
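/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Resumes the KFD IOMMU state, runs resume phase 1, loads firmware and then
 * runs resume phase 2.  Returns 0 on success, negative error code on failure.
 */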
3220 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3221 {
3222 int r;
3223
3224 r = amdgpu_amdkfd_resume_iommu(adev);
3225 if (r)
3226 return r;
3227
3228 r = amdgpu_device_ip_resume_phase1(adev);
3229 if (r)
3230 return r;
3231
3232 r = amdgpu_device_fw_loading(adev);
3233 if (r)
3234 return r;
3235
3236 r = amdgpu_device_ip_resume_phase2(adev);
3237
3238 return r;
3239 }
3240
3241
3242
3243
3244
3245
3246
3247
3248 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3249 {
3250 if (amdgpu_sriov_vf(adev)) {
3251 if (adev->is_atom_fw) {
3252 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3253 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3254 } else {
3255 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3256 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3257 }
3258
3259 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3260 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3261 }
3262 }
3263
3264
3265
3266
3267
3268
3269
3270
3271
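/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Returns true if the Display Core (DC) driver can drive the given ASIC,
 * honouring the amdgpu_dc module parameter for chips where DC support is
 * optional or experimental.
 */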
3272 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3273 {
3274 switch (asic_type) {
3275 #ifdef CONFIG_DRM_AMDGPU_SI
3276 case CHIP_HAINAN:
3277 #endif
3278 case CHIP_TOPAZ:
3279
3280 return false;
3281 #if defined(CONFIG_DRM_AMD_DC)
3282 case CHIP_TAHITI:
3283 case CHIP_PITCAIRN:
3284 case CHIP_VERDE:
3285 case CHIP_OLAND:
3286
3287
3288
3289
3290
3291
3292
3293 #if defined(CONFIG_DRM_AMD_DC_SI)
3294 return amdgpu_dc > 0;
3295 #else
3296 return false;
3297 #endif
3298 case CHIP_BONAIRE:
3299 case CHIP_KAVERI:
3300 case CHIP_KABINI:
3301 case CHIP_MULLINS:
3302
3303
3304
3305
3306
3307
3308
3309 return amdgpu_dc > 0;
3310 default:
3311 return amdgpu_dc != 0;
3312 #else
3313 default:
3314 if (amdgpu_dc > 0)
3315 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3316 "but isn't supported by ASIC, ignoring\n");
3317 return false;
3318 #endif
3319 }
3320 }
3321
3322
3323
3324
3325
3326
3327
3328
3329 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3330 {
3331 if (amdgpu_sriov_vf(adev) ||
3332 adev->enable_virtual_display ||
3333 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3334 return false;
3335
3336 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3337 }
3338
3339 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3340 {
3341 struct amdgpu_device *adev =
3342 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3343 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3344
3345
3346 if (WARN_ON(!hive))
3347 return;
3348
3349
3350
3351
3352
3353
3354
3355 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3356
3357 task_barrier_enter(&hive->tb);
3358 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3359
3360 if (adev->asic_reset_res)
3361 goto fail;
3362
3363 task_barrier_exit(&hive->tb);
3364 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3365
3366 if (adev->asic_reset_res)
3367 goto fail;
3368
3369 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3370 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3371 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3372 } else {
3373
3374 task_barrier_full(&hive->tb);
3375 adev->asic_reset_res = amdgpu_asic_reset(adev);
3376 }
3377
3378 fail:
3379 if (adev->asic_reset_res)
3380 DRM_WARN("ASIC reset failed with error %d for drm dev %s\n",
3381 adev->asic_reset_res, adev_to_drm(adev)->unique);
3382 amdgpu_put_xgmi_hive(hive);
3383 }
3384
3385 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3386 {
3387 char *input = amdgpu_lockup_timeout;
3388 char *timeout_setting = NULL;
3389 int index = 0;
3390 long timeout;
3391 int ret = 0;
3392
3393
3394
3395
3396
3397
3398
3399 adev->gfx_timeout = msecs_to_jiffies(10000);
3400 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3401 if (amdgpu_sriov_vf(adev))
3402 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3403 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3404 else
3405 adev->compute_timeout = msecs_to_jiffies(60000);
3406
3407 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3408 while ((timeout_setting = strsep(&input, ",")) &&
3409 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3410 ret = kstrtol(timeout_setting, 0, &timeout);
3411 if (ret)
3412 return ret;
3413
3414 if (timeout == 0) {
3415 index++;
3416 continue;
3417 } else if (timeout < 0) {
3418 timeout = MAX_SCHEDULE_TIMEOUT;
3419 dev_warn(adev->dev, "lockup timeout disabled");
3420 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3421 } else {
3422 timeout = msecs_to_jiffies(timeout);
3423 }
3424
3425 switch (index++) {
3426 case 0:
3427 adev->gfx_timeout = timeout;
3428 break;
3429 case 1:
3430 adev->compute_timeout = timeout;
3431 break;
3432 case 2:
3433 adev->sdma_timeout = timeout;
3434 break;
3435 case 3:
3436 adev->video_timeout = timeout;
3437 break;
3438 default:
3439 break;
3440 }
3441 }
3442
3443
3444
3445
3446 if (index == 1) {
3447 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3448 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3449 adev->compute_timeout = adev->gfx_timeout;
3450 }
3451 }
3452
3453 return ret;
3454 }
3455
3456
3457
3458
3459
3460
3461
3462
3463 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3464 {
3465 struct iommu_domain *domain;
3466
3467 domain = iommu_get_domain_for_dev(adev->dev);
3468 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3469 adev->ram_is_direct_mapped = true;
3470 }
3471
3472 static const struct attribute *amdgpu_dev_attributes[] = {
3473 &dev_attr_product_name.attr,
3474 &dev_attr_product_number.attr,
3475 &dev_attr_serial_number.attr,
3476 &dev_attr_pcie_replay_count.attr,
3477 NULL
3478 };
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
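/**
 * amdgpu_device_init - initialize the driver
 *
 * @adev: amdgpu_device pointer
 * @flags: driver flags
 *
 * Initializes the driver for the device: MMIO mapping, locks and work items,
 * vBIOS posting, IP block bring-up, fence driver, sysfs attributes and
 * VGA/PX registration.  Returns 0 on success, negative error code on failure.
 * Called at driver startup.
 */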
3490 int amdgpu_device_init(struct amdgpu_device *adev,
3491 uint32_t flags)
3492 {
3493 struct drm_device *ddev = adev_to_drm(adev);
3494 struct pci_dev *pdev = adev->pdev;
3495 int r, i;
3496 bool px = false;
3497 u32 max_MBps;
3498
3499 adev->shutdown = false;
3500 adev->flags = flags;
3501
3502 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3503 adev->asic_type = amdgpu_force_asic_type;
3504 else
3505 adev->asic_type = flags & AMD_ASIC_MASK;
3506
3507 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3508 if (amdgpu_emu_mode == 1)
3509 adev->usec_timeout *= 10;
3510 adev->gmc.gart_size = 512 * 1024 * 1024;
3511 adev->accel_working = false;
3512 adev->num_rings = 0;
3513 adev->mman.buffer_funcs = NULL;
3514 adev->mman.buffer_funcs_ring = NULL;
3515 adev->vm_manager.vm_pte_funcs = NULL;
3516 adev->vm_manager.vm_pte_num_scheds = 0;
3517 adev->gmc.gmc_funcs = NULL;
3518 adev->harvest_ip_mask = 0x0;
3519 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3520 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3521
3522 adev->smc_rreg = &amdgpu_invalid_rreg;
3523 adev->smc_wreg = &amdgpu_invalid_wreg;
3524 adev->pcie_rreg = &amdgpu_invalid_rreg;
3525 adev->pcie_wreg = &amdgpu_invalid_wreg;
3526 adev->pciep_rreg = &amdgpu_invalid_rreg;
3527 adev->pciep_wreg = &amdgpu_invalid_wreg;
3528 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3529 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3530 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3531 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3532 adev->didt_rreg = &amdgpu_invalid_rreg;
3533 adev->didt_wreg = &amdgpu_invalid_wreg;
3534 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3535 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3536 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3537 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3538
3539 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3540 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3541 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3542
3543
3544
3545 mutex_init(&adev->firmware.mutex);
3546 mutex_init(&adev->pm.mutex);
3547 mutex_init(&adev->gfx.gpu_clock_mutex);
3548 mutex_init(&adev->srbm_mutex);
3549 mutex_init(&adev->gfx.pipe_reserve_mutex);
3550 mutex_init(&adev->gfx.gfx_off_mutex);
3551 mutex_init(&adev->grbm_idx_mutex);
3552 mutex_init(&adev->mn_lock);
3553 mutex_init(&adev->virt.vf_errors.lock);
3554 hash_init(adev->mn_hash);
3555 mutex_init(&adev->psp.mutex);
3556 mutex_init(&adev->notifier_lock);
3557 mutex_init(&adev->pm.stable_pstate_ctx_lock);
3558 mutex_init(&adev->benchmark_mutex);
3559
3560 amdgpu_device_init_apu_flags(adev);
3561
3562 r = amdgpu_device_check_arguments(adev);
3563 if (r)
3564 return r;
3565
3566 spin_lock_init(&adev->mmio_idx_lock);
3567 spin_lock_init(&adev->smc_idx_lock);
3568 spin_lock_init(&adev->pcie_idx_lock);
3569 spin_lock_init(&adev->uvd_ctx_idx_lock);
3570 spin_lock_init(&adev->didt_idx_lock);
3571 spin_lock_init(&adev->gc_cac_idx_lock);
3572 spin_lock_init(&adev->se_cac_idx_lock);
3573 spin_lock_init(&adev->audio_endpt_idx_lock);
3574 spin_lock_init(&adev->mm_stats.lock);
3575
3576 INIT_LIST_HEAD(&adev->shadow_list);
3577 mutex_init(&adev->shadow_list_lock);
3578
3579 INIT_LIST_HEAD(&adev->reset_list);
3580
3581 INIT_LIST_HEAD(&adev->ras_list);
3582
3583 INIT_DELAYED_WORK(&adev->delayed_init_work,
3584 amdgpu_device_delayed_init_work_handler);
3585 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3586 amdgpu_device_delay_enable_gfx_off);
3587
3588 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3589
3590 adev->gfx.gfx_off_req_count = 1;
3591 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3592
3593 atomic_set(&adev->throttling_logging_enabled, 1);
3594
3595
3596
3597
3598
3599
3600
3601 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3602 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3603
3604
3605
3606 if (adev->asic_type >= CHIP_BONAIRE) {
3607 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3608 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3609 } else {
3610 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3611 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3612 }
3613
3614 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3615 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3616
3617 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3618 if (adev->rmmio == NULL) {
3619 return -ENOMEM;
3620 }
3621 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3622 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3623
3624 amdgpu_device_get_pcie_info(adev);
3625
3626 if (amdgpu_mcbp)
3627 DRM_INFO("MCBP is enabled\n");
3628
3629
3630
3631
3632
3633
3634 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3635 if (!adev->reset_domain)
3636 return -ENOMEM;
3637
3638
3639 amdgpu_detect_virtualization(adev);
3640
3641 r = amdgpu_device_get_job_timeout_settings(adev);
3642 if (r) {
3643 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3644 return r;
3645 }
3646
3647
3648 r = amdgpu_device_ip_early_init(adev);
3649 if (r)
3650 return r;
3651
3652
3653 amdgpu_gmc_tmz_set(adev);
3654
3655 amdgpu_gmc_noretry_set(adev);
3656
3657 if (adev->gmc.xgmi.supported) {
3658 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3659 if (r)
3660 return r;
3661 }
3662
3663
3664 if (amdgpu_sriov_vf(adev))
3665 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3666 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3667 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3668 else
3669 adev->have_atomics_support =
3670 !pci_enable_atomic_ops_to_root(adev->pdev,
3671 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3672 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3673 if (!adev->have_atomics_support)
3674 dev_info(adev->dev, "PCIe atomic ops are not supported\n");
3675
3676
3677 amdgpu_device_doorbell_init(adev);
3678
3679 if (amdgpu_emu_mode == 1) {
3680
3681 emu_soc_asic_init(adev);
3682 goto fence_driver_init;
3683 }
3684
3685 amdgpu_reset_init(adev);
3686
3687
3688 amdgpu_device_detect_sriov_bios(adev);
3689
3690
3691
3692
3693 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3694 if (adev->gmc.xgmi.num_physical_nodes) {
3695 dev_info(adev->dev, "Pending hive reset.\n");
3696 adev->gmc.xgmi.pending_reset = true;
3697
3698 for (i = 0; i < adev->num_ip_blocks; i++) {
3699 if (!adev->ip_blocks[i].status.valid)
3700 continue;
3701 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3702 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3703 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3704 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3705 DRM_DEBUG("IP %s disabled for hw_init.\n",
3706 adev->ip_blocks[i].version->funcs->name);
3707 adev->ip_blocks[i].status.hw = true;
3708 }
3709 }
3710 } else {
3711 r = amdgpu_asic_reset(adev);
3712 if (r) {
3713 dev_err(adev->dev, "asic reset on init failed\n");
3714 goto failed;
3715 }
3716 }
3717 }
3718
3719 pci_enable_pcie_error_reporting(adev->pdev);
3720
3721
3722 if (amdgpu_device_need_post(adev)) {
3723 if (!adev->bios) {
3724 dev_err(adev->dev, "no vBIOS found\n");
3725 r = -EINVAL;
3726 goto failed;
3727 }
3728 DRM_INFO("GPU posting now...\n");
3729 r = amdgpu_device_asic_init(adev);
3730 if (r) {
3731 dev_err(adev->dev, "gpu post error!\n");
3732 goto failed;
3733 }
3734 }
3735
3736 if (adev->is_atom_fw) {
3737
3738 r = amdgpu_atomfirmware_get_clock_info(adev);
3739 if (r) {
3740 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3741 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3742 goto failed;
3743 }
3744 } else {
3745
3746 r = amdgpu_atombios_get_clock_info(adev);
3747 if (r) {
3748 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3749 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3750 goto failed;
3751 }
3752
3753 if (!amdgpu_device_has_dc_support(adev))
3754 amdgpu_atombios_i2c_init(adev);
3755 }
3756
3757 fence_driver_init:
3758
3759 r = amdgpu_fence_driver_sw_init(adev);
3760 if (r) {
3761 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3762 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3763 goto failed;
3764 }
3765
3766
3767 drm_mode_config_init(adev_to_drm(adev));
3768
3769 r = amdgpu_device_ip_init(adev);
3770 if (r) {
3771
3772 if (amdgpu_sriov_vf(adev) &&
3773 !amdgpu_sriov_runtime(adev) &&
3774 amdgpu_virt_mmio_blocked(adev) &&
3775 !amdgpu_virt_wait_reset(adev)) {
3776 dev_err(adev->dev, "VF exclusive mode timeout\n");
3777
3778 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3779 adev->virt.ops = NULL;
3780 r = -EAGAIN;
3781 goto release_ras_con;
3782 }
3783 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3784 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3785 goto release_ras_con;
3786 }
3787
3788 amdgpu_fence_driver_hw_init(adev);
3789
3790 dev_info(adev->dev,
3791 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3792 adev->gfx.config.max_shader_engines,
3793 adev->gfx.config.max_sh_per_se,
3794 adev->gfx.config.max_cu_per_sh,
3795 adev->gfx.cu_info.number);
3796
3797 adev->accel_working = true;
3798
3799 amdgpu_vm_check_compute_bug(adev);
3800
3801
3802 if (amdgpu_moverate >= 0)
3803 max_MBps = amdgpu_moverate;
3804 else
3805 max_MBps = 8;
3806
3807 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3808
3809 r = amdgpu_pm_sysfs_init(adev);
3810 if (r) {
3811 adev->pm_sysfs_en = false;
3812 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3813 } else
3814 adev->pm_sysfs_en = true;
3815
3816 r = amdgpu_ucode_sysfs_init(adev);
3817 if (r) {
3818 adev->ucode_sysfs_en = false;
3819 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3820 } else
3821 adev->ucode_sysfs_en = true;
3822
3823 r = amdgpu_psp_sysfs_init(adev);
3824 if (r) {
3825 adev->psp_sysfs_en = false;
3826 if (!amdgpu_sriov_vf(adev))
3827 DRM_ERROR("Creating psp sysfs failed\n");
3828 } else
3829 adev->psp_sysfs_en = true;
3830
3831
3832
3833
3834
3835
3836 amdgpu_register_gpu_instance(adev);
3837
3838
3839
3840
3841 if (!adev->gmc.xgmi.pending_reset) {
3842 r = amdgpu_device_ip_late_init(adev);
3843 if (r) {
3844 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3845 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3846 goto release_ras_con;
3847 }
3848
3849 amdgpu_ras_resume(adev);
3850 queue_delayed_work(system_wq, &adev->delayed_init_work,
3851 msecs_to_jiffies(AMDGPU_RESUME_MS));
3852 }
3853
3854 if (amdgpu_sriov_vf(adev))
3855 flush_delayed_work(&adev->delayed_init_work);
3856
3857 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3858 if (r)
3859 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3860
3861 if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
3862 r = amdgpu_pmu_init(adev);
3863 if (r)
3864 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
}
3865
3866
3867 if (amdgpu_device_cache_pci_state(adev->pdev))
3868 pci_restore_state(pdev);
3869
3870
3871
3872
3873 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3874 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3875
3876 if (amdgpu_device_supports_px(ddev)) {
3877 px = true;
3878 vga_switcheroo_register_client(adev->pdev,
3879 &amdgpu_switcheroo_ops, px);
3880 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3881 }
3882
3883 if (adev->gmc.xgmi.pending_reset)
3884 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3885 msecs_to_jiffies(AMDGPU_RESUME_MS));
3886
3887 amdgpu_device_check_iommu_direct_map(adev);
3888
3889 return 0;
3890
3891 release_ras_con:
3892 amdgpu_release_ras_context(adev);
3893
3894 failed:
3895 amdgpu_vf_error_trans_all(adev);
3896
3897 return r;
3898 }
3899
3900 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3901 {
3902
3903
3904 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3905
3906
3907 amdgpu_device_doorbell_fini(adev);
3908
3909 iounmap(adev->rmmio);
3910 adev->rmmio = NULL;
3911 if (adev->mman.aper_base_kaddr)
3912 iounmap(adev->mman.aper_base_kaddr);
3913 adev->mman.aper_base_kaddr = NULL;
3914
3915
3916 if (!adev->gmc.xgmi.connected_to_cpu) {
3917 arch_phys_wc_del(adev->gmc.vram_mtrr);
3918 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3919 }
3920 }
3921
3922
3923
3924
3925
3926
3927
3928
3929
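/**
 * amdgpu_device_fini_hw - tear down the hardware side of the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Disables interrupts and display, finalizes the fence driver hardware state
 * and runs the early IP fini path.  The remaining software teardown is done
 * in amdgpu_device_fini_sw().  Called at driver shutdown.
 */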
3930 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3931 {
3932 dev_info(adev->dev, "amdgpu: finishing device.\n");
3933 flush_delayed_work(&adev->delayed_init_work);
3934 adev->shutdown = true;
3935
3936
3937
3938
3939 if (amdgpu_sriov_vf(adev)) {
3940 amdgpu_virt_request_full_gpu(adev, false);
3941 amdgpu_virt_fini_data_exchange(adev);
3942 }
3943
3944
3945 amdgpu_irq_disable_all(adev);
3946 if (adev->mode_info.mode_config_initialized) {
3947 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
3948 drm_helper_force_disable_all(adev_to_drm(adev));
3949 else
3950 drm_atomic_helper_shutdown(adev_to_drm(adev));
3951 }
3952 amdgpu_fence_driver_hw_fini(adev);
3953
3954 if (adev->mman.initialized) {
3955 flush_delayed_work(&adev->mman.bdev.wq);
3956 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3957 }
3958
3959 if (adev->pm_sysfs_en)
3960 amdgpu_pm_sysfs_fini(adev);
3961 if (adev->ucode_sysfs_en)
3962 amdgpu_ucode_sysfs_fini(adev);
3963 if (adev->psp_sysfs_en)
3964 amdgpu_psp_sysfs_fini(adev);
3965 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3966
3967
3968 amdgpu_ras_pre_fini(adev);
3969
3970 amdgpu_device_ip_fini_early(adev);
3971
3972 amdgpu_irq_fini_hw(adev);
3973
3974 if (adev->mman.initialized)
3975 ttm_device_clear_dma_mappings(&adev->mman.bdev);
3976
3977 amdgpu_gart_dummy_page_fini(adev);
3978
3979 if (drm_dev_is_unplugged(adev_to_drm(adev)))
3980 amdgpu_device_unmap_mmio(adev);
3981
3982 }
3983
3984 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
3985 {
3986 int idx;
3987
3988 amdgpu_fence_driver_sw_fini(adev);
3989 amdgpu_device_ip_fini(adev);
3990 release_firmware(adev->firmware.gpu_info_fw);
3991 adev->firmware.gpu_info_fw = NULL;
3992 adev->accel_working = false;
3993
3994 amdgpu_reset_fini(adev);
3995
3996
3997 if (!amdgpu_device_has_dc_support(adev))
3998 amdgpu_i2c_fini(adev);
3999
4000 if (amdgpu_emu_mode != 1)
4001 amdgpu_atombios_fini(adev);
4002
4003 kfree(adev->bios);
4004 adev->bios = NULL;
4005 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
4006 vga_switcheroo_unregister_client(adev->pdev);
4007 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4008 }
4009 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4010 vga_client_unregister(adev->pdev);
4011
4012 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4013
4014 iounmap(adev->rmmio);
4015 adev->rmmio = NULL;
4016 amdgpu_device_doorbell_fini(adev);
4017 drm_dev_exit(idx);
4018 }
4019
4020 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4021 amdgpu_pmu_fini(adev);
4022 if (adev->mman.discovery_bin)
4023 amdgpu_discovery_fini(adev);
4024
4025 amdgpu_reset_put_reset_domain(adev->reset_domain);
4026 adev->reset_domain = NULL;
4027
4028 kfree(adev->pci_state);
4029
4030 }
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041 static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
4042 {
4043
4044 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4045 return;
4046
4047 if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
4048 DRM_WARN("evicting device resources failed\n");
4049
4050 }
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
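/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm device pointer
 * @fbcon: suspend the fbdev console as well
 *
 * Puts the hardware into a suspended state: disables display polling, evicts
 * buffers, and suspends the IP blocks and the fence driver.  Used for both
 * runtime and system suspend.  Returns 0 on success, negative error code on
 * failure.
 */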
4065 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4066 {
4067 struct amdgpu_device *adev = drm_to_adev(dev);
4068 int r = 0;
4069
4070 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4071 return 0;
4072
4073 adev->in_suspend = true;
4074
4075 if (amdgpu_sriov_vf(adev)) {
4076 amdgpu_virt_fini_data_exchange(adev);
4077 r = amdgpu_virt_request_full_gpu(adev, false);
4078 if (r)
4079 return r;
4080 }
4081
4082 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4083 DRM_WARN("smart shift update failed\n");
4084
4085 drm_kms_helper_poll_disable(dev);
4086
4087 if (fbcon)
4088 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4089
4090 cancel_delayed_work_sync(&adev->delayed_init_work);
4091
4092 amdgpu_ras_suspend(adev);
4093
4094 amdgpu_device_ip_suspend_phase1(adev);
4095
4096 if (!adev->in_s0ix)
4097 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4098
4099 amdgpu_device_evict_resources(adev);
4100
4101 amdgpu_fence_driver_hw_fini(adev);
4102
4103 amdgpu_device_ip_suspend_phase2(adev);
4104
4105 if (amdgpu_sriov_vf(adev))
4106 amdgpu_virt_release_full_gpu(adev, false);
4107
4108 return 0;
4109 }
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
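/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm device pointer
 * @fbcon: resume the fbdev console as well
 *
 * Brings the hardware back to a functional state: re-posts the vBIOS if
 * needed, resumes the IP blocks and the fence driver, and re-enables display
 * polling and RAS.  Used for both runtime and system resume.  Returns 0 on
 * success, negative error code on failure.
 */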
4121 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4122 {
4123 struct amdgpu_device *adev = drm_to_adev(dev);
4124 int r = 0;
4125
4126 if (amdgpu_sriov_vf(adev)) {
4127 r = amdgpu_virt_request_full_gpu(adev, true);
4128 if (r)
4129 return r;
4130 }
4131
4132 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4133 return 0;
4134
4135 if (adev->in_s0ix)
4136 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4137
4138
4139 if (amdgpu_device_need_post(adev)) {
4140 r = amdgpu_device_asic_init(adev);
4141 if (r)
4142 dev_err(adev->dev, "amdgpu asic init failed\n");
4143 }
4144
4145 r = amdgpu_device_ip_resume(adev);
4146
4147
4148 if (amdgpu_sriov_vf(adev)) {
4149 amdgpu_virt_init_data_exchange(adev);
4150 amdgpu_virt_release_full_gpu(adev, true);
4151 }
4152
4153 if (r) {
4154 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4155 return r;
4156 }
4157 amdgpu_fence_driver_hw_init(adev);
4158
4159 r = amdgpu_device_ip_late_init(adev);
4160 if (r)
4161 return r;
4162
4163 queue_delayed_work(system_wq, &adev->delayed_init_work,
4164 msecs_to_jiffies(AMDGPU_RESUME_MS));
4165
4166 if (!adev->in_s0ix) {
4167 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4168 if (r)
4169 return r;
4170 }
4171
4172
4173 flush_delayed_work(&adev->delayed_init_work);
4174
4175 if (fbcon)
4176 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4177
4178 drm_kms_helper_poll_enable(dev);
4179
4180 amdgpu_ras_resume(adev);
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
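/*
 * Temporarily raise power.disable_depth so the hotplug helpers below cannot
 * trigger a runtime PM transition while resume is still in progress.
 */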
4191 #ifdef CONFIG_PM
4192 dev->dev->power.disable_depth++;
4193 #endif
4194 if (!amdgpu_device_has_dc_support(adev))
4195 drm_helper_hpd_irq_event(dev);
4196 else
4197 drm_kms_helper_hotplug_event(dev);
4198 #ifdef CONFIG_PM
4199 dev->dev->power.disable_depth--;
4200 #endif
4201 adev->in_suspend = false;
4202
4203 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4204 DRM_WARN("smart shift update failed\n");
4205
4206 return 0;
4207 }
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4220 {
4221 int i;
4222 bool asic_hang = false;
4223
4224 if (amdgpu_sriov_vf(adev))
4225 return true;
4226
4227 if (amdgpu_asic_need_full_reset(adev))
4228 return true;
4229
4230 for (i = 0; i < adev->num_ip_blocks; i++) {
4231 if (!adev->ip_blocks[i].status.valid)
4232 continue;
4233 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4234 adev->ip_blocks[i].status.hang =
4235 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4236 if (adev->ip_blocks[i].status.hang) {
4237 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4238 asic_hang = true;
4239 }
4240 }
4241 return asic_hang;
4242 }
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4256 {
4257 int i, r = 0;
4258
4259 for (i = 0; i < adev->num_ip_blocks; i++) {
4260 if (!adev->ip_blocks[i].status.valid)
4261 continue;
4262 if (adev->ip_blocks[i].status.hang &&
4263 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4264 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4265 if (r)
4266 return r;
4267 }
4268 }
4269
4270 return 0;
4271 }
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4283 {
4284 int i;
4285
4286 if (amdgpu_asic_need_full_reset(adev))
4287 return true;
4288
4289 for (i = 0; i < adev->num_ip_blocks; i++) {
4290 if (!adev->ip_blocks[i].status.valid)
4291 continue;
4292 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4293 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4294 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4295 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4296 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4297 if (adev->ip_blocks[i].status.hang) {
4298 dev_info(adev->dev, "Some blocks need a full reset!\n");
4299 return true;
4300 }
4301 }
4302 }
4303 return false;
4304 }
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4318 {
4319 int i, r = 0;
4320
4321 for (i = 0; i < adev->num_ip_blocks; i++) {
4322 if (!adev->ip_blocks[i].status.valid)
4323 continue;
4324 if (adev->ip_blocks[i].status.hang &&
4325 adev->ip_blocks[i].version->funcs->soft_reset) {
4326 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4327 if (r)
4328 return r;
4329 }
4330 }
4331
4332 return 0;
4333 }
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4347 {
4348 int i, r = 0;
4349
4350 for (i = 0; i < adev->num_ip_blocks; i++) {
4351 if (!adev->ip_blocks[i].status.valid)
4352 continue;
4353 if (adev->ip_blocks[i].status.hang &&
4354 adev->ip_blocks[i].version->funcs->post_soft_reset)
4355 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4356 if (r)
4357 return r;
4358 }
4359
4360 return 0;
4361 }
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
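/**
 * amdgpu_device_recover_vram - recover some VRAM contents
 *
 * @adev: amdgpu_device pointer
 *
 * Restores page-table and other shadowed VRAM buffers from their GTT shadows
 * after VRAM contents were lost, waiting on the restore fences with a
 * timeout.  Returns 0 on success, -EIO on failure.
 */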
4375 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4376 {
4377 struct dma_fence *fence = NULL, *next = NULL;
4378 struct amdgpu_bo *shadow;
4379 struct amdgpu_bo_vm *vmbo;
4380 long r = 1, tmo;
4381
4382 if (amdgpu_sriov_runtime(adev))
4383 tmo = msecs_to_jiffies(8000);
4384 else
4385 tmo = msecs_to_jiffies(100);
4386
4387 dev_info(adev->dev, "recover vram bo from shadow start\n");
4388 mutex_lock(&adev->shadow_list_lock);
4389 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4390 shadow = &vmbo->bo;
4391
4392 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4393 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4394 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4395 continue;
4396
4397 r = amdgpu_bo_restore_shadow(shadow, &next);
4398 if (r)
4399 break;
4400
4401 if (fence) {
4402 tmo = dma_fence_wait_timeout(fence, false, tmo);
4403 dma_fence_put(fence);
4404 fence = next;
4405 if (tmo == 0) {
4406 r = -ETIMEDOUT;
4407 break;
4408 } else if (tmo < 0) {
4409 r = tmo;
4410 break;
4411 }
4412 } else {
4413 fence = next;
4414 }
4415 }
4416 mutex_unlock(&adev->shadow_list_lock);
4417
4418 if (fence)
4419 tmo = dma_fence_wait_timeout(fence, false, tmo);
4420 dma_fence_put(fence);
4421
4422 if (r < 0 || tmo <= 0) {
4423 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4424 return -EIO;
4425 }
4426
4427 dev_info(adev->dev, "recover vram bo from shadow done\n");
4428 return 0;
4429 }
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
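/**
 * amdgpu_device_reset_sriov - reset the ASIC under SR-IOV
 *
 * @adev: amdgpu_device pointer
 * @from_hypervisor: request full GPU access from the hypervisor
 *
 * Performs a VF-level reset: requests the GPU from the host (or asks the
 * host to reset it), re-initializes the IP blocks, reloads firmware and
 * recovers VRAM if it was lost.  Retries a limited number of times on
 * transient errors.  Returns 0 on success, negative error code on failure.
 */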
4441 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4442 bool from_hypervisor)
4443 {
4444 int r;
4445 struct amdgpu_hive_info *hive = NULL;
4446 int retry_limit = 0;
4447
4448 retry:
4449 amdgpu_amdkfd_pre_reset(adev);
4450
4451 if (from_hypervisor)
4452 r = amdgpu_virt_request_full_gpu(adev, true);
4453 else
4454 r = amdgpu_virt_reset_gpu(adev);
4455 if (r)
4456 return r;
4457
4458
4459 r = amdgpu_device_ip_reinit_early_sriov(adev);
4460 if (r)
4461 goto error;
4462
4463 amdgpu_virt_init_data_exchange(adev);
4464
4465 r = amdgpu_device_fw_loading(adev);
4466 if (r)
4467 return r;
4468
4469
4470 r = amdgpu_device_ip_reinit_late_sriov(adev);
4471 if (r)
4472 goto error;
4473
4474 hive = amdgpu_get_xgmi_hive(adev);
4475
4476 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4477 r = amdgpu_xgmi_update_topology(hive, adev);
4478
4479 if (hive)
4480 amdgpu_put_xgmi_hive(hive);
4481
4482 if (!r) {
4483 amdgpu_irq_gpu_reset_resume_helper(adev);
4484 r = amdgpu_ib_ring_tests(adev);
4485
4486 amdgpu_amdkfd_post_reset(adev);
4487 }
4488
4489 error:
4490 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4491 amdgpu_inc_vram_lost(adev);
4492 r = amdgpu_device_recover_vram(adev);
4493 }
4494 amdgpu_virt_release_full_gpu(adev, true);
4495
4496 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4497 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4498 retry_limit++;
4499 goto retry;
4500 } else {
4501 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
}
4502 }
4503
4504 return r;
4505 }
4506
4507
4508
4509
4510
4511
4512
4513
4514 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4515 {
4516 int i;
4517 struct drm_sched_job *job;
4518
4519 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4520 struct amdgpu_ring *ring = adev->rings[i];
4521
4522 if (!ring || !ring->sched.thread)
4523 continue;
4524
4525 spin_lock(&ring->sched.job_list_lock);
4526 job = list_first_entry_or_null(&ring->sched.pending_list,
4527 struct drm_sched_job, list);
4528 spin_unlock(&ring->sched.job_list_lock);
4529 if (job)
4530 return true;
4531 }
4532 return false;
4533 }
4534
4535
4536
4537
4538
4539
4540
4541
4542
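/**
 * amdgpu_device_should_recover_gpu - check if recovery should be attempted
 *
 * @adev: amdgpu_device pointer
 *
 * Decides whether a GPU reset should be attempted, honouring the
 * amdgpu_gpu_recovery module parameter and the per-ASIC defaults.
 * Returns true if recovery should proceed, false otherwise.
 */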
4543 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4544 {
4545 if (!amdgpu_device_ip_check_soft_reset(adev)) {
4546 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
4547 return false;
4548 }
4549
4550 if (amdgpu_gpu_recovery == 0)
4551 goto disabled;
4552
4553 if (amdgpu_sriov_vf(adev))
4554 return true;
4555
4556 if (amdgpu_gpu_recovery == -1) {
4557 switch (adev->asic_type) {
4558 #ifdef CONFIG_DRM_AMDGPU_SI
4559 case CHIP_VERDE:
4560 case CHIP_TAHITI:
4561 case CHIP_PITCAIRN:
4562 case CHIP_OLAND:
4563 case CHIP_HAINAN:
4564 #endif
4565 #ifdef CONFIG_DRM_AMDGPU_CIK
4566 case CHIP_KAVERI:
4567 case CHIP_KABINI:
4568 case CHIP_MULLINS:
4569 #endif
4570 case CHIP_CARRIZO:
4571 case CHIP_STONEY:
4572 case CHIP_CYAN_SKILLFISH:
4573 goto disabled;
4574 default:
4575 break;
4576 }
4577 }
4578
4579 return true;
4580
4581 disabled:
4582 dev_info(adev->dev, "GPU recovery disabled.\n");
4583 return false;
4584 }
4585
4586 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4587 {
4588 u32 i;
4589 int ret = 0;
4590
4591 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4592
4593 dev_info(adev->dev, "GPU mode1 reset\n");
4594
4595
4596 pci_clear_master(adev->pdev);
4597
4598 amdgpu_device_cache_pci_state(adev->pdev);
4599
4600 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4601 dev_info(adev->dev, "GPU smu mode1 reset\n");
4602 ret = amdgpu_dpm_mode1_reset(adev);
4603 } else {
4604 dev_info(adev->dev, "GPU psp mode1 reset\n");
4605 ret = psp_gpu_reset(adev);
4606 }
4607
4608 if (ret)
4609 dev_err(adev->dev, "GPU mode1 reset failed\n");
4610
4611 amdgpu_device_load_pci_state(adev->pdev);
4612
4613
4614 for (i = 0; i < adev->usec_timeout; i++) {
4615 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4616
4617 if (memsize != 0xffffffff)
4618 break;
4619 udelay(1);
4620 }
4621
4622 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4623 return ret;
4624 }
4625
4626 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4627 struct amdgpu_reset_context *reset_context)
4628 {
4629 int i, r = 0;
4630 struct amdgpu_job *job = NULL;
4631 bool need_full_reset =
4632 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4633
4634 if (reset_context->reset_req_dev == adev)
4635 job = reset_context->job;
4636
4637 if (amdgpu_sriov_vf(adev)) {
4638
4639 amdgpu_virt_fini_data_exchange(adev);
4640 }
4641
4642 amdgpu_fence_driver_isr_toggle(adev, true);
4643
4644
4645 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4646 struct amdgpu_ring *ring = adev->rings[i];
4647
4648 if (!ring || !ring->sched.thread)
4649 continue;
4650
4651
4652
4653 amdgpu_fence_driver_clear_job_fences(ring);
4654
4655
4656 amdgpu_fence_driver_force_completion(ring);
4657 }
4658
4659 amdgpu_fence_driver_isr_toggle(adev, false);
4660
4661 if (job && job->vm)
4662 drm_sched_increase_karma(&job->base);
4663
4664 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4665
4666 if (r == -ENOSYS)
4667 r = 0;
4668 else
4669 return r;
4670
4671
4672 if (!amdgpu_sriov_vf(adev)) {
4673
4674 if (!need_full_reset)
4675 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4676
4677 if (!need_full_reset) {
4678 amdgpu_device_ip_pre_soft_reset(adev);
4679 r = amdgpu_device_ip_soft_reset(adev);
4680 amdgpu_device_ip_post_soft_reset(adev);
4681 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4682 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4683 need_full_reset = true;
4684 }
4685 }
4686
4687 if (need_full_reset)
4688 r = amdgpu_device_ip_suspend(adev);
4689 if (need_full_reset)
4690 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4691 else
4692 clear_bit(AMDGPU_NEED_FULL_RESET,
4693 &reset_context->flags);
4694 }
4695
4696 return r;
4697 }
4698
4699 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4700 {
4701 int i;
4702
4703 lockdep_assert_held(&adev->reset_domain->sem);
4704
4705 for (i = 0; i < adev->num_regs; i++) {
4706 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4707 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4708 adev->reset_dump_reg_value[i]);
4709 }
4710
4711 return 0;
4712 }
4713
4714 #ifdef CONFIG_DEV_COREDUMP
4715 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4716 size_t count, void *data, size_t datalen)
4717 {
4718 struct drm_printer p;
4719 struct amdgpu_device *adev = data;
4720 struct drm_print_iterator iter;
4721 int i;
4722
4723 iter.data = buffer;
4724 iter.offset = 0;
4725 iter.start = offset;
4726 iter.remain = count;
4727
4728 p = drm_coredump_printer(&iter);
4729
4730 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4731 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4732 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4733 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4734 if (adev->reset_task_info.pid)
4735 drm_printf(&p, "process_name: %s PID: %d\n",
4736 adev->reset_task_info.process_name,
4737 adev->reset_task_info.pid);
4738
4739 if (adev->reset_vram_lost)
4740 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4741 if (adev->num_regs) {
4742 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4743
4744 for (i = 0; i < adev->num_regs; i++)
4745 drm_printf(&p, "0x%08x: 0x%08x\n",
4746 adev->reset_dump_reg_list[i],
4747 adev->reset_dump_reg_value[i]);
4748 }
4749
4750 return count - iter.remain;
4751 }
4752
4753 static void amdgpu_devcoredump_free(void *data)
4754 {
4755 }
4756
4757 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4758 {
4759 struct drm_device *dev = adev_to_drm(adev);
4760
4761 ktime_get_ts64(&adev->reset_time);
4762 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4763 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4764 }
4765 #endif
4766
4767 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4768 struct amdgpu_reset_context *reset_context)
4769 {
4770 struct amdgpu_device *tmp_adev = NULL;
4771 bool need_full_reset, skip_hw_reset, vram_lost = false;
4772 int r = 0;
4773
4774
4775 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4776 reset_list);
4777 amdgpu_reset_reg_dumps(tmp_adev);
4778
4779 reset_context->reset_device_list = device_list_handle;
4780 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4781
4782 if (r == -ENOSYS)
4783 r = 0;
4784 else
4785 return r;
4786
4787
4788 need_full_reset =
4789 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4790 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4791
4792
4793
4794
4795
4796 if (!skip_hw_reset && need_full_reset) {
4797 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4798
4799 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4800 tmp_adev->gmc.xgmi.pending_reset = false;
4801 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4802 r = -EALREADY;
4803 } else
4804 r = amdgpu_asic_reset(tmp_adev);
4805
4806 if (r) {
4807 dev_err(tmp_adev->dev, "ASIC reset failed with error %d for drm dev %s\n",
4808 r, adev_to_drm(tmp_adev)->unique);
4809 break;
4810 }
4811 }
4812
4813
4814 if (!r) {
4815 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4816 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4817 flush_work(&tmp_adev->xgmi_reset_work);
4818 r = tmp_adev->asic_reset_res;
4819 if (r)
4820 break;
4821 }
4822 }
4823 }
4824 }
4825
4826 if (!r && amdgpu_ras_intr_triggered()) {
4827 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4828 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4829 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4830 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4831 }
4832
4833 amdgpu_ras_intr_cleared();
4834 }
4835
4836 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4837 if (need_full_reset) {
4838
4839 r = amdgpu_device_asic_init(tmp_adev);
4840 if (r) {
4841 dev_warn(tmp_adev->dev, "asic atom init failed!");
4842 } else {
4843 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4844 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4845 if (r)
4846 goto out;
4847
4848 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4849 if (r)
4850 goto out;
4851
4852 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4853 #ifdef CONFIG_DEV_COREDUMP
4854 tmp_adev->reset_vram_lost = vram_lost;
4855 memset(&tmp_adev->reset_task_info, 0,
4856 sizeof(tmp_adev->reset_task_info));
4857 if (reset_context->job && reset_context->job->vm)
4858 tmp_adev->reset_task_info =
4859 reset_context->job->vm->task_info;
4860 amdgpu_reset_capture_coredumpm(tmp_adev);
4861 #endif
4862 if (vram_lost) {
4863 DRM_INFO("VRAM is lost due to GPU reset!\n");
4864 amdgpu_inc_vram_lost(tmp_adev);
4865 }
4866
4867 r = amdgpu_device_fw_loading(tmp_adev);
4868 if (r)
4869 return r;
4870
4871 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4872 if (r)
4873 goto out;
4874
4875 if (vram_lost)
4876 amdgpu_device_fill_reset_magic(tmp_adev);
4877
4878
4879
4880
4881
4882 amdgpu_register_gpu_instance(tmp_adev);
4883
4884 if (!reset_context->hive &&
4885 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4886 amdgpu_xgmi_add_device(tmp_adev);
4887
4888 r = amdgpu_device_ip_late_init(tmp_adev);
4889 if (r)
4890 goto out;
4891
4892 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4905
4906 amdgpu_ras_resume(tmp_adev);
4907 } else {
4908 r = -EINVAL;
4909 goto out;
4910 }
4911
4912
4913 if (reset_context->hive &&
4914 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4915 r = amdgpu_xgmi_update_topology(
4916 reset_context->hive, tmp_adev);
4917 }
4918 }
4919
4920 out:
4921 if (!r) {
4922 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4923 r = amdgpu_ib_ring_tests(tmp_adev);
4924 if (r) {
4925 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4926 need_full_reset = true;
4927 r = -EAGAIN;
4928 goto end;
4929 }
4930 }
4931
4932 if (!r)
4933 r = amdgpu_device_recover_vram(tmp_adev);
4934 else
4935 tmp_adev->asic_reset_res = r;
4936 }
4937
4938 end:
4939 if (need_full_reset)
4940 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4941 else
4942 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4943 return r;
4944 }
4945
4946 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
4947 {
4948
4949 switch (amdgpu_asic_reset_method(adev)) {
4950 case AMD_RESET_METHOD_MODE1:
4951 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4952 break;
4953 case AMD_RESET_METHOD_MODE2:
4954 adev->mp1_state = PP_MP1_STATE_RESET;
4955 break;
4956 default:
4957 adev->mp1_state = PP_MP1_STATE_NONE;
4958 break;
4959 }
4960 }
4961
4962 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
4963 {
4964 amdgpu_vf_error_trans_all(adev);
4965 adev->mp1_state = PP_MP1_STATE_NONE;
4966 }
4967
4968 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4969 {
4970 struct pci_dev *p = NULL;
4971
4972 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4973 adev->pdev->bus->number, 1);
4974 if (p) {
4975 pm_runtime_enable(&(p->dev));
4976 pm_runtime_resume(&(p->dev));
4977 }
4978 }
4979
4980 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4981 {
4982 enum amd_reset_method reset_method;
4983 struct pci_dev *p = NULL;
4984 u64 expires;
4985
4986
4987
4988
4989
4990 reset_method = amdgpu_asic_reset_method(adev);
4991 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4992 (reset_method != AMD_RESET_METHOD_MODE1))
4993 return -EINVAL;
4994
4995 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4996 adev->pdev->bus->number, 1);
4997 if (!p)
4998 return -ENODEV;
4999
5000 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5001 if (!expires)
5002
5003
5004
5005
5006
5007
5008 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5009
5010 while (!pm_runtime_status_suspended(&(p->dev))) {
5011 if (!pm_runtime_suspend(&(p->dev)))
5012 break;
5013
5014 if (expires < ktime_get_mono_fast_ns()) {
5015 dev_warn(adev->dev, "failed to suspend display audio\n");
5016
5017 return -ETIMEDOUT;
5018 }
5019 }
5020
5021 pm_runtime_disable(&(p->dev));
5022
5023 return 0;
5024 }
5025
5026 static void amdgpu_device_recheck_guilty_jobs(
5027 struct amdgpu_device *adev, struct list_head *device_list_handle,
5028 struct amdgpu_reset_context *reset_context)
5029 {
5030 int i, r = 0;
5031
5032 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5033 struct amdgpu_ring *ring = adev->rings[i];
5034 int ret = 0;
5035 struct drm_sched_job *s_job;
5036
5037 if (!ring || !ring->sched.thread)
5038 continue;
5039
5040 s_job = list_first_entry_or_null(&ring->sched.pending_list,
5041 struct drm_sched_job, list);
5042 if (s_job == NULL)
5043 continue;
5044
5045
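/* Clear the job's guilty status; the timed wait below decides the real culprit */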
5046 drm_sched_reset_karma(s_job);
5047 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
5048
5049 if (!s_job->s_fence->parent) {
5050 DRM_WARN("Failed to get a HW fence for job!");
5051 continue;
5052 }
5053
5054 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
5055 if (ret == 0) {
5056 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
5057 ring->sched.name, s_job->id);
5058
5059
5060 amdgpu_fence_driver_isr_toggle(adev, true);
5061
5062
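/* Clear this failed job's hardware fences from the ring's fence array */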
5063 amdgpu_fence_driver_clear_job_fences(ring);
5064
5065 amdgpu_fence_driver_isr_toggle(adev, false);

/*
 * The job will never signal and we are going to resubmit, so drop
 * the stale parent (hardware) fence.
 */
5070 dma_fence_put(s_job->s_fence->parent);
5071 s_job->s_fence->parent = NULL;
5072
5073
5074 drm_sched_increase_karma(s_job);
5075 retry:
5076
5077 if (amdgpu_sriov_vf(adev)) {
5078 amdgpu_virt_fini_data_exchange(adev);
5079 r = amdgpu_device_reset_sriov(adev, false);
5080 if (r)
5081 adev->asic_reset_res = r;
5082 } else {
5083 clear_bit(AMDGPU_SKIP_HW_RESET,
5084 &reset_context->flags);
5085 r = amdgpu_do_asic_reset(device_list_handle,
5086 reset_context);
5087 if (r && r == -EAGAIN)
5088 goto retry;
5089 }
5090
/*
 * Bump the reset counter so that the resubmitted jobs will flush
 * their VMIDs.
 */
5095 atomic_inc(&adev->gpu_reset_counter);
5096 continue;
5097 }
5098
5099
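/* The HW fence signaled in time, so signal the scheduler's finished fence */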
5100 atomic_dec(ring->sched.score);
5101 dma_fence_get(&s_job->s_fence->finished);
5102 dma_fence_signal(&s_job->s_fence->finished);
5103 dma_fence_put(&s_job->s_fence->finished);
5104
5105
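/* Remove the job from the pending list and free it */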
5106 spin_lock(&ring->sched.job_list_lock);
5107 list_del_init(&s_job->list);
5108 spin_unlock(&ring->sched.job_list_lock);
5109 ring->sched.ops->free_job(s_job);
5110 }
5111 }
5112
5113 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5114 {
5115 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5116
5117 #if defined(CONFIG_DEBUG_FS)
5118 if (!amdgpu_sriov_vf(adev))
5119 cancel_work(&adev->reset_work);
5120 #endif
5121
5122 if (adev->kfd.dev)
5123 cancel_work(&adev->kfd.reset_work);
5124
5125 if (amdgpu_sriov_vf(adev))
5126 cancel_work(&adev->virt.flr_work);
5127
5128 if (con && adev->ras_enabled)
5129 cancel_work(&con->recovery_work);
5130
5131 }
5132
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: which job triggered the hang, or NULL
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics), either with a
 * soft reset or a full reset, and reinitialize the ASIC afterwards.
 *
 * Returns 0 for success or an error on failure.
 */
5145 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5146 struct amdgpu_job *job,
5147 struct amdgpu_reset_context *reset_context)
5148 {
5149 struct list_head device_list, *device_list_handle = NULL;
5150 bool job_signaled = false;
5151 struct amdgpu_hive_info *hive = NULL;
5152 struct amdgpu_device *tmp_adev = NULL;
5153 int i, r = 0;
5154 bool need_emergency_restart = false;
5155 bool audio_suspended = false;
5156 int tmp_vram_lost_counter;
5157
/*
 * Special case: a RAS fatal error was triggered and a full GPU reset
 * is not supported, so jobs are only stopped and the machine rebooted.
 */
5161 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5162
/*
 * Flush RAM to disk so that after the reboot the user can read the
 * logs and see why the system rebooted.
 */
5167 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5168 DRM_WARN("Emergency reboot.");
5169
5170 ksys_sync_helper();
5171 emergency_restart();
5172 }
5173
5174 dev_info(adev->dev, "GPU %s begin!\n",
5175 need_emergency_restart ? "jobs stop":"reset");
5176
5177 if (!amdgpu_sriov_vf(adev))
5178 hive = amdgpu_get_xgmi_hive(adev);
5179 if (hive)
5180 mutex_lock(&hive->hive_lock);
5181
5182 reset_context->job = job;
5183 reset_context->hive = hive;
5184
/*
 * Build the list of devices to reset. In XGMI hive mode, resort the
 * device list so that adev ends up in the first position.
 */
5189 INIT_LIST_HEAD(&device_list);
5190 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5191 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
5192 list_add_tail(&tmp_adev->reset_list, &device_list);
5193 if (!list_is_first(&adev->reset_list, &device_list))
5194 list_rotate_to_front(&adev->reset_list, &device_list);
5195 device_list_handle = &device_list;
5196 } else {
5197 list_add_tail(&adev->reset_list, &device_list);
5198 device_list_handle = &device_list;
5199 }
5200
5201
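/* The reset domain only needs to be locked once, for both XGMI hives and single devices */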
5202 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5203 reset_list);
5204 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5205
5206
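/* Block all schedulers and reset the given job's ring */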
5207 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5208
5209 amdgpu_device_set_mp1_state(tmp_adev);
5210
/*
 * Try to put the audio codec into suspend state before the GPU
 * reset starts.
 *
 * The graphics device shares a power domain with the audio (AZ)
 * function; without this we could change the audio hardware from
 * behind the audio driver's back and trigger codec errors.
 */
5221 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5222 audio_suspended = true;
5223
5224 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5225
5226 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5227
5228 if (!amdgpu_sriov_vf(tmp_adev))
5229 amdgpu_amdkfd_pre_reset(tmp_adev);
5230
/*
 * Mark the devices being reset as untracked first; they are added
 * back once the reset has completed.
 */
5235 amdgpu_unregister_gpu_instance(tmp_adev);
5236
5237 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5238
5239
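/* Suspend RAS before a full reset (not needed for an emergency restart) */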
5240 if (!need_emergency_restart &&
5241 amdgpu_device_ip_need_full_reset(tmp_adev))
5242 amdgpu_ras_suspend(tmp_adev);
5243
5244 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5245 struct amdgpu_ring *ring = tmp_adev->rings[i];
5246
5247 if (!ring || !ring->sched.thread)
5248 continue;
5249
5250 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5251
5252 if (need_emergency_restart)
5253 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5254 }
5255 atomic_inc(&tmp_adev->gpu_reset_counter);
5256 }
5257
5258 if (need_emergency_restart)
5259 goto skip_sched_resume;
5260
/*
 * Must check the guilty job's signal here, since after this point
 * all old HW fences are force signaled.
 *
 * job->base holds a reference to its parent fence.
 */
5267 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5268 job_signaled = true;
5269 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5270 goto skip_hw_reset;
5271 }
5272
5273 retry:
5274 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5275 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5276
5277 if (r) {
5278 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5279 r, adev_to_drm(tmp_adev)->unique);
5280 tmp_adev->asic_reset_res = r;
5281 }
5282
/*
 * Drop all pending non-scheduler resets; scheduler resets were
 * already dropped during drm_sched_stop.
 */
5287 amdgpu_device_stop_pending_resets(tmp_adev);
5288 }
5289
5290 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));

/* Actual ASIC resets, if needed; the host driver handles XGMI hive resets for SRIOV */
5293 if (amdgpu_sriov_vf(adev)) {
5294 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5295 if (r)
5296 adev->asic_reset_res = r;
5297
5298
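/* Aldebaran (GC 9.4.2) supports RAS under SRIOV, so resume RAS after the reset */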
5299 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5300 amdgpu_ras_resume(adev);
5301 } else {
5302 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5303 if (r && r == -EAGAIN)
5304 goto retry;
5305 }
5306
5307 skip_hw_reset:

/* Post-ASIC-reset processing for all devices */
5310 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
/*
 * Sometimes a later bad compute job can block a good gfx job because
 * the gfx and compute rings share internal GC hardware. Add an extra
 * guilty-job recheck step to find the job that really caused the hang;
 * the recheck is delayed to the resume stage to avoid breaking the
 * existing job resubmission logic.
 */
5319 if (amdgpu_gpu_recovery == 2 &&
5320 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
5321 amdgpu_device_recheck_guilty_jobs(
5322 tmp_adev, device_list_handle, reset_context);
5323
5324 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5325 struct amdgpu_ring *ring = tmp_adev->rings[i];
5326
5327 if (!ring || !ring->sched.thread)
5328 continue;
5329
5330
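/* No point in resubmitting jobs if we didn't do a HW reset */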
5331 if (!tmp_adev->asic_reset_res && !job_signaled)
5332 drm_sched_resubmit_jobs(&ring->sched);
5333
5334 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5335 }
5336
5337 if (adev->enable_mes)
5338 amdgpu_mes_self_test(tmp_adev);
5339
5340 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5341 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5342 }
5343
5344 if (tmp_adev->asic_reset_res)
5345 r = tmp_adev->asic_reset_res;
5346
5347 tmp_adev->asic_reset_res = 0;
5348
5349 if (r) {
5350
5351 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5352 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5353 } else {
5354 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5355 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5356 DRM_WARN("smart shift update failed\n");
5357 }
5358 }
5359
5360 skip_sched_resume:
5361 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5362
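/* Resume KFD; the SRIOV path handles this separately */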
5363 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5364 amdgpu_amdkfd_post_reset(tmp_adev);
5365
/*
 * kfd_post_reset does nothing if the KFD device was never
 * initialized, so bring KFD up here in that case.
 */
5369 if (!adev->kfd.init_complete)
5370 amdgpu_amdkfd_device_init(adev);
5371
5372 if (audio_suspended)
5373 amdgpu_device_resume_display_audio(tmp_adev);
5374
5375 amdgpu_device_unset_mp1_state(tmp_adev);
5376 }
5377
5378 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5379 reset_list);
5380 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5381
5382 if (hive) {
5383 mutex_unlock(&hive->hive_lock);
5384 amdgpu_put_xgmi_hive(hive);
5385 }
5386
5387 if (r)
5388 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5389
5390 atomic_set(&adev->reset_domain->reset_res, r);
5391 return r;
5392 }
5393
/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (generations,
 * speeds, lanes) of the device and the slot it sits in, honoring the
 * amdgpu_pcie_gen_cap and amdgpu_pcie_lane_cap module parameters when set.
 */
5403 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5404 {
5405 struct pci_dev *pdev;
5406 enum pci_bus_speed speed_cap, platform_speed_cap;
5407 enum pcie_link_width platform_link_width;
5408
5409 if (amdgpu_pcie_gen_cap)
5410 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5411
5412 if (amdgpu_pcie_lane_cap)
5413 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5414
5415
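/* Devices sitting directly on the root bus (this also covers APUs) just use the default masks */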
5416 if (pci_is_root_bus(adev->pdev->bus)) {
5417 if (adev->pm.pcie_gen_mask == 0)
5418 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5419 if (adev->pm.pcie_mlw_mask == 0)
5420 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5421 return;
5422 }
5423
5424 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5425 return;
5426
5427 pcie_bandwidth_available(adev->pdev, NULL,
5428 &platform_speed_cap, &platform_link_width);
5429
5430 if (adev->pm.pcie_gen_mask == 0) {
5431
5432 pdev = adev->pdev;
5433 speed_cap = pcie_get_speed_cap(pdev);
5434 if (speed_cap == PCI_SPEED_UNKNOWN) {
5435 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5436 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5437 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5438 } else {
5439 if (speed_cap == PCIE_SPEED_32_0GT)
5440 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5441 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5442 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5443 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5444 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5445 else if (speed_cap == PCIE_SPEED_16_0GT)
5446 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5447 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5448 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5449 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5450 else if (speed_cap == PCIE_SPEED_8_0GT)
5451 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5452 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5453 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5454 else if (speed_cap == PCIE_SPEED_5_0GT)
5455 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5456 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5457 else
5458 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5459 }
5460
5461 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5462 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5463 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5464 } else {
5465 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5466 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5467 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5468 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5469 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5470 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5471 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5472 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5473 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5474 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5475 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5476 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5477 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5478 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5479 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5480 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5481 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5482 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5483 else
5484 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5485
5486 }
5487 }
5488 if (adev->pm.pcie_mlw_mask == 0) {
5489 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5490 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5491 } else {
5492 switch (platform_link_width) {
5493 case PCIE_LNK_X32:
5494 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5495 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5496 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5497 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5498 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5499 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5501 break;
5502 case PCIE_LNK_X16:
5503 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5504 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5509 break;
5510 case PCIE_LNK_X12:
5511 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5516 break;
5517 case PCIE_LNK_X8:
5518 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5522 break;
5523 case PCIE_LNK_X4:
5524 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5527 break;
5528 case PCIE_LNK_X2:
5529 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5531 break;
5532 case PCIE_LNK_X1:
5533 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5534 break;
5535 default:
5536 break;
5537 }
5538 }
5539 }
5540 }
5541
/**
 * amdgpu_device_is_peer_accessible - Check peer access through the PCIe BAR
 *
 * @adev: amdgpu_device pointer
 * @peer_adev: amdgpu_device pointer of the peer device for p2p access
 *
 * Returns true if @peer_adev can access (DMA) @adev through the PCIe BAR,
 * i.e. all of the GPU's VRAM is visible through the BAR and the aperture
 * lies within the peer's DMA mask.
 */
5552 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5553 struct amdgpu_device *peer_adev)
5554 {
5555 #ifdef CONFIG_HSA_AMD_P2P
5556 uint64_t address_mask = peer_adev->dev->dma_mask ?
5557 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5558 resource_size_t aper_limit =
5559 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5560 bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
5561 !(pci_p2pdma_distance_many(adev->pdev,
5562 &peer_adev->dev, 1, true) < 0);
5563
5564 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5565 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5566 !(adev->gmc.aper_base & address_mask ||
5567 aper_limit & address_mask));
5568 #else
5569 return false;
5570 #endif
5571 }
5572
5573 int amdgpu_device_baco_enter(struct drm_device *dev)
5574 {
5575 struct amdgpu_device *adev = drm_to_adev(dev);
5576 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5577
5578 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5579 return -ENOTSUPP;
5580
5581 if (ras && adev->ras_enabled &&
5582 adev->nbio.funcs->enable_doorbell_interrupt)
5583 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5584
5585 return amdgpu_dpm_baco_enter(adev);
5586 }
5587
5588 int amdgpu_device_baco_exit(struct drm_device *dev)
5589 {
5590 struct amdgpu_device *adev = drm_to_adev(dev);
5591 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5592 int ret = 0;
5593
5594 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5595 return -ENOTSUPP;
5596
5597 ret = amdgpu_dpm_baco_exit(adev);
5598 if (ret)
5599 return ret;
5600
5601 if (ras && adev->ras_enabled &&
5602 adev->nbio.funcs->enable_doorbell_interrupt)
5603 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5604
5605 if (amdgpu_passthrough(adev) &&
5606 adev->nbio.funcs->clear_doorbell_interrupt)
5607 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5608
5609 return 0;
5610 }
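
/*
 * Illustrative pairing of the two BACO helpers above, not code from this
 * file: a caller (for instance a runtime-suspend path) would enter BACO
 * and later leave it again roughly like this:
 *
 *	r = amdgpu_device_baco_enter(ddev);
 *	...
 *	if (!r)
 *		r = amdgpu_device_baco_exit(ddev);
 */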
5611
/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Return: PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_NEED_RESET or
 * PCI_ERS_RESULT_DISCONNECT, depending on the channel state.
 */
5621 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5622 {
5623 struct drm_device *dev = pci_get_drvdata(pdev);
5624 struct amdgpu_device *adev = drm_to_adev(dev);
5625 int i;
5626
5627 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5628
5629 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5630 DRM_WARN("No support for XGMI hive yet...");
5631 return PCI_ERS_RESULT_DISCONNECT;
5632 }
5633
5634 adev->pci_channel_state = state;
5635
5636 switch (state) {
5637 case pci_channel_io_normal:
5638 return PCI_ERS_RESULT_CAN_RECOVER;
5639
5640 case pci_channel_io_frozen:
/*
 * Locking adev->reset_domain->sem prevents any external access to
 * the GPU during PCI error recovery.
 */
5645 amdgpu_device_lock_reset_domain(adev->reset_domain);
5646 amdgpu_device_set_mp1_state(adev);
5647
/*
 * Block any work scheduling, as we do for a regular GPU reset, for
 * the duration of the recovery.
 */
5652 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5653 struct amdgpu_ring *ring = adev->rings[i];
5654
5655 if (!ring || !ring->sched.thread)
5656 continue;
5657
5658 drm_sched_stop(&ring->sched, NULL);
5659 }
5660 atomic_inc(&adev->gpu_reset_counter);
5661 return PCI_ERS_RESULT_NEED_RESET;
5662 case pci_channel_io_perm_failure:
5663
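/* Permanent failure: ask the PCI core to disconnect the device */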
5664 return PCI_ERS_RESULT_DISCONNECT;
5665 }
5666
5667 return PCI_ERS_RESULT_NEED_RESET;
5668 }
5669
/**
 * amdgpu_pci_mmio_enabled - Called when MMIO access is re-enabled
 * @pdev: PCI device struct
 */
5674 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5675 {
5676
5677 DRM_INFO("PCI error: mmio enabled callback!!\n");
5678
/*
 * This callback only runs if amdgpu_pci_error_detected returned
 * PCI_ERS_RESULT_CAN_RECOVER. Reads and writes to the device still
 * work, so there is no need to reset the slot.
 */
5686 return PCI_ERS_RESULT_RECOVERED;
5687 }
5688
/**
 * amdgpu_pci_slot_reset - Called when the PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the PCI error recovery code
 * after the PCI slot has been reset, just before we should resume
 * normal operations.
 */
5697 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5698 {
5699 struct drm_device *dev = pci_get_drvdata(pdev);
5700 struct amdgpu_device *adev = drm_to_adev(dev);
5701 int r, i;
5702 struct amdgpu_reset_context reset_context;
5703 u32 memsize;
5704 struct list_head device_list;
5705
5706 DRM_INFO("PCI error: slot reset callback!!\n");
5707
5708 memset(&reset_context, 0, sizeof(reset_context));
5709
5710 INIT_LIST_HEAD(&device_list);
5711 list_add_tail(&adev->reset_list, &device_list);
5712
5713
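/* Wait for the ASIC to come out of reset */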
5714 msleep(500);
5715
5716
5717 amdgpu_device_load_pci_state(pdev);
5718
5719
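/* Confirm the ASIC came back by polling the config-space memsize */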
5720 for (i = 0; i < adev->usec_timeout; i++) {
5721 memsize = amdgpu_asic_get_config_memsize(adev);
5722
5723 if (memsize != 0xffffffff)
5724 break;
5725 udelay(1);
5726 }
5727 if (memsize == 0xffffffff) {
5728 r = -ETIME;
5729 goto out;
5730 }
5731
5732 reset_context.method = AMD_RESET_METHOD_NONE;
5733 reset_context.reset_req_dev = adev;
5734 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5735 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5736
5737 adev->no_hw_access = true;
5738 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5739 adev->no_hw_access = false;
5740 if (r)
5741 goto out;
5742
5743 r = amdgpu_do_asic_reset(&device_list, &reset_context);
5744
5745 out:
5746 if (!r) {
5747 if (amdgpu_device_cache_pci_state(adev->pdev))
5748 pci_restore_state(adev->pdev);
5749
5750 DRM_INFO("PCIe error recovery succeeded\n");
5751 } else {
5752 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5753 amdgpu_device_unset_mp1_state(adev);
5754 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5755 }
5756
5757 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5758 }
5759
/**
 * amdgpu_pci_resume() - resume normal operation after a PCI reset
 * @pdev: pointer to the PCI device
 *
 * Called when the error recovery driver tells us that it is OK to
 * resume normal operation.
 */
5767 void amdgpu_pci_resume(struct pci_dev *pdev)
5768 {
5769 struct drm_device *dev = pci_get_drvdata(pdev);
5770 struct amdgpu_device *adev = drm_to_adev(dev);
5771 int i;
5772
5773
5774 DRM_INFO("PCI error: resume callback!!\n");
5775
5776
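/* Only the pci_channel_io_frozen path stopped the schedulers and took the reset-domain lock */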
5777 if (adev->pci_channel_state != pci_channel_io_frozen)
5778 return;
5779
5780 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5781 struct amdgpu_ring *ring = adev->rings[i];
5782
5783 if (!ring || !ring->sched.thread)
5784 continue;
5785
5786
5787 drm_sched_resubmit_jobs(&ring->sched);
5788 drm_sched_start(&ring->sched, true);
5789 }
5790
5791 amdgpu_device_unset_mp1_state(adev);
5792 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5793 }
5794
5795 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5796 {
5797 struct drm_device *dev = pci_get_drvdata(pdev);
5798 struct amdgpu_device *adev = drm_to_adev(dev);
5799 int r;
5800
5801 r = pci_save_state(pdev);
5802 if (!r) {
5803 kfree(adev->pci_state);
5804
5805 adev->pci_state = pci_store_saved_state(pdev);
5806
5807 if (!adev->pci_state) {
5808 DRM_ERROR("Failed to store PCI saved state");
5809 return false;
5810 }
5811 } else {
5812 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5813 return false;
5814 }
5815
5816 return true;
5817 }
5818
5819 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5820 {
5821 struct drm_device *dev = pci_get_drvdata(pdev);
5822 struct amdgpu_device *adev = drm_to_adev(dev);
5823 int r;
5824
5825 if (!adev->pci_state)
5826 return false;
5827
5828 r = pci_load_saved_state(pdev, adev->pci_state);
5829
5830 if (!r) {
5831 pci_restore_state(pdev);
5832 } else {
5833 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5834 return false;
5835 }
5836
5837 return true;
5838 }
5839
5840 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5841 struct amdgpu_ring *ring)
5842 {
5843 #ifdef CONFIG_X86_64
5844 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5845 return;
5846 #endif
5847 if (adev->gmc.xgmi.connected_to_cpu)
5848 return;
5849
5850 if (ring && ring->funcs->emit_hdp_flush)
5851 amdgpu_ring_emit_hdp_flush(ring);
5852 else
5853 amdgpu_asic_flush_hdp(adev, ring);
5854 }
5855
5856 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5857 struct amdgpu_ring *ring)
5858 {
5859 #ifdef CONFIG_X86_64
5860 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5861 return;
5862 #endif
5863 if (adev->gmc.xgmi.connected_to_cpu)
5864 return;
5865
5866 amdgpu_asic_invalidate_hdp(adev, ring);
5867 }
5868
5869 int amdgpu_in_reset(struct amdgpu_device *adev)
5870 {
5871 return atomic_read(&adev->reset_domain->in_gpu_reset);
5872 }
5873
/**
 * amdgpu_device_halt() - bring the hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Quiesce the device so that nothing can touch the hardware any more:
 * unplug the DRM device, disable interrupts and the fence driver,
 * unmap MMIO and disable the PCI device. Compared to a plain hang this
 * keeps the rest of the system stable (e.g. still reachable over SSH)
 * while the error context is preserved, which is useful when a fatal
 * error makes further hardware access unsafe.
 */
5894 void amdgpu_device_halt(struct amdgpu_device *adev)
5895 {
5896 struct pci_dev *pdev = adev->pdev;
5897 struct drm_device *ddev = adev_to_drm(adev);
5898
5899 drm_dev_unplug(ddev);
5900
5901 amdgpu_irq_disable_all(adev);
5902
5903 amdgpu_fence_driver_hw_fini(adev);
5904
5905 adev->no_hw_access = true;
5906
5907 amdgpu_device_unmap_mmio(adev);
5908
5909 pci_disable_device(pdev);
5910 pci_wait_for_pending_transaction(pdev);
5911 }
5912
5913 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5914 u32 reg)
5915 {
5916 unsigned long flags, address, data;
5917 u32 r;
5918
5919 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5920 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5921
5922 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5923 WREG32(address, reg * 4);
5924 (void)RREG32(address);
5925 r = RREG32(data);
5926 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5927 return r;
5928 }
5929
5930 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5931 u32 reg, u32 v)
5932 {
5933 unsigned long flags, address, data;
5934
5935 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5936 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5937
5938 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5939 WREG32(address, reg * 4);
5940 (void)RREG32(address);
5941 WREG32(data, v);
5942 (void)RREG32(data);
5943 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5944 }
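
/*
 * Illustrative read-modify-write through the two PCIe-port helpers above,
 * not code from this file; PCIE_PORT_SOME_CNTL and SOME_ENABLE_MASK are
 * hypothetical names used only for this sketch:
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, PCIE_PORT_SOME_CNTL);
 *	v |= SOME_ENABLE_MASK;
 *	amdgpu_device_pcie_port_wreg(adev, PCIE_PORT_SOME_CNTL, v);
 */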