0001 /*
0002  * Copyright 2016 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 
0024 #include <linux/firmware.h>
0025 #include <linux/pci.h>
0026 
0027 #include <drm/drm_cache.h>
0028 
0029 #include "amdgpu.h"
0030 #include "gmc_v9_0.h"
0031 #include "amdgpu_atomfirmware.h"
0032 #include "amdgpu_gem.h"
0033 
0034 #include "gc/gc_9_0_sh_mask.h"
0035 #include "dce/dce_12_0_offset.h"
0036 #include "dce/dce_12_0_sh_mask.h"
0037 #include "vega10_enum.h"
0038 #include "mmhub/mmhub_1_0_offset.h"
0039 #include "athub/athub_1_0_sh_mask.h"
0040 #include "athub/athub_1_0_offset.h"
0041 #include "oss/osssys_4_0_offset.h"
0042 
0043 #include "soc15.h"
0044 #include "soc15d.h"
0045 #include "soc15_common.h"
0046 #include "umc/umc_6_0_sh_mask.h"
0047 
0048 #include "gfxhub_v1_0.h"
0049 #include "mmhub_v1_0.h"
0050 #include "athub_v1_0.h"
0051 #include "gfxhub_v1_1.h"
0052 #include "mmhub_v9_4.h"
0053 #include "mmhub_v1_7.h"
0054 #include "umc_v6_1.h"
0055 #include "umc_v6_0.h"
0056 #include "umc_v6_7.h"
0057 #include "hdp_v4_0.h"
0058 #include "mca_v3_0.h"
0059 
0060 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
0061 
0062 #include "amdgpu_ras.h"
0063 #include "amdgpu_xgmi.h"
0064 
0065 #include "amdgpu_reset.h"
0066 
0067 /* add these here since we already include dce12 headers and these are for DCN */
0068 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
0069 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
0070 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
0071 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
0072 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
0073 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
0074 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0                                                                  0x049d
0075 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX                                                         2
0076 
0077 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2                                                          0x05ea
0078 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX                                                 2
0079 
0080 
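/*
 * The string tables below map UTCL2 client IDs reported in
 * VM_L2_PROTECTION_FAULT_STATUS to human-readable names for fault reporting
 * (see gmc_v9_0_process_interrupt() below). gfxhub_client_ids[] is indexed by
 * the client ID alone; the per-ASIC mmhub tables are indexed as
 * [client_id][rw], where rw is the read/write bit decoded from the fault
 * status register.
 */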
0081 static const char *gfxhub_client_ids[] = {
0082     "CB",
0083     "DB",
0084     "IA",
0085     "WD",
0086     "CPF",
0087     "CPC",
0088     "CPG",
0089     "RLC",
0090     "TCP",
0091     "SQC (inst)",
0092     "SQC (data)",
0093     "SQG",
0094     "PA",
0095 };
0096 
0097 static const char *mmhub_client_ids_raven[][2] = {
0098     [0][0] = "MP1",
0099     [1][0] = "MP0",
0100     [2][0] = "VCN",
0101     [3][0] = "VCNU",
0102     [4][0] = "HDP",
0103     [5][0] = "DCE",
0104     [13][0] = "UTCL2",
0105     [19][0] = "TLS",
0106     [26][0] = "OSS",
0107     [27][0] = "SDMA0",
0108     [0][1] = "MP1",
0109     [1][1] = "MP0",
0110     [2][1] = "VCN",
0111     [3][1] = "VCNU",
0112     [4][1] = "HDP",
0113     [5][1] = "XDP",
0114     [6][1] = "DBGU0",
0115     [7][1] = "DCE",
0116     [8][1] = "DCEDWB0",
0117     [9][1] = "DCEDWB1",
0118     [26][1] = "OSS",
0119     [27][1] = "SDMA0",
0120 };
0121 
0122 static const char *mmhub_client_ids_renoir[][2] = {
0123     [0][0] = "MP1",
0124     [1][0] = "MP0",
0125     [2][0] = "HDP",
0126     [4][0] = "DCEDMC",
0127     [5][0] = "DCEVGA",
0128     [13][0] = "UTCL2",
0129     [19][0] = "TLS",
0130     [26][0] = "OSS",
0131     [27][0] = "SDMA0",
0132     [28][0] = "VCN",
0133     [29][0] = "VCNU",
0134     [30][0] = "JPEG",
0135     [0][1] = "MP1",
0136     [1][1] = "MP0",
0137     [2][1] = "HDP",
0138     [3][1] = "XDP",
0139     [6][1] = "DBGU0",
0140     [7][1] = "DCEDMC",
0141     [8][1] = "DCEVGA",
0142     [9][1] = "DCEDWB",
0143     [26][1] = "OSS",
0144     [27][1] = "SDMA0",
0145     [28][1] = "VCN",
0146     [29][1] = "VCNU",
0147     [30][1] = "JPEG",
0148 };
0149 
0150 static const char *mmhub_client_ids_vega10[][2] = {
0151     [0][0] = "MP0",
0152     [1][0] = "UVD",
0153     [2][0] = "UVDU",
0154     [3][0] = "HDP",
0155     [13][0] = "UTCL2",
0156     [14][0] = "OSS",
0157     [15][0] = "SDMA1",
0158     [32+0][0] = "VCE0",
0159     [32+1][0] = "VCE0U",
0160     [32+2][0] = "XDMA",
0161     [32+3][0] = "DCE",
0162     [32+4][0] = "MP1",
0163     [32+14][0] = "SDMA0",
0164     [0][1] = "MP0",
0165     [1][1] = "UVD",
0166     [2][1] = "UVDU",
0167     [3][1] = "DBGU0",
0168     [4][1] = "HDP",
0169     [5][1] = "XDP",
0170     [14][1] = "OSS",
0171     [15][1] = "SDMA0",
0172     [32+0][1] = "VCE0",
0173     [32+1][1] = "VCE0U",
0174     [32+2][1] = "XDMA",
0175     [32+3][1] = "DCE",
0176     [32+4][1] = "DCEDWB",
0177     [32+5][1] = "MP1",
0178     [32+6][1] = "DBGU1",
0179     [32+14][1] = "SDMA1",
0180 };
0181 
0182 static const char *mmhub_client_ids_vega12[][2] = {
0183     [0][0] = "MP0",
0184     [1][0] = "VCE0",
0185     [2][0] = "VCE0U",
0186     [3][0] = "HDP",
0187     [13][0] = "UTCL2",
0188     [14][0] = "OSS",
0189     [15][0] = "SDMA1",
0190     [32+0][0] = "DCE",
0191     [32+1][0] = "XDMA",
0192     [32+2][0] = "UVD",
0193     [32+3][0] = "UVDU",
0194     [32+4][0] = "MP1",
0195     [32+15][0] = "SDMA0",
0196     [0][1] = "MP0",
0197     [1][1] = "VCE0",
0198     [2][1] = "VCE0U",
0199     [3][1] = "DBGU0",
0200     [4][1] = "HDP",
0201     [5][1] = "XDP",
0202     [14][1] = "OSS",
0203     [15][1] = "SDMA0",
0204     [32+0][1] = "DCE",
0205     [32+1][1] = "DCEDWB",
0206     [32+2][1] = "XDMA",
0207     [32+3][1] = "UVD",
0208     [32+4][1] = "UVDU",
0209     [32+5][1] = "MP1",
0210     [32+6][1] = "DBGU1",
0211     [32+15][1] = "SDMA1",
0212 };
0213 
0214 static const char *mmhub_client_ids_vega20[][2] = {
0215     [0][0] = "XDMA",
0216     [1][0] = "DCE",
0217     [2][0] = "VCE0",
0218     [3][0] = "VCE0U",
0219     [4][0] = "UVD",
0220     [5][0] = "UVD1U",
0221     [13][0] = "OSS",
0222     [14][0] = "HDP",
0223     [15][0] = "SDMA0",
0224     [32+0][0] = "UVD",
0225     [32+1][0] = "UVDU",
0226     [32+2][0] = "MP1",
0227     [32+3][0] = "MP0",
0228     [32+12][0] = "UTCL2",
0229     [32+14][0] = "SDMA1",
0230     [0][1] = "XDMA",
0231     [1][1] = "DCE",
0232     [2][1] = "DCEDWB",
0233     [3][1] = "VCE0",
0234     [4][1] = "VCE0U",
0235     [5][1] = "UVD1",
0236     [6][1] = "UVD1U",
0237     [7][1] = "DBGU0",
0238     [8][1] = "XDP",
0239     [13][1] = "OSS",
0240     [14][1] = "HDP",
0241     [15][1] = "SDMA0",
0242     [32+0][1] = "UVD",
0243     [32+1][1] = "UVDU",
0244     [32+2][1] = "DBGU1",
0245     [32+3][1] = "MP1",
0246     [32+4][1] = "MP0",
0247     [32+14][1] = "SDMA1",
0248 };
0249 
0250 static const char *mmhub_client_ids_arcturus[][2] = {
0251     [0][0] = "DBGU1",
0252     [1][0] = "XDP",
0253     [2][0] = "MP1",
0254     [14][0] = "HDP",
0255     [171][0] = "JPEG",
0256     [172][0] = "VCN",
0257     [173][0] = "VCNU",
0258     [203][0] = "JPEG1",
0259     [204][0] = "VCN1",
0260     [205][0] = "VCN1U",
0261     [256][0] = "SDMA0",
0262     [257][0] = "SDMA1",
0263     [258][0] = "SDMA2",
0264     [259][0] = "SDMA3",
0265     [260][0] = "SDMA4",
0266     [261][0] = "SDMA5",
0267     [262][0] = "SDMA6",
0268     [263][0] = "SDMA7",
0269     [384][0] = "OSS",
0270     [0][1] = "DBGU1",
0271     [1][1] = "XDP",
0272     [2][1] = "MP1",
0273     [14][1] = "HDP",
0274     [171][1] = "JPEG",
0275     [172][1] = "VCN",
0276     [173][1] = "VCNU",
0277     [203][1] = "JPEG1",
0278     [204][1] = "VCN1",
0279     [205][1] = "VCN1U",
0280     [256][1] = "SDMA0",
0281     [257][1] = "SDMA1",
0282     [258][1] = "SDMA2",
0283     [259][1] = "SDMA3",
0284     [260][1] = "SDMA4",
0285     [261][1] = "SDMA5",
0286     [262][1] = "SDMA6",
0287     [263][1] = "SDMA7",
0288     [384][1] = "OSS",
0289 };
0290 
0291 static const char *mmhub_client_ids_aldebaran[][2] = {
0292     [2][0] = "MP1",
0293     [3][0] = "MP0",
0294     [32+1][0] = "DBGU_IO0",
0295     [32+2][0] = "DBGU_IO2",
0296     [32+4][0] = "MPIO",
0297     [96+11][0] = "JPEG0",
0298     [96+12][0] = "VCN0",
0299     [96+13][0] = "VCNU0",
0300     [128+11][0] = "JPEG1",
0301     [128+12][0] = "VCN1",
0302     [128+13][0] = "VCNU1",
0303     [160+1][0] = "XDP",
0304     [160+14][0] = "HDP",
0305     [256+0][0] = "SDMA0",
0306     [256+1][0] = "SDMA1",
0307     [256+2][0] = "SDMA2",
0308     [256+3][0] = "SDMA3",
0309     [256+4][0] = "SDMA4",
0310     [384+0][0] = "OSS",
0311     [2][1] = "MP1",
0312     [3][1] = "MP0",
0313     [32+1][1] = "DBGU_IO0",
0314     [32+2][1] = "DBGU_IO2",
0315     [32+4][1] = "MPIO",
0316     [96+11][1] = "JPEG0",
0317     [96+12][1] = "VCN0",
0318     [96+13][1] = "VCNU0",
0319     [128+11][1] = "JPEG1",
0320     [128+12][1] = "VCN1",
0321     [128+13][1] = "VCNU1",
0322     [160+1][1] = "XDP",
0323     [160+14][1] = "HDP",
0324     [256+0][1] = "SDMA0",
0325     [256+1][1] = "SDMA1",
0326     [256+2][1] = "SDMA2",
0327     [256+3][1] = "SDMA3",
0328     [256+4][1] = "SDMA4",
0329     [384+0][1] = "OSS",
0330 };
0331 
0332 static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
0333 {
0334     SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
0335     SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
0336 };
0337 
0338 static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
0339 {
0340     SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
0341     SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
0342 };
0343 
0344 static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
0345     (0x000143c0 + 0x00000000),
0346     (0x000143c0 + 0x00000800),
0347     (0x000143c0 + 0x00001000),
0348     (0x000143c0 + 0x00001800),
0349     (0x000543c0 + 0x00000000),
0350     (0x000543c0 + 0x00000800),
0351     (0x000543c0 + 0x00001000),
0352     (0x000543c0 + 0x00001800),
0353     (0x000943c0 + 0x00000000),
0354     (0x000943c0 + 0x00000800),
0355     (0x000943c0 + 0x00001000),
0356     (0x000943c0 + 0x00001800),
0357     (0x000d43c0 + 0x00000000),
0358     (0x000d43c0 + 0x00000800),
0359     (0x000d43c0 + 0x00001000),
0360     (0x000d43c0 + 0x00001800),
0361     (0x001143c0 + 0x00000000),
0362     (0x001143c0 + 0x00000800),
0363     (0x001143c0 + 0x00001000),
0364     (0x001143c0 + 0x00001800),
0365     (0x001543c0 + 0x00000000),
0366     (0x001543c0 + 0x00000800),
0367     (0x001543c0 + 0x00001000),
0368     (0x001543c0 + 0x00001800),
0369     (0x001943c0 + 0x00000000),
0370     (0x001943c0 + 0x00000800),
0371     (0x001943c0 + 0x00001000),
0372     (0x001943c0 + 0x00001800),
0373     (0x001d43c0 + 0x00000000),
0374     (0x001d43c0 + 0x00000800),
0375     (0x001d43c0 + 0x00001000),
0376     (0x001d43c0 + 0x00001800),
0377 };
0378 
0379 static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
0380     (0x000143e0 + 0x00000000),
0381     (0x000143e0 + 0x00000800),
0382     (0x000143e0 + 0x00001000),
0383     (0x000143e0 + 0x00001800),
0384     (0x000543e0 + 0x00000000),
0385     (0x000543e0 + 0x00000800),
0386     (0x000543e0 + 0x00001000),
0387     (0x000543e0 + 0x00001800),
0388     (0x000943e0 + 0x00000000),
0389     (0x000943e0 + 0x00000800),
0390     (0x000943e0 + 0x00001000),
0391     (0x000943e0 + 0x00001800),
0392     (0x000d43e0 + 0x00000000),
0393     (0x000d43e0 + 0x00000800),
0394     (0x000d43e0 + 0x00001000),
0395     (0x000d43e0 + 0x00001800),
0396     (0x001143e0 + 0x00000000),
0397     (0x001143e0 + 0x00000800),
0398     (0x001143e0 + 0x00001000),
0399     (0x001143e0 + 0x00001800),
0400     (0x001543e0 + 0x00000000),
0401     (0x001543e0 + 0x00000800),
0402     (0x001543e0 + 0x00001000),
0403     (0x001543e0 + 0x00001800),
0404     (0x001943e0 + 0x00000000),
0405     (0x001943e0 + 0x00000800),
0406     (0x001943e0 + 0x00001000),
0407     (0x001943e0 + 0x00001800),
0408     (0x001d43e0 + 0x00000000),
0409     (0x001d43e0 + 0x00000800),
0410     (0x001d43e0 + 0x00001000),
0411     (0x001d43e0 + 0x00001800),
0412 };
0413 
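/*
 * Note: the two address tables above appear to list per-channel UMC ECC
 * control registers and their mask counterparts (entries 0x800 apart within
 * a group, groups 0x40000 apart). They are only used by
 * gmc_v9_0_ecc_interrupt_state() below to set or clear the low seven control
 * bits when the ECC interrupt is enabled or disabled on ASICs older than
 * VEGA20.
 */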
0414 static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
0415         struct amdgpu_irq_src *src,
0416         unsigned type,
0417         enum amdgpu_interrupt_state state)
0418 {
0419     u32 bits, i, tmp, reg;
0420 
0421     /* Devices newer than VEGA10/12 shall have these programming
0422      * sequences performed by the PSP BL */
0423     if (adev->asic_type >= CHIP_VEGA20)
0424         return 0;
0425 
0426     bits = 0x7f;
0427 
0428     switch (state) {
0429     case AMDGPU_IRQ_STATE_DISABLE:
0430         for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
0431             reg = ecc_umc_mcumc_ctrl_addrs[i];
0432             tmp = RREG32(reg);
0433             tmp &= ~bits;
0434             WREG32(reg, tmp);
0435         }
0436         for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
0437             reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
0438             tmp = RREG32(reg);
0439             tmp &= ~bits;
0440             WREG32(reg, tmp);
0441         }
0442         break;
0443     case AMDGPU_IRQ_STATE_ENABLE:
0444         for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
0445             reg = ecc_umc_mcumc_ctrl_addrs[i];
0446             tmp = RREG32(reg);
0447             tmp |= bits;
0448             WREG32(reg, tmp);
0449         }
0450         for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
0451             reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
0452             tmp = RREG32(reg);
0453             tmp |= bits;
0454             WREG32(reg, tmp);
0455         }
0456         break;
0457     default:
0458         break;
0459     }
0460 
0461     return 0;
0462 }
0463 
0464 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
0465                     struct amdgpu_irq_src *src,
0466                     unsigned type,
0467                     enum amdgpu_interrupt_state state)
0468 {
0469     struct amdgpu_vmhub *hub;
0470     u32 tmp, reg, bits, i, j;
0471 
0472     bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0473         VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0474         VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0475         VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0476         VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0477         VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
0478         VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
0479 
0480     switch (state) {
0481     case AMDGPU_IRQ_STATE_DISABLE:
0482         for (j = 0; j < adev->num_vmhubs; j++) {
0483             hub = &adev->vmhub[j];
0484             for (i = 0; i < 16; i++) {
0485                 reg = hub->vm_context0_cntl + i;
0486 
0487                 if (j == AMDGPU_GFXHUB_0)
0488                     tmp = RREG32_SOC15_IP(GC, reg);
0489                 else
0490                     tmp = RREG32_SOC15_IP(MMHUB, reg);
0491 
0492                 tmp &= ~bits;
0493 
0494                 if (j == AMDGPU_GFXHUB_0)
0495                     WREG32_SOC15_IP(GC, reg, tmp);
0496                 else
0497                     WREG32_SOC15_IP(MMHUB, reg, tmp);
0498             }
0499         }
0500         break;
0501     case AMDGPU_IRQ_STATE_ENABLE:
0502         for (j = 0; j < adev->num_vmhubs; j++) {
0503             hub = &adev->vmhub[j];
0504             for (i = 0; i < 16; i++) {
0505                 reg = hub->vm_context0_cntl + i;
0506 
0507                 if (j == AMDGPU_GFXHUB_0)
0508                     tmp = RREG32_SOC15_IP(GC, reg);
0509                 else
0510                     tmp = RREG32_SOC15_IP(MMHUB, reg);
0511 
0512                 tmp |= bits;
0513 
0514                 if (j == AMDGPU_GFXHUB_0)
0515                     WREG32_SOC15_IP(GC, reg, tmp);
0516                 else
0517                     WREG32_SOC15_IP(MMHUB, reg, tmp);
0518             }
0519         }
0520         break;
0521     default:
0522         break;
0523     }
0524 
0525     return 0;
0526 }
0527 
0528 static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
0529                       struct amdgpu_irq_src *source,
0530                       struct amdgpu_iv_entry *entry)
0531 {
0532     bool retry_fault = !!(entry->src_data[1] & 0x80);
0533     bool write_fault = !!(entry->src_data[1] & 0x20);
0534     uint32_t status = 0, cid = 0, rw = 0;
0535     struct amdgpu_task_info task_info;
0536     struct amdgpu_vmhub *hub;
0537     const char *mmhub_cid;
0538     const char *hub_name;
0539     u64 addr;
0540 
0541     addr = (u64)entry->src_data[0] << 12;
0542     addr |= ((u64)entry->src_data[1] & 0xf) << 44;
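	/*
	 * addr now holds the 48-bit, page-aligned faulting virtual address,
	 * reassembled from the IV payload: bits 47:44 come from src_data[1],
	 * bits 43:12 from src_data[0], and the low 12 bits are zero.
	 */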
0543 
0544     if (retry_fault) {
0545         /* Returning 1 here also prevents sending the IV to the KFD */
0546 
0547         /* Process it only if it's the first fault for this address */
0548         if (entry->ih != &adev->irq.ih_soft &&
0549             amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
0550                          entry->timestamp))
0551             return 1;
0552 
0553         /* Delegate it to a different ring if the hardware hasn't
0554          * already done it.
0555          */
0556         if (entry->ih == &adev->irq.ih) {
0557             amdgpu_irq_delegate(adev, entry, 8);
0558             return 1;
0559         }
0560 
0561         /* Try to handle the recoverable page faults by filling page
0562          * tables
0563          */
0564         if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
0565             return 1;
0566     }
0567 
0568     if (!printk_ratelimit())
0569         return 0;
0570 
0571     if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
0572         hub_name = "mmhub0";
0573         hub = &adev->vmhub[AMDGPU_MMHUB_0];
0574     } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
0575         hub_name = "mmhub1";
0576         hub = &adev->vmhub[AMDGPU_MMHUB_1];
0577     } else {
0578         hub_name = "gfxhub0";
0579         hub = &adev->vmhub[AMDGPU_GFXHUB_0];
0580     }
0581 
0582     memset(&task_info, 0, sizeof(struct amdgpu_task_info));
0583     amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
0584 
0585     dev_err(adev->dev,
0586         "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
0587         "pasid:%u, for process %s pid %d thread %s pid %d)\n",
0588         hub_name, retry_fault ? "retry" : "no-retry",
0589         entry->src_id, entry->ring_id, entry->vmid,
0590         entry->pasid, task_info.process_name, task_info.tgid,
0591         task_info.task_name, task_info.pid);
0592     dev_err(adev->dev, "  in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
0593         addr, entry->client_id,
0594         soc15_ih_clientid_name[entry->client_id]);
0595 
0596     if (amdgpu_sriov_vf(adev))
0597         return 0;
0598 
0599     /*
0600      * Issue a dummy read to wait for the status register to
0601      * be updated to avoid reading an incorrect value due to
0602      * the new fast GRBM interface.
0603      */
0604     if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
0605         (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
0606         RREG32(hub->vm_l2_pro_fault_status);
0607 
0608     status = RREG32(hub->vm_l2_pro_fault_status);
0609     cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
0610     rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
0611     WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
0612 
0613 
0614     dev_err(adev->dev,
0615         "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
0616         status);
0617     if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
0618         dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
0619             cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
0620             gfxhub_client_ids[cid],
0621             cid);
0622     } else {
0623         switch (adev->ip_versions[MMHUB_HWIP][0]) {
0624         case IP_VERSION(9, 0, 0):
0625             mmhub_cid = mmhub_client_ids_vega10[cid][rw];
0626             break;
0627         case IP_VERSION(9, 3, 0):
0628             mmhub_cid = mmhub_client_ids_vega12[cid][rw];
0629             break;
0630         case IP_VERSION(9, 4, 0):
0631             mmhub_cid = mmhub_client_ids_vega20[cid][rw];
0632             break;
0633         case IP_VERSION(9, 4, 1):
0634             mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
0635             break;
0636         case IP_VERSION(9, 1, 0):
0637         case IP_VERSION(9, 2, 0):
0638             mmhub_cid = mmhub_client_ids_raven[cid][rw];
0639             break;
0640         case IP_VERSION(1, 5, 0):
0641         case IP_VERSION(2, 4, 0):
0642             mmhub_cid = mmhub_client_ids_renoir[cid][rw];
0643             break;
0644         case IP_VERSION(9, 4, 2):
0645             mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
0646             break;
0647         default:
0648             mmhub_cid = NULL;
0649             break;
0650         }
0651         dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
0652             mmhub_cid ? mmhub_cid : "unknown", cid);
0653     }
0654     dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
0655         REG_GET_FIELD(status,
0656         VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
0657     dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
0658         REG_GET_FIELD(status,
0659         VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
0660     dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
0661         REG_GET_FIELD(status,
0662         VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
0663     dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
0664         REG_GET_FIELD(status,
0665         VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
0666     dev_err(adev->dev, "\t RW: 0x%x\n", rw);
0667     return 0;
0668 }
0669 
0670 static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
0671     .set = gmc_v9_0_vm_fault_interrupt_state,
0672     .process = gmc_v9_0_process_interrupt,
0673 };
0674 
0675 
0676 static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
0677     .set = gmc_v9_0_ecc_interrupt_state,
0678     .process = amdgpu_umc_process_ecc_irq,
0679 };
0680 
0681 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
0682 {
0683     adev->gmc.vm_fault.num_types = 1;
0684     adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
0685 
0686     if (!amdgpu_sriov_vf(adev) &&
0687         !adev->gmc.xgmi.connected_to_cpu) {
0688         adev->gmc.ecc_irq.num_types = 1;
0689         adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
0690     }
0691 }
0692 
0693 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
0694                     uint32_t flush_type)
0695 {
0696     u32 req = 0;
0697 
0698     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
0699                 PER_VMID_INVALIDATE_REQ, 1 << vmid);
0700     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
0701     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
0702     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
0703     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
0704     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
0705     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
0706     req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
0707                 CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
0708 
0709     return req;
0710 }
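/*
 * The request word built above is written to a hub's VM_INVALIDATE_ENG*_REQ
 * register; completion is then detected by polling the per-VMID bit in the
 * matching VM_INVALIDATE_ENG*_ACK register. See gmc_v9_0_flush_gpu_tlb() and
 * gmc_v9_0_emit_flush_gpu_tlb() below for the MMIO and command-stream
 * variants of that sequence.
 */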
0711 
0712 /**
0713  * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidation semaphore
0714  *
0715  * @adev: amdgpu_device pointer
0716  * @vmhub: vmhub type
0717  *
0718  */
0719 static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
0720                        uint32_t vmhub)
0721 {
0722     if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
0723         return false;
0724 
0725     return ((vmhub == AMDGPU_MMHUB_0 ||
0726          vmhub == AMDGPU_MMHUB_1) &&
0727         (!amdgpu_sriov_vf(adev)) &&
0728         (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
0729            (adev->apu_flags & AMD_APU_IS_PICASSO))));
0730 }
0731 
0732 static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
0733                     uint8_t vmid, uint16_t *p_pasid)
0734 {
0735     uint32_t value;
0736 
0737     value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
0738              + vmid);
0739     *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
0740 
0741     return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
0742 }
0743 
0744 /*
0745  * GART
0746  * VMID 0 is the physical GPU addresses as used by the kernel.
0747  * VMIDs 1-15 are used for userspace clients and are handled
0748  * by the amdgpu vm/hsa code.
0749  */
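/*
 * In practice this means GART mappings used by the kernel go through VMID 0's
 * page table, which gmc_v9_0_gart_init() below allocates and configures.
 */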
0750 
0751 /**
0752  * gmc_v9_0_flush_gpu_tlb - tlb flush with a certain flush type
0753  *
0754  * @adev: amdgpu_device pointer
0755  * @vmid: vm instance to flush
0756  * @vmhub: which hub to flush
0757  * @flush_type: the flush type
0758  *
0759  * Flush the TLB for the requested page table using the given flush type.
0760  */
0761 static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
0762                     uint32_t vmhub, uint32_t flush_type)
0763 {
0764     bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
0765     const unsigned eng = 17;
0766     u32 j, inv_req, inv_req2, tmp;
0767     struct amdgpu_vmhub *hub;
0768 
0769     BUG_ON(vmhub >= adev->num_vmhubs);
0770 
0771     hub = &adev->vmhub[vmhub];
0772     if (adev->gmc.xgmi.num_physical_nodes &&
0773         adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
0774         /* Vega20+XGMI caches PTEs in TC and TLB. Add a
0775          * heavy-weight TLB flush (type 2), which flushes
0776          * both. Due to a race condition with concurrent
0777          * memory accesses using the same TLB cache line, we
0778          * still need a second TLB flush after this.
0779          */
0780         inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
0781         inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
0782     } else {
0783         inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
0784         inv_req2 = 0;
0785     }
0786 
0787     /* This is necessary for a HW workaround under SRIOV as well
0788      * as GFXOFF under bare metal
0789      */
0790     if (adev->gfx.kiq.ring.sched.ready &&
0791         (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
0792         down_read_trylock(&adev->reset_domain->sem)) {
0793         uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
0794         uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
0795 
0796         amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
0797                            1 << vmid);
0798         up_read(&adev->reset_domain->sem);
0799         return;
0800     }
0801 
0802     spin_lock(&adev->gmc.invalidate_lock);
0803 
0804     /*
0805      * The GPU may lose the gpuvm invalidate acknowledge state across a
0806      * power-gating off cycle. Add a semaphore acquire before invalidation
0807      * and a semaphore release after invalidation to avoid entering a
0808      * power-gated state, as a workaround for the issue.
0809      */
0810 
0811     /* TODO: Semaphore use for GFXHUB still needs further debugging. */
0812     if (use_semaphore) {
0813         for (j = 0; j < adev->usec_timeout; j++) {
0814             /* a read return value of 1 means semaphore acquire */
0815             if (vmhub == AMDGPU_GFXHUB_0)
0816                 tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
0817             else
0818                 tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
0819 
0820             if (tmp & 0x1)
0821                 break;
0822             udelay(1);
0823         }
0824 
0825         if (j >= adev->usec_timeout)
0826             DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
0827     }
0828 
0829     do {
0830         if (vmhub == AMDGPU_GFXHUB_0)
0831             WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
0832         else
0833             WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
0834 
0835         /*
0836          * Issue a dummy read to wait for the ACK register to
0837          * be cleared to avoid a false ACK due to the new fast
0838          * GRBM interface.
0839          */
0840         if ((vmhub == AMDGPU_GFXHUB_0) &&
0841             (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
0842             RREG32_NO_KIQ(hub->vm_inv_eng0_req +
0843                       hub->eng_distance * eng);
0844 
0845         for (j = 0; j < adev->usec_timeout; j++) {
0846             if (vmhub == AMDGPU_GFXHUB_0)
0847                 tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
0848             else
0849                 tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
0850 
0851             if (tmp & (1 << vmid))
0852                 break;
0853             udelay(1);
0854         }
0855 
0856         inv_req = inv_req2;
0857         inv_req2 = 0;
0858     } while (inv_req);
0859 
0860     /* TODO: Semaphore use for GFXHUB still needs further debugging. */
0861     if (use_semaphore) {
0862         /*
0863          * add semaphore release after invalidation,
0864          * write with 0 means semaphore release
0865          */
0866         if (vmhub == AMDGPU_GFXHUB_0)
0867             WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
0868         else
0869             WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
0870     }
0871 
0872     spin_unlock(&adev->gmc.invalidate_lock);
0873 
0874     if (j < adev->usec_timeout)
0875         return;
0876 
0877     DRM_ERROR("Timeout waiting for VM flush ACK!\n");
0878 }
0879 
0880 /**
0881  * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
0882  *
0883  * @adev: amdgpu_device pointer
0884  * @pasid: pasid to be flushed
0885  * @flush_type: the flush type
0886  * @all_hub: flush all hubs
0887  *
0888  * Flush the TLB for the requested pasid.
0889  */
0890 static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
0891                     uint16_t pasid, uint32_t flush_type,
0892                     bool all_hub)
0893 {
0894     int vmid, i;
0895     signed long r;
0896     uint32_t seq;
0897     uint16_t queried_pasid;
0898     bool ret;
0899     u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
0900     struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
0901     struct amdgpu_kiq *kiq = &adev->gfx.kiq;
0902 
0903     if (amdgpu_in_reset(adev))
0904         return -EIO;
0905 
0906     if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
0907         /* Vega20+XGMI caches PTEs in TC and TLB. Add a
0908          * heavy-weight TLB flush (type 2), which flushes
0909          * both. Due to a race condition with concurrent
0910          * memory accesses using the same TLB cache line, we
0911          * still need a second TLB flush after this.
0912          */
0913         bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
0914                        adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
0915         /* 2 dwords flush + 8 dwords fence */
0916         unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
0917 
0918         if (vega20_xgmi_wa)
0919             ndw += kiq->pmf->invalidate_tlbs_size;
0920 
0921         spin_lock(&adev->gfx.kiq.ring_lock);
0922         /* 2 dwords flush + 8 dwords fence */
0923         amdgpu_ring_alloc(ring, ndw);
0924         if (vega20_xgmi_wa)
0925             kiq->pmf->kiq_invalidate_tlbs(ring,
0926                               pasid, 2, all_hub);
0927         kiq->pmf->kiq_invalidate_tlbs(ring,
0928                     pasid, flush_type, all_hub);
0929         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
0930         if (r) {
0931             amdgpu_ring_undo(ring);
0932             spin_unlock(&adev->gfx.kiq.ring_lock);
0933             up_read(&adev->reset_domain->sem);
0934             return -ETIME;
0935         }
0936 
0937         amdgpu_ring_commit(ring);
0938         spin_unlock(&adev->gfx.kiq.ring_lock);
0939         r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
0940         if (r < 1) {
0941             dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
0942             up_read(&adev->reset_domain->sem);
0943             return -ETIME;
0944         }
0945         up_read(&adev->reset_domain->sem);
0946         return 0;
0947     }
0948 
0949     for (vmid = 1; vmid < 16; vmid++) {
0950 
0951         ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
0952                 &queried_pasid);
0953         if (ret && queried_pasid == pasid) {
0954             if (all_hub) {
0955                 for (i = 0; i < adev->num_vmhubs; i++)
0956                     gmc_v9_0_flush_gpu_tlb(adev, vmid,
0957                             i, flush_type);
0958             } else {
0959                 gmc_v9_0_flush_gpu_tlb(adev, vmid,
0960                         AMDGPU_GFXHUB_0, flush_type);
0961             }
0962             break;
0963         }
0964     }
0965 
0966     return 0;
0967 
0968 }
0969 
0970 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
0971                         unsigned vmid, uint64_t pd_addr)
0972 {
0973     bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
0974     struct amdgpu_device *adev = ring->adev;
0975     struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
0976     uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
0977     unsigned eng = ring->vm_inv_eng;
0978 
0979     /*
0980      * The GPU may lose the gpuvm invalidate acknowledge state across a
0981      * power-gating off cycle. Add a semaphore acquire before invalidation
0982      * and a semaphore release after invalidation to avoid entering a
0983      * power-gated state, as a workaround for the issue.
0984      */
0985 
0986     /* TODO: Semaphore use for GFXHUB still needs further debugging. */
0987     if (use_semaphore)
0988         /* a read return value of 1 means semaphore acquire */
0989         amdgpu_ring_emit_reg_wait(ring,
0990                       hub->vm_inv_eng0_sem +
0991                       hub->eng_distance * eng, 0x1, 0x1);
0992 
0993     amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
0994                   (hub->ctx_addr_distance * vmid),
0995                   lower_32_bits(pd_addr));
0996 
0997     amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
0998                   (hub->ctx_addr_distance * vmid),
0999                   upper_32_bits(pd_addr));
1000 
1001     amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
1002                         hub->eng_distance * eng,
1003                         hub->vm_inv_eng0_ack +
1004                         hub->eng_distance * eng,
1005                         req, 1 << vmid);
1006 
1007     /* TODO: Semaphore use for GFXHUB still needs further debugging. */
1008     if (use_semaphore)
1009         /*
1010          * add semaphore release after invalidation,
1011          * write with 0 means semaphore release
1012          */
1013         amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
1014                       hub->eng_distance * eng, 0);
1015 
1016     return pd_addr;
1017 }
1018 
1019 static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
1020                     unsigned pasid)
1021 {
1022     struct amdgpu_device *adev = ring->adev;
1023     uint32_t reg;
1024 
1025     /* Do nothing because there's no lut register for mmhub1. */
1026     if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
1027         return;
1028 
1029     if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
1030         reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
1031     else
1032         reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
1033 
1034     amdgpu_ring_emit_wreg(ring, reg, pasid);
1035 }
1036 
1037 /*
1038  * PTE format on VEGA 10:
1039  * 63:59 reserved
1040  * 58:57 mtype
1041  * 56 F
1042  * 55 L
1043  * 54 P
1044  * 53 SW
1045  * 52 T
1046  * 50:48 reserved
1047  * 47:12 4k physical page base address
1048  * 11:7 fragment
1049  * 6 write
1050  * 5 read
1051  * 4 exe
1052  * 3 Z
1053  * 2 snooped
1054  * 1 system
1055  * 0 valid
1056  *
1057  * PDE format on VEGA 10:
1058  * 63:59 block fragment size
1059  * 58:55 reserved
1060  * 54 P
1061  * 53:48 reserved
1062  * 47:6 physical base address of PD or PTE
1063  * 5:3 reserved
1064  * 2 C
1065  * 1 system
1066  * 0 valid
1067  */
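/*
 * Illustrative sketch only (not used by the driver): composing a minimal
 * valid, readable and writeable 4K PTE for a VRAM page following the VEGA 10
 * layout above. The helper name and the open-coded bit positions are
 * assumptions for illustration; the driver builds PTEs through the
 * AMDGPU_PTE_* and AMDGPU_PTE_MTYPE_VG10() helpers instead.
 */
static inline u64 example_vega10_vram_pte(u64 paddr)
{
	u64 pte = paddr & 0x0000FFFFFFFFF000ULL;	/* bits 47:12: page base */

	pte |= 1ULL << 0;	/* bit 0: valid */
	pte |= 1ULL << 5;	/* bit 5: readable */
	pte |= 1ULL << 6;	/* bit 6: writeable */

	return pte;
}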
1068 
1069 static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
1070 
1071 {
1072     switch (flags) {
1073     case AMDGPU_VM_MTYPE_DEFAULT:
1074         return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1075     case AMDGPU_VM_MTYPE_NC:
1076         return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1077     case AMDGPU_VM_MTYPE_WC:
1078         return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
1079     case AMDGPU_VM_MTYPE_RW:
1080         return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
1081     case AMDGPU_VM_MTYPE_CC:
1082         return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
1083     case AMDGPU_VM_MTYPE_UC:
1084         return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
1085     default:
1086         return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1087     }
1088 }
1089 
1090 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
1091                 uint64_t *addr, uint64_t *flags)
1092 {
1093     if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
1094         *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
1095     BUG_ON(*addr & 0xFFFF00000000003FULL);
1096 
1097     if (!adev->gmc.translate_further)
1098         return;
1099 
1100     if (level == AMDGPU_VM_PDB1) {
1101         /* Set the block fragment size */
1102         if (!(*flags & AMDGPU_PDE_PTE))
1103             *flags |= AMDGPU_PDE_BFS(0x9);
1104 
1105     } else if (level == AMDGPU_VM_PDB0) {
1106         if (*flags & AMDGPU_PDE_PTE) {
1107             *flags &= ~AMDGPU_PDE_PTE;
1108             if (!(*flags & AMDGPU_PTE_VALID))
1109                 *addr |= 1 << PAGE_SHIFT;
1110         } else {
1111             *flags |= AMDGPU_PTE_TF;
1112         }
1113     }
1114 }
1115 
1116 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
1117                 struct amdgpu_bo_va_mapping *mapping,
1118                 uint64_t *flags)
1119 {
1120     *flags &= ~AMDGPU_PTE_EXECUTABLE;
1121     *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1122 
1123     *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1124     *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
1125 
1126     if (mapping->flags & AMDGPU_PTE_PRT) {
1127         *flags |= AMDGPU_PTE_PRT;
1128         *flags &= ~AMDGPU_PTE_VALID;
1129     }
1130 
1131     if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1132          adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
1133         !(*flags & AMDGPU_PTE_SYSTEM) &&
1134         mapping->bo_va->is_xgmi)
1135         *flags |= AMDGPU_PTE_SNOOPED;
1136 
1137     if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
1138         *flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
1139 }
1140 
1141 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
1142 {
1143     u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
1144     unsigned size;
1145 
1146     /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
1147 
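	/*
	 * The size reserved below is viewport width * height * 4 bytes, i.e.
	 * it assumes a 32bpp pre-OS framebuffer set up by the VBIOS.
	 */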
1148     if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
1149         size = AMDGPU_VBIOS_VGA_ALLOCATION;
1150     } else {
1151         u32 viewport;
1152 
1153         switch (adev->ip_versions[DCE_HWIP][0]) {
1154         case IP_VERSION(1, 0, 0):
1155         case IP_VERSION(1, 0, 1):
1156             viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
1157             size = (REG_GET_FIELD(viewport,
1158                           HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1159                 REG_GET_FIELD(viewport,
1160                           HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1161                 4);
1162             break;
1163         case IP_VERSION(2, 1, 0):
1164             viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
1165             size = (REG_GET_FIELD(viewport,
1166                           HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1167                 REG_GET_FIELD(viewport,
1168                           HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1169                 4);
1170             break;
1171         default:
1172             viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
1173             size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
1174                 REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
1175                 4);
1176             break;
1177         }
1178     }
1179 
1180     return size;
1181 }
1182 
1183 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
1184     .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
1185     .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
1186     .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
1187     .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
1188     .map_mtype = gmc_v9_0_map_mtype,
1189     .get_vm_pde = gmc_v9_0_get_vm_pde,
1190     .get_vm_pte = gmc_v9_0_get_vm_pte,
1191     .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
1192 };
1193 
1194 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
1195 {
1196     adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
1197 }
1198 
1199 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
1200 {
1201     switch (adev->ip_versions[UMC_HWIP][0]) {
1202     case IP_VERSION(6, 0, 0):
1203         adev->umc.funcs = &umc_v6_0_funcs;
1204         break;
1205     case IP_VERSION(6, 1, 1):
1206         adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1207         adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1208         adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1209         adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
1210         adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1211         adev->umc.ras = &umc_v6_1_ras;
1212         break;
1213     case IP_VERSION(6, 1, 2):
1214         adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1215         adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1216         adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1217         adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
1218         adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1219         adev->umc.ras = &umc_v6_1_ras;
1220         break;
1221     case IP_VERSION(6, 7, 0):
1222         adev->umc.max_ras_err_cnt_per_query =
1223             UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
1224         adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
1225         adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
1226         adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
1227         if (!adev->gmc.xgmi.connected_to_cpu)
1228             adev->umc.ras = &umc_v6_7_ras;
1229         if (1 & adev->smuio.funcs->get_die_id(adev))
1230             adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
1231         else
1232             adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
1233         break;
1234     default:
1235         break;
1236     }
1237 
1238     if (adev->umc.ras) {
1239         amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
1240 
1241         strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
1242         adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
1243         adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
1244         adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
1245 
1246         /* If no special ras_late_init function is defined, use the default ras_late_init */
1247         if (!adev->umc.ras->ras_block.ras_late_init)
1248                 adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
1249 
1250         /* If no special ras_cb function is defined, use the default ras_cb */
1251         if (!adev->umc.ras->ras_block.ras_cb)
1252             adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
1253     }
1254 }
1255 
1256 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
1257 {
1258     switch (adev->ip_versions[MMHUB_HWIP][0]) {
1259     case IP_VERSION(9, 4, 1):
1260         adev->mmhub.funcs = &mmhub_v9_4_funcs;
1261         break;
1262     case IP_VERSION(9, 4, 2):
1263         adev->mmhub.funcs = &mmhub_v1_7_funcs;
1264         break;
1265     default:
1266         adev->mmhub.funcs = &mmhub_v1_0_funcs;
1267         break;
1268     }
1269 }
1270 
1271 static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
1272 {
1273     switch (adev->ip_versions[MMHUB_HWIP][0]) {
1274     case IP_VERSION(9, 4, 0):
1275         adev->mmhub.ras = &mmhub_v1_0_ras;
1276         break;
1277     case IP_VERSION(9, 4, 1):
1278         adev->mmhub.ras = &mmhub_v9_4_ras;
1279         break;
1280     case IP_VERSION(9, 4, 2):
1281         adev->mmhub.ras = &mmhub_v1_7_ras;
1282         break;
1283     default:
1284         /* mmhub ras is not available */
1285         break;
1286     }
1287 
1288     if (adev->mmhub.ras) {
1289         amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block);
1290 
1291         strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub");
1292         adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
1293         adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
1294         adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
1295     }
1296 }
1297 
1298 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
1299 {
1300     adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
1301 }
1302 
1303 static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
1304 {
1305     adev->hdp.ras = &hdp_v4_0_ras;
1306     amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block);
1307     adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm;
1308 }
1309 
1310 static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
1311 {
1312     /* is UMC the right IP to check for MCA?  Maybe DF? */
1313     switch (adev->ip_versions[UMC_HWIP][0]) {
1314     case IP_VERSION(6, 7, 0):
1315         if (!adev->gmc.xgmi.connected_to_cpu)
1316             adev->mca.funcs = &mca_v3_0_funcs;
1317         break;
1318     default:
1319         break;
1320     }
1321 }
1322 
1323 static int gmc_v9_0_early_init(void *handle)
1324 {
1325     int r;
1326     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1327 
1328     /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
1329     if (adev->asic_type == CHIP_VEGA20 ||
1330         adev->asic_type == CHIP_ARCTURUS)
1331         adev->gmc.xgmi.supported = true;
1332 
1333     if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
1334         adev->gmc.xgmi.supported = true;
1335         adev->gmc.xgmi.connected_to_cpu =
1336             adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
1337     }
1338 
1339     gmc_v9_0_set_gmc_funcs(adev);
1340     gmc_v9_0_set_irq_funcs(adev);
1341     gmc_v9_0_set_umc_funcs(adev);
1342     gmc_v9_0_set_mmhub_funcs(adev);
1343     gmc_v9_0_set_mmhub_ras_funcs(adev);
1344     gmc_v9_0_set_gfxhub_funcs(adev);
1345     gmc_v9_0_set_hdp_ras_funcs(adev);
1346     gmc_v9_0_set_mca_funcs(adev);
1347 
1348     adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
1349     adev->gmc.shared_aperture_end =
1350         adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
1351     adev->gmc.private_aperture_start = 0x1000000000000000ULL;
1352     adev->gmc.private_aperture_end =
1353         adev->gmc.private_aperture_start + (4ULL << 30) - 1;
1354 
1355     r = amdgpu_gmc_ras_early_init(adev);
1356     if (r)
1357         return r;
1358 
1359     return 0;
1360 }
1361 
1362 static int gmc_v9_0_late_init(void *handle)
1363 {
1364     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1365     int r;
1366 
1367     r = amdgpu_gmc_allocate_vm_inv_eng(adev);
1368     if (r)
1369         return r;
1370 
1371     /*
1372      * Work around a performance drop issue when the VBIOS enables partial
1373      * writes while disabling HBM ECC for vega10.
1374      */
1375     if (!amdgpu_sriov_vf(adev) &&
1376         (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
1377         if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
1378             if (adev->df.funcs &&
1379                 adev->df.funcs->enable_ecc_force_par_wr_rmw)
1380                 adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
1381         }
1382     }
1383 
1384     if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
1385         if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
1386             adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
1387             adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
1388 
1389         if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
1390             adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
1391             adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
1392     }
1393 
1394     r = amdgpu_gmc_ras_late_init(adev);
1395     if (r)
1396         return r;
1397 
1398     return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1399 }
1400 
1401 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
1402                     struct amdgpu_gmc *mc)
1403 {
1404     u64 base = adev->mmhub.funcs->get_fb_location(adev);
1405 
1406     /* add the xgmi offset of the physical node */
1407     base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1408     if (adev->gmc.xgmi.connected_to_cpu) {
1409         amdgpu_gmc_sysvm_location(adev, mc);
1410     } else {
1411         amdgpu_gmc_vram_location(adev, mc, base);
1412         amdgpu_gmc_gart_location(adev, mc);
1413         amdgpu_gmc_agp_location(adev, mc);
1414     }
1415     /* base offset of vram pages */
1416     adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1417 
1418     /* XXX: add the xgmi offset of the physical node? */
1419     adev->vm_manager.vram_base_offset +=
1420         adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1421 }
1422 
1423 /**
1424  * gmc_v9_0_mc_init - initialize the memory controller driver params
1425  *
1426  * @adev: amdgpu_device pointer
1427  *
1428  * Look up the amount of vram, vram width, and decide how to place
1429  * vram and gart within the GPU's physical address space.
1430  * Returns 0 for success.
1431  */
1432 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
1433 {
1434     int r;
1435 
1436     /* get_memsize() reports the VRAM size in MB; convert it to bytes */
1437     adev->gmc.mc_vram_size =
1438         adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
1439     adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
1440 
1441     if (!(adev->flags & AMD_IS_APU) &&
1442         !adev->gmc.xgmi.connected_to_cpu) {
1443         r = amdgpu_device_resize_fb_bar(adev);
1444         if (r)
1445             return r;
1446     }
1447     adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
1448     adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
1449 
1450 #ifdef CONFIG_X86_64
1451     /*
1452      * AMD Accelerated Processing Platform (APP) supporting GPU-HOST xgmi
1453      * interface can use VRAM through here as it appears as system-reserved
1454      * memory in the host address space.
1455      *
1456      * For APUs, VRAM is just the stolen system memory and can be accessed
1457      * directly.
1458      *
1459      * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
1460      */
1461 
1462     /* check whether both host-gpu and gpu-gpu xgmi links exist */
1463     if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
1464         (adev->gmc.xgmi.supported &&
1465          adev->gmc.xgmi.connected_to_cpu)) {
1466         adev->gmc.aper_base =
1467             adev->gfxhub.funcs->get_mc_fb_offset(adev) +
1468             adev->gmc.xgmi.physical_node_id *
1469             adev->gmc.xgmi.node_segment_size;
1470         adev->gmc.aper_size = adev->gmc.real_vram_size;
1471     }
1472 
1473 #endif
1474     /* In case the PCI BAR is larger than the actual amount of vram */
1475     adev->gmc.visible_vram_size = adev->gmc.aper_size;
1476     if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
1477         adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
1478 
1479     /* set the gart size */
1480     if (amdgpu_gart_size == -1) {
1481         switch (adev->ip_versions[GC_HWIP][0]) {
1482         case IP_VERSION(9, 0, 1):  /* all engines support GPUVM */
1483         case IP_VERSION(9, 2, 1):  /* all engines support GPUVM */
1484         case IP_VERSION(9, 4, 0):
1485         case IP_VERSION(9, 4, 1):
1486         case IP_VERSION(9, 4, 2):
1487         default:
1488             adev->gmc.gart_size = 512ULL << 20;
1489             break;
1490         case IP_VERSION(9, 1, 0):   /* DCE SG support */
1491         case IP_VERSION(9, 2, 2):   /* DCE SG support */
1492         case IP_VERSION(9, 3, 0):
1493             adev->gmc.gart_size = 1024ULL << 20;
1494             break;
1495         }
1496     } else {
1497         adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
1498     }
1499 
1500     adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
1501 
1502     gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
1503 
1504     return 0;
1505 }
1506 
1507 static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
1508 {
1509     int r;
1510 
1511     if (adev->gart.bo) {
1512         WARN(1, "VEGA10 PCIE GART already initialized\n");
1513         return 0;
1514     }
1515 
1516     if (adev->gmc.xgmi.connected_to_cpu) {
1517         adev->gmc.vmid0_page_table_depth = 1;
1518         adev->gmc.vmid0_page_table_block_size = 12;
1519     } else {
1520         adev->gmc.vmid0_page_table_depth = 0;
1521         adev->gmc.vmid0_page_table_block_size = 0;
1522     }
1523 
1524     /* Initialize common gart structure */
1525     r = amdgpu_gart_init(adev);
1526     if (r)
1527         return r;
1528     adev->gart.table_size = adev->gart.num_gpu_pages * 8;
1529     adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
1530                  AMDGPU_PTE_EXECUTABLE;
1531 
1532     r = amdgpu_gart_table_vram_alloc(adev);
1533     if (r)
1534         return r;
1535 
1536     if (adev->gmc.xgmi.connected_to_cpu) {
1537         r = amdgpu_gmc_pdb0_alloc(adev);
1538     }
1539 
1540     return r;
1541 }
1542 
1543 /**
1544  * gmc_v9_0_save_registers - saves regs
1545  *
1546  * @adev: amdgpu_device pointer
1547  *
1548  * This saves register values that must be
1549  * restored upon resume.
1550  */
1551 static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
1552 {
1553     if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
1554         (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
1555         adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
1556 }
1557 
1558 static int gmc_v9_0_sw_init(void *handle)
1559 {
1560     int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
1561     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1562 
1563     adev->gfxhub.funcs->init(adev);
1564 
1565     adev->mmhub.funcs->init(adev);
1566     if (adev->mca.funcs)
1567         adev->mca.funcs->init(adev);
1568 
1569     spin_lock_init(&adev->gmc.invalidate_lock);
1570 
1571     r = amdgpu_atomfirmware_get_vram_info(adev,
1572         &vram_width, &vram_type, &vram_vendor);
1573     if (amdgpu_sriov_vf(adev))
1574         /* For Vega10 SR-IOV, vram_width can't be read from ATOM as it is on
1575          * RAVEN, and the DF-related registers are not readable either, so
1576          * hardcoding seems to be the only way to set the correct vram_width.
1577          */
1578         adev->gmc.vram_width = 2048;
1579     else if (amdgpu_emu_mode != 1)
1580         adev->gmc.vram_width = vram_width;
1581 
1582     if (!adev->gmc.vram_width) {
1583         int chansize, numchan;
1584 
1585         /* hbm memory channel size */
1586         if (adev->flags & AMD_IS_APU)
1587             chansize = 64;
1588         else
1589             chansize = 128;
1590         if (adev->df.funcs &&
1591             adev->df.funcs->get_hbm_channel_number) {
1592             numchan = adev->df.funcs->get_hbm_channel_number(adev);
1593             adev->gmc.vram_width = numchan * chansize;
1594         }
1595     }
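    /*
     * Example: a dGPU reporting 4 HBM channels gives 4 * 128 = a 512-bit
     * interface, while an APU with 2 channels gives 2 * 64 = 128 bits.
     */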
1596 
1597     adev->gmc.vram_type = vram_type;
1598     adev->gmc.vram_vendor = vram_vendor;
1599     switch (adev->ip_versions[GC_HWIP][0]) {
1600     case IP_VERSION(9, 1, 0):
1601     case IP_VERSION(9, 2, 2):
1602         adev->num_vmhubs = 2;
1603 
1604         if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
1605             amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1606         } else {
1607             /* vm_size is 128TB + 512GB for legacy 3-level page support */
1608             amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
1609             adev->gmc.translate_further =
1610                 adev->vm_manager.num_level > 1;
1611         }
1612         break;
1613     case IP_VERSION(9, 0, 1):
1614     case IP_VERSION(9, 2, 1):
1615     case IP_VERSION(9, 4, 0):
1616     case IP_VERSION(9, 3, 0):
1617     case IP_VERSION(9, 4, 2):
1618         adev->num_vmhubs = 2;
1619 
1621         /*
1622          * To fulfill 4-level page support,
1623          * vm size is 256TB (48bit), maximum size of Vega10,
1624          * block size 512 (9bit)
1625          */
1626         /* SR-IOV restricts max_pfn to below AMDGPU_GMC_HOLE */
1627         if (amdgpu_sriov_vf(adev))
1628             amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
1629         else
1630             amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1631         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
1632             adev->gmc.translate_further = adev->vm_manager.num_level > 1;
1633         break;
1634     case IP_VERSION(9, 4, 1):
1635         adev->num_vmhubs = 3;
1636 
1637         /* Keep the vm size same with Vega20 */
1638         amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1639         adev->gmc.translate_further = adev->vm_manager.num_level > 1;
1640         break;
1641     default:
1642         break;
1643     }
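    /*
     * For reference (assuming the usual amdgpu_vm_adjust_size() signature:
     * min VM size in GB, default fragment size in bits, number of page-table
     * levels, max VM address bits): 256 * 1024 GB is the 256 TB / 48-bit
     * space described in the comments above.
     */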
1644 
1645     /* This interrupt is for VMC page faults. */
1646     r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
1647                 &adev->gmc.vm_fault);
1648     if (r)
1649         return r;
1650 
1651     if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
1652         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
1653                     &adev->gmc.vm_fault);
1654         if (r)
1655             return r;
1656     }
1657 
1658     r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
1659                 &adev->gmc.vm_fault);
1660 
1661     if (r)
1662         return r;
1663 
1664     if (!amdgpu_sriov_vf(adev) &&
1665         !adev->gmc.xgmi.connected_to_cpu) {
1666         /* interrupt sent to DF. */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
1668                       &adev->gmc.ecc_irq);
1669         if (r)
1670             return r;
1671     }
1672 
1673     /* Set the internal MC address mask
1674      * This is the max address of the GPU's
1675      * internal address space.
1676      */
1677     adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
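    /* 0xffffffffffff == (1ULL << 48) - 1, i.e. a 48-bit internal address space. */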
1678 
1679     dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48 : 44;
1680     r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
1681     if (r) {
1682         printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
1683         return r;
1684     }
1685     adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
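    /*
     * drm_need_swiotlb() reports (roughly) whether system memory may lie
     * beyond what a dma_addr_bits-wide mask can reach, in which case TTM has
     * to bounce such pages through SWIOTLB; that decision is cached in
     * need_swiotlb above.
     */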
1686 
1687     r = gmc_v9_0_mc_init(adev);
1688     if (r)
1689         return r;
1690 
1691     amdgpu_gmc_get_vbios_allocations(adev);
1692 
1693     /* Memory manager */
1694     r = amdgpu_bo_init(adev);
1695     if (r)
1696         return r;
1697 
1698     r = gmc_v9_0_gart_init(adev);
1699     if (r)
1700         return r;
1701 
1702     /*
1703      * number of VMs
1704      * VMID 0 is reserved for System
1705      * amdgpu graphics/compute will use VMIDs 1..n-1
1706      * amdkfd will use VMIDs n..15
1707      *
1708      * The first KFD VMID is 8 for GPUs with graphics, 3 for
1709      * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
1710      * for video processing.
1711      */
1712     adev->vm_manager.first_kfd_vmid =
1713         (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1714          adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) ? 3 : 8;
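    /*
     * Concretely: with first_kfd_vmid == 8, VMID 0 is the system VMID,
     * VMIDs 1..7 belong to amdgpu and 8..15 to amdkfd; on the compute-only
     * GC 9.4.1/9.4.2 parts (first_kfd_vmid == 3), VMIDs 3..15 go to amdkfd,
     * leaving 1..2 for video processing as noted above.
     */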
1715 
1716     amdgpu_vm_manager_init(adev);
1717 
1718     gmc_v9_0_save_registers(adev);
1719 
1720     return 0;
1721 }
1722 
1723 static int gmc_v9_0_sw_fini(void *handle)
1724 {
1725     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1726 
1727     amdgpu_gmc_ras_fini(adev);
1728     amdgpu_gem_force_release(adev);
1729     amdgpu_vm_manager_fini(adev);
1730     amdgpu_gart_table_vram_free(adev);
1731     amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
1732     amdgpu_bo_fini(adev);
1733 
1734     return 0;
1735 }
1736 
1737 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
1738 {
1739 
1740     switch (adev->ip_versions[MMHUB_HWIP][0]) {
1741     case IP_VERSION(9, 0, 0):
1742         if (amdgpu_sriov_vf(adev))
1743             break;
1744         fallthrough;
1745     case IP_VERSION(9, 4, 0):
1746         soc15_program_register_sequence(adev,
1747                         golden_settings_mmhub_1_0_0,
1748                         ARRAY_SIZE(golden_settings_mmhub_1_0_0));
1749         soc15_program_register_sequence(adev,
1750                         golden_settings_athub_1_0_0,
1751                         ARRAY_SIZE(golden_settings_athub_1_0_0));
1752         break;
1753     case IP_VERSION(9, 1, 0):
1754     case IP_VERSION(9, 2, 0):
1755         /* TODO for renoir */
1756         soc15_program_register_sequence(adev,
1757                         golden_settings_athub_1_0_0,
1758                         ARRAY_SIZE(golden_settings_athub_1_0_0));
1759         break;
1760     default:
1761         break;
1762     }
1763 }
1764 
1765 /**
1766  * gmc_v9_0_restore_registers - restores regs
1767  *
1768  * @adev: amdgpu_device pointer
1769  *
1770  * This restores register values, saved at suspend.
1771  */
1772 void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
1773 {
1774     if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
1775         (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
1776         WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
1777         WARN_ON(adev->gmc.sdpif_register !=
1778             RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
1779     }
1780 }
1781 
1782 /**
1783  * gmc_v9_0_gart_enable - gart enable
1784  *
1785  * @adev: amdgpu_device pointer
1786  */
1787 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1788 {
1789     int r;
1790 
1791     if (adev->gmc.xgmi.connected_to_cpu)
1792         amdgpu_gmc_init_pdb0(adev);
1793 
1794     if (adev->gart.bo == NULL) {
1795         dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
1796         return -EINVAL;
1797     }
1798 
1799     amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
1800     r = adev->gfxhub.funcs->gart_enable(adev);
1801     if (r)
1802         return r;
1803 
1804     r = adev->mmhub.funcs->gart_enable(adev);
1805     if (r)
1806         return r;
1807 
1808     DRM_INFO("PCIE GART of %uM enabled.\n",
1809          (unsigned)(adev->gmc.gart_size >> 20));
1810     if (adev->gmc.pdb0_bo)
1811         DRM_INFO("PDB0 located at 0x%016llX\n",
1812                 (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
1813     DRM_INFO("PTB located at 0x%016llX\n",
1814             (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
1815 
1816     return 0;
1817 }
1818 
1819 static int gmc_v9_0_hw_init(void *handle)
1820 {
1821     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1822     bool value;
1823     int i, r;
1824 
1825     /* The sequence of these two function calls matters. */
1826     gmc_v9_0_init_golden_registers(adev);
1827 
1828     if (adev->mode_info.num_crtc) {
1829         /* Lock out access through the VGA aperture */
1830         WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
1831         /* disable VGA render */
1832         WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
1833     }
1834 
1835     if (adev->mmhub.funcs->update_power_gating)
1836         adev->mmhub.funcs->update_power_gating(adev, true);
1837 
1838     adev->hdp.funcs->init_registers(adev);
1839 
1840     /* After HDP is initialized, flush HDP. */
1841     adev->hdp.funcs->flush_hdp(adev, NULL);
1842 
1843     if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
1844         value = false;
1845     else
1846         value = true;
1847 
1848     if (!amdgpu_sriov_vf(adev)) {
1849         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
1850         adev->mmhub.funcs->set_fault_enable_default(adev, value);
1851     }
1852     for (i = 0; i < adev->num_vmhubs; ++i)
1853         gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
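    /*
     * The loop above flushes VMID 0 (flush type 0) on every VM hub, i.e. the
     * GFX hub and the MM hub(s).
     */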
1854 
1855     if (adev->umc.funcs && adev->umc.funcs->init_registers)
1856         adev->umc.funcs->init_registers(adev);
1857 
1858     r = gmc_v9_0_gart_enable(adev);
1859     if (r)
1860         return r;
1861 
1862     if (amdgpu_emu_mode == 1)
1863         return amdgpu_gmc_vram_checking(adev);
1864     else
1865         return r;
1866 }
1867 
1868 /**
1869  * gmc_v9_0_gart_disable - gart disable
1870  *
1871  * @adev: amdgpu_device pointer
1872  *
1873  * This disables all VM page tables.
1874  */
1875 static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
1876 {
1877     adev->gfxhub.funcs->gart_disable(adev);
1878     adev->mmhub.funcs->gart_disable(adev);
1879 }
1880 
1881 static int gmc_v9_0_hw_fini(void *handle)
1882 {
1883     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1884 
1885     gmc_v9_0_gart_disable(adev);
1886 
1887     if (amdgpu_sriov_vf(adev)) {
1888         /* full access mode, so don't touch any GMC register */
1889         DRM_DEBUG("For an SR-IOV client, nothing should be done here.\n");
1890         return 0;
1891     }
1892 
1893     /*
1894      * Pair the operations done in gmc_v9_0_hw_init() so that the cached
1895      * power-gating state for GMC stays correct. Otherwise, gating again
1896      * on S3 resume will fail due to a stale cached state.
1897      */
1898     if (adev->mmhub.funcs->update_power_gating)
1899         adev->mmhub.funcs->update_power_gating(adev, false);
1900 
1901     amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
1902     amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1903 
1904     return 0;
1905 }
1906 
1907 static int gmc_v9_0_suspend(void *handle)
1908 {
1909     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1910 
1911     return gmc_v9_0_hw_fini(adev);
1912 }
1913 
1914 static int gmc_v9_0_resume(void *handle)
1915 {
1916     int r;
1917     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1918 
1919     r = gmc_v9_0_hw_init(adev);
1920     if (r)
1921         return r;
1922 
1923     amdgpu_vmid_reset_all(adev);
1924 
1925     return 0;
1926 }
1927 
1928 static bool gmc_v9_0_is_idle(void *handle)
1929 {
1930     /* MC is always ready in GMC v9. */
1931     return true;
1932 }
1933 
1934 static int gmc_v9_0_wait_for_idle(void *handle)
1935 {
1936     /* There is no need to wait for MC idle in GMC v9. */
1937     return 0;
1938 }
1939 
1940 static int gmc_v9_0_soft_reset(void *handle)
1941 {
1942     /* XXX for emulation. */
1943     return 0;
1944 }
1945 
1946 static int gmc_v9_0_set_clockgating_state(void *handle,
1947                     enum amd_clockgating_state state)
1948 {
1949     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1950 
1951     adev->mmhub.funcs->set_clockgating(adev, state);
1952 
1953     athub_v1_0_set_clockgating(adev, state);
1954 
1955     return 0;
1956 }
1957 
1958 static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
1959 {
1960     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1961 
1962     adev->mmhub.funcs->get_clockgating(adev, flags);
1963 
1964     athub_v1_0_get_clockgating(adev, flags);
1965 }
1966 
1967 static int gmc_v9_0_set_powergating_state(void *handle,
1968                     enum amd_powergating_state state)
1969 {
1970     return 0;
1971 }
1972 
1973 const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
1974     .name = "gmc_v9_0",
1975     .early_init = gmc_v9_0_early_init,
1976     .late_init = gmc_v9_0_late_init,
1977     .sw_init = gmc_v9_0_sw_init,
1978     .sw_fini = gmc_v9_0_sw_fini,
1979     .hw_init = gmc_v9_0_hw_init,
1980     .hw_fini = gmc_v9_0_hw_fini,
1981     .suspend = gmc_v9_0_suspend,
1982     .resume = gmc_v9_0_resume,
1983     .is_idle = gmc_v9_0_is_idle,
1984     .wait_for_idle = gmc_v9_0_wait_for_idle,
1985     .soft_reset = gmc_v9_0_soft_reset,
1986     .set_clockgating_state = gmc_v9_0_set_clockgating_state,
1987     .set_powergating_state = gmc_v9_0_set_powergating_state,
1988     .get_clockgating_state = gmc_v9_0_get_clockgating_state,
1989 };
1990 
1991 const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
1992 {
1993     .type = AMD_IP_BLOCK_TYPE_GMC,
1994     .major = 9,
1995     .minor = 0,
1996     .rev = 0,
1997     .funcs = &gmc_v9_0_ip_funcs,
1998 };