Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2018 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  *
0023  */
0024 #ifndef _AMDGPU_RAS_H
0025 #define _AMDGPU_RAS_H
0026 
0027 #include <linux/debugfs.h>
0028 #include <linux/list.h>
0029 #include "ta_ras_if.h"
0030 #include "amdgpu_ras_eeprom.h"
0031 
0032 struct amdgpu_iv_entry; /* forward declaration: this header only uses pointers to it */
0033 
     /*
      * Bit 0 of a flags word.
      * NOTE(review): presumably tested against amdgpu_ras.flags - confirm at the use sites.
      */
0034 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS       (0x1 << 0)
0035 
0036 enum amdgpu_ras_block {
         /*
          * RAS block ids, numbered consecutively from 0 (UMC).
          * AMDGPU_RAS_BLOCK__LAST is not a block: it is one past the last id,
          * i.e. the number of block ids listed here.
          */
0037     AMDGPU_RAS_BLOCK__UMC = 0,
0038     AMDGPU_RAS_BLOCK__SDMA,
0039     AMDGPU_RAS_BLOCK__GFX,
0040     AMDGPU_RAS_BLOCK__MMHUB,
0041     AMDGPU_RAS_BLOCK__ATHUB,
0042     AMDGPU_RAS_BLOCK__PCIE_BIF,
0043     AMDGPU_RAS_BLOCK__HDP,
0044     AMDGPU_RAS_BLOCK__XGMI_WAFL,
0045     AMDGPU_RAS_BLOCK__DF,
0046     AMDGPU_RAS_BLOCK__SMN,
0047     AMDGPU_RAS_BLOCK__SEM,
0048     AMDGPU_RAS_BLOCK__MP0,
0049     AMDGPU_RAS_BLOCK__MP1,
0050     AMDGPU_RAS_BLOCK__FUSE,
0051     AMDGPU_RAS_BLOCK__MCA,
0052     AMDGPU_RAS_BLOCK__VCN,
0053     AMDGPU_RAS_BLOCK__JPEG,
0054 
0055     AMDGPU_RAS_BLOCK__LAST
0056 };
0057 
0058 enum amdgpu_ras_mca_block {
         /*
          * MCA block ids, numbered consecutively from 0 (MP0).
          * AMDGPU_RAS_MCA_BLOCK__LAST is one past the last id, i.e. the
          * number of MCA block ids listed here.
          */
0059     AMDGPU_RAS_MCA_BLOCK__MP0 = 0,
0060     AMDGPU_RAS_MCA_BLOCK__MP1,
0061     AMDGPU_RAS_MCA_BLOCK__MPIO,
0062     AMDGPU_RAS_MCA_BLOCK__IOHC,
0063 
0064     AMDGPU_RAS_MCA_BLOCK__LAST
0065 };
0066 
0067 #define AMDGPU_RAS_BLOCK_COUNT  AMDGPU_RAS_BLOCK__LAST /* number of ids in enum amdgpu_ras_block */
0068 #define AMDGPU_RAS_MCA_BLOCK_COUNT  AMDGPU_RAS_MCA_BLOCK__LAST /* number of ids in enum amdgpu_ras_mca_block */
     /* unsigned long long mask with bits 0 .. AMDGPU_RAS_BLOCK_COUNT - 1 set, one bit per block id */
0069 #define AMDGPU_RAS_BLOCK_MASK   ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
0070 
0071 enum amdgpu_ras_gfx_subblock {
         /*
          * GFX sub-block indices, numbered consecutively from 0.
          *
          * Every *_INDEX_START marker shares its value with the first entry
          * of its group (that entry is explicitly assigned the marker), and
          * every *_INDEX_END marker is assigned the value of the group's last
          * entry.  The markers therefore bracket a group without taking an
          * index of their own.  Groups that are split into numbered ranges
          * (SQC, TCC, EA) carry both per-range INDEXn_START/END markers and
          * an outer INDEX_START/END pair.
          *
          * AMDGPU_RAS_BLOCK__GFX_MAX is one past the last sub-block index.
          */
0072     /* CPC */
0073     AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
0074     AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
0075         AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
0076     AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
0077     AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
0078     AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
0079     AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
0080     AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
0081     AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
0082     AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
0083     AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
0084         AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
0085     /* CPF */
0086     AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
0087     AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
0088         AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
0089     AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
0090     AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
0091     AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
0092     /* CPG */
0093     AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
0094     AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
0095         AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
0096     AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
0097     AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
0098     AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
0099     /* GDS */
0100     AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
0101     AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
0102     AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
0103     AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
0104     AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
0105     AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
0106     AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
0107         AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
0108     /* SPI */
0109     AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
0110     /* SQ */
0111     AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
0112     AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
0113     AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
0114     AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
0115     AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
0116     AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
0117     /* SQC (3 ranges) */
0118     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
0119     /* SQC range 0 */
0120     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
0121         AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
0122     AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
0123         AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
0124     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
0125     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
0126     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
0127     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
0128     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
0129     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
0130     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
0131         AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
0132     /* SQC range 1 */
0133     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
0134     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
0135         AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
0136     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
0137     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
0138     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
0139     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
0140     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
0141     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
0142     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
0143     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
0144     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
0145         AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
0146     /* SQC range 2 */
0147     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
0148     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
0149         AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
0150     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
0151     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
0152     AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
0153     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
0154     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
0155     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
0156     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
0157     AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
0158     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
0159         AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
0160     AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
0161         AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
0162     /* TA */
0163     AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
0164     AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
0165         AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
0166     AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
0167     AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
0168     AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
0169     AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
0170     AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
0171     /* TCA */
0172     AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
0173     AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
0174         AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
0175     AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
0176     AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
0177         AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
0178     /* TCC (5 sub-ranges) */
0179     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
0180     /* TCC range 0 */
0181     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
0182         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
0183     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
0184         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
0185     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
0186     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
0187     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
0188     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
0189     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
0190     AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
0191     AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
0192     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
0193         AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
0194     /* TCC range 1 */
0195     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
0196     AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
0197         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
0198     AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
0199     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
0200         AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
0201     /* TCC range 2 */
0202     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
0203     AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
0204         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
0205     AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
0206     AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
0207     AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
0208     AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
0209     AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
0210     AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
0211     AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
0212     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
0213         AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
0214     /* TCC range 3 */
0215     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
0216     AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
0217         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
0218     AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
0219     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
0220         AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
0221     /* TCC range 4 */
0222     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
0223     AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
0224         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
0225     AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
0226     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
0227         AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
0228     AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
0229         AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
0230     /* TCI */
0231     AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
0232     /* TCP */
0233     AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
0234     AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
0235         AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
0236     AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
0237     AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
0238     AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
0239     AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
0240     AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
0241     AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
0242     AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
0243         AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
0244     /* TD */
0245     AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
0246     AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
0247         AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
0248     AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
0249     AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
0250     AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
0251     /* EA (3 sub-ranges) */
0252     AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
0253     /* EA range 0 */
0254     AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
0255         AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
0256     AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
0257         AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
0258     AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
0259     AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
0260     AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
0261     AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
0262     AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
0263     AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
0264     AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
0265     AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
0266         AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
0267     /* EA range 1 */
0268     AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
0269     AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
0270         AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
0271     AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
0272     AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
0273     AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
0274     AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
0275     AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
0276     AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
0277     AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
0278         AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
0279     /* EA range 2 */
0280     AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
0281     AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
0282         AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
0283     AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
0284     AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
0285     AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
0286     AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
0287         AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
0288     AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
0289         AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
0290     /* UTC VM L2 bank */
0291     AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
0292     /* UTC VM walker */
0293     AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
0294     /* UTC ATC L2 2MB cache */
0295     AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
0296     /* UTC ATC L2 4KB cache */
0297     AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
0298     AMDGPU_RAS_BLOCK__GFX_MAX
0299 };
0300 
0301 enum amdgpu_ras_error_type {
         /*
          * Error type ids.  NONE is 0; every other value is a distinct
          * single bit (1, 2, 4, 8).
          * NOTE(review): the bit-per-type encoding suggests the values may be
          * OR-ed into a mask somewhere - confirm at the use sites.
          */
0302     AMDGPU_RAS_ERROR__NONE                          = 0,
0303     AMDGPU_RAS_ERROR__PARITY                        = 1,
0304     AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE                    = 2,
0305     AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE                   = 4,
0306     AMDGPU_RAS_ERROR__POISON                        = 8,
0307 };
0308 
0309 enum amdgpu_ras_ret {
         /*
          * Result codes.  Only AMDGPU_RAS_SUCCESS is given an explicit value
          * (0); the remaining codes follow consecutively (1 .. 4).
          * NOTE(review): UE / CE presumably stand for uncorrectable /
          * correctable error; the meaning of PT is not evident from this
          * header - confirm.
          */
0310     AMDGPU_RAS_SUCCESS = 0,
0311     AMDGPU_RAS_FAIL,
0312     AMDGPU_RAS_UE,
0313     AMDGPU_RAS_CE,
0314     AMDGPU_RAS_PT,
0315 };
0316 
0317 struct ras_common_if {
         /*
          * Common header naming one RAS block.  It is embedded as the first
          * member of struct ras_manager, the ras_*_if structs and
          * struct amdgpu_ras_block_object later in this header.
          */
0318     enum amdgpu_ras_block block;       /* block id */
0319     enum amdgpu_ras_error_type type;   /* error type id */
0320     uint32_t sub_block_index;          /* NOTE(review): presumably an enum amdgpu_ras_gfx_subblock value for GFX - confirm */
0321     char name[32];                     /* fixed 32-byte name buffer */
0322 };
0323 
0324 #define MAX_UMC_CHANNEL_NUM 32 /* length of the ecc[] array in struct umc_ecc_info */
0325 
0326 struct ecc_info_per_ch {
         /*
          * ECC information for one channel; struct umc_ecc_info holds
          * MAX_UMC_CHANNEL_NUM of these.
          * NOTE(review): field meanings below are read off the names only -
          * confirm against the code that fills this table.
          */
0327     uint16_t ce_count_lo_chip;  /* presumably correctable-error count, low chip */
0328     uint16_t ce_count_hi_chip;  /* presumably correctable-error count, high chip */
0329     uint64_t mca_umc_status;
0330     uint64_t mca_umc_addr;
0331     uint64_t mca_ceumc_addr;
0332 };
0333 
0334 struct umc_ecc_info {
         /* One entry per channel, MAX_UMC_CHANNEL_NUM entries in total. */
0335     struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
0336 
0337     /* Whether the SMU ECC table supports
0338      * recording correctable error addresses.
0339      */
0340     int record_ce_addr_supported;
0341 };
0342 
0343 struct amdgpu_ras {
         /*
          * Per-device RAS context: carries a back-pointer to the owning
          * amdgpu_device (adev) and is the type handled by
          * amdgpu_ras_get_context() / amdgpu_ras_set_context().
          */
0344     /* ras infrastructure */
0345     /* for ras itself. */
0346     uint32_t features;
0347     struct list_head head;
0348     /* sysfs */
0349     struct device_attribute features_attr;
0350     struct bin_attribute badpages_attr;
0351     struct dentry *de_ras_eeprom_table;
0352     /* block array */
0353     struct ras_manager *objs;
0354 
0355     /* gpu recovery */
0356     struct work_struct recovery_work;
0357     atomic_t in_recovery;
0358     struct amdgpu_device *adev;
0359     /* error handler data */
0360     struct ras_err_handler_data *eh_data;
0361     struct mutex recovery_lock;
0362 
         /* NOTE(review): presumably holds AMDGPU_RAS_FLAG_* bits - confirm */
0363     uint32_t flags;
0364     bool reboot;
0365     struct amdgpu_ras_eeprom_control eeprom_control;
0366 
0367     bool error_query_ready;
0368 
0369     /* bad page count threshold */
0370     uint32_t bad_page_cnt_threshold;
0371 
0372     /* disable RAS error count harvesting during recovery */
0373     bool disable_ras_err_cnt_harvest;
0374 
0375     /* whether poison mode is supported */
0376     bool poison_supported;
0377 
0378     /* delayed work for counting RAS errors */
0379     struct delayed_work ras_counte_delay_work;
0380     atomic_t ras_ue_count;
0381     atomic_t ras_ce_count;
0382 
0383     /* UMC error info queried from the SMU */
0384     struct umc_ecc_info umc_ecc;
0385 
0386     /* whether the SMU needs to be given updated bad channel info */
0387     bool update_channel_flag;
0388 };
0389 
0390 struct ras_fs_data {
         /* Node names, each stored in a fixed 32-byte buffer. */
0391     char sysfs_name[32];
0392     char debugfs_name[32];
0393 };
0394 
0395 struct ras_err_data {
         /*
          * Error counters plus a pointer to EEPROM table records.
          * NOTE(review): ue / ce presumably mean uncorrectable / correctable,
          * and err_addr_cnt presumably counts the valid entries behind
          * err_addr - confirm against the code that fills this struct.
          */
0396     unsigned long ue_count;
0397     unsigned long ce_count;
0398     unsigned long err_addr_cnt;
0399     struct eeprom_table_record *err_addr;
0400 };
0401 
0402 struct ras_err_handler_data {
         /* Bookkeeping for the bad page record array referenced by bps. */
0403     /* points to the array of bad page records */
0404     struct eeprom_table_record *bps;
0405     /* number of entries in use */
0406     int count;
0407     /* number of new entries that still fit */
0408     int space_left;
0409 };
0410 
0411 typedef int (*ras_ih_cb)(struct amdgpu_device *adev, /* callback type of the .cb / .ras_cb members in this header */
0412         void *err_data, /* untyped; NOTE(review): concrete type not visible here - confirm with the callers */
0413         struct amdgpu_iv_entry *entry);
0414 
0415 struct ras_ih_data {
         /*
          * Interrupt handling state embedded in struct ras_manager.
          * NOTE(review): ring / ring_size / element_size / rptr / wptr look
          * like a byte ring buffer with read and write offsets - confirm
          * against the code that produces and consumes the entries.
          */
0416     /* interrupt bottom half */
0417     struct work_struct ih_work;
0418     int inuse;
0419     /* IP callback */
0420     ras_ih_cb cb;
0421     /* full of entries */
0422     unsigned char *ring;
0423     unsigned int ring_size;
0424     unsigned int element_size;
0425     unsigned int aligned_element_size;
0426     unsigned int rptr;
0427     unsigned int wptr;
0428 };
0429 
0430 struct ras_manager {
         /*
          * Per-block management object: struct amdgpu_ras.objs points at
          * these, and amdgpu_ras_find_obj() returns one for a given
          * ras_common_if.
          */
0431     struct ras_common_if head;
0432     /* reference count */
0433     int use;
0434     /* ras block link */
0435     struct list_head node;
0436     /* the device */
0437     struct amdgpu_device *adev;
0438     /* sysfs */
0439     struct device_attribute sysfs_attr;
0440     int attr_inuse;
0441 
0442     /* fs node name */
0443     struct ras_fs_data fs_data;
0444 
0445     /* IH data */
0446     struct ras_ih_data ih_data;
0447 
         /* error counters and addresses recorded for this block */
0448     struct ras_err_data err_data;
0449 };
0450 
0451 struct ras_badpage {
         /*
          * Description of one bad page as three unsigned ints.
          * NOTE(review): the units of bp / size and the meaning of the
          * flags bits are not visible in this header - confirm.
          */
0452     unsigned int bp;
0453     unsigned int size;
0454     unsigned int flags;
0455 };
0456 
0457 /* interfaces for IP */
0458 struct ras_fs_if {
         /* Block header plus the names of its sysfs and debugfs nodes. */
0459     struct ras_common_if head;
0460     const char* sysfs_name;    /* pointer only; the string is owned elsewhere */
0461     char debugfs_name[32];     /* stored inline, 32-byte buffer */
0462 };
0463 
0464 struct ras_query_if {
         /*
          * Argument of amdgpu_ras_query_error_status(): block header plus
          * two counters.
          * NOTE(review): presumably head is input and the counts are
          * filled in by the query - confirm.
          */
0465     struct ras_common_if head;
0466     unsigned long ue_count;
0467     unsigned long ce_count;
0468 };
0469 
0470 struct ras_inject_if {
         /* Argument of amdgpu_ras_error_inject(): block header, address and value. */
0471     struct ras_common_if head;
0472     uint64_t address;
0473     uint64_t value;
0474 };
0475 
0476 struct ras_cure_if {
         /* Block header plus a 64-bit address; no user is visible in this header. */
0477     struct ras_common_if head;
0478     uint64_t address;
0479 };
0480 
0481 struct ras_ih_if {
         /* Block header plus the interrupt callback (ras_ih_cb) for that block. */
0482     struct ras_common_if head;
0483     ras_ih_cb cb;
0484 };
0485 
0486 struct ras_dispatch_if {
         /* Argument of amdgpu_ras_interrupt_dispatch(): block header plus an IV entry pointer. */
0487     struct ras_common_if head;
0488     struct amdgpu_iv_entry *entry;
0489 };
0490 
0491 struct ras_debug_if {
         /*
          * head and inject overlay each other.  struct ras_inject_if starts
          * with a struct ras_common_if, so head aliases inject.head.
          */
0492     union {
0493         struct ras_common_if head;
0494         struct ras_inject_if inject;
0495     };
         /* operation selector; NOTE(review): valid values are not defined in this header */
0496     int op;
0497 };
0498 
0499 struct amdgpu_ras_block_object {
         /*
          * Description of one RAS block as passed to
          * amdgpu_ras_register_ras_block(): a common header, optional
          * per-block callbacks and a table of hardware operations.
          * NOTE(review): whether NULL callbacks are tolerated is decided by
          * the callers, not by this header - confirm before leaving any unset.
          */
0500     struct ras_common_if  ras_comm;
0501 
0502     int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
0503                 enum amdgpu_ras_block block, uint32_t sub_block_index);
0504     int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
0505     void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
0506     ras_ih_cb ras_cb;
0507     const struct amdgpu_ras_block_hw_ops *hw_ops;
0508 };
0509 
0510 struct amdgpu_ras_block_hw_ops {
         /*
          * Hardware operation table referenced by
          * amdgpu_ras_block_object.hw_ops.
          * NOTE(review): inject_if and ras_error_status are void pointers;
          * their concrete types are not visible here (struct ras_inject_if
          * and struct ras_err_data look like candidates) - confirm.
          */
0511     int  (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
0512     void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
0513     void (*query_ras_error_status)(struct amdgpu_device *adev);
0514     void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
0515     void (*reset_ras_error_count)(struct amdgpu_device *adev);
0516     void (*reset_ras_error_status)(struct amdgpu_device *adev);
0517     bool (*query_poison_status)(struct amdgpu_device *adev);
0518     bool (*handle_poison_consumption)(struct amdgpu_device *adev);
0519 };
0520 
0521 /* work flow
0522  * vbios
0523  * 1: ras feature enable (enabled by default)
0524  * psp
0525  * 2: ras framework init (in ip_init)
0526  * IP
0527  * 3: IH add
0528  * 4: debugfs/sysfs create
0529  * 5: query/inject
0530  * 6: debugfs/sysfs remove
0531  * 7: IH remove
0532  * 8: feature disable
0533  */
0534 
0535 
0536 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
0537 
0538 void amdgpu_ras_resume(struct amdgpu_device *adev);
0539 void amdgpu_ras_suspend(struct amdgpu_device *adev);
0540 
     /*
      * NOTE(review): ce_count / ue_count are non-const pointers, so they look
      * like outputs; whether NULL is accepted cannot be told from this header.
      */
0541 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
0542                  unsigned long *ce_count,
0543                  unsigned long *ue_count);
0544 
0545 /* error handling functions */
     /* NOTE(review): 'pages' is presumably the number of records in 'bps' - confirm. */
0546 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
0547         struct eeprom_table_record *bps, int pages);
0548 
0549 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
0550 
0551 static inline enum ta_ras_block amdgpu_ras_block_to_ta(enum amdgpu_ras_block block)
0552 {
         /*
          * Translate a driver block id into the TA_RAS_BLOCK__* id of the
          * same name.  Any id without a case below - this includes
          * AMDGPU_RAS_BLOCK__VCN, AMDGPU_RAS_BLOCK__JPEG and
          * AMDGPU_RAS_BLOCK__LAST - warns once and yields TA_RAS_BLOCK__UMC.
          */
0553     switch (block) {
0554     case AMDGPU_RAS_BLOCK__UMC:
0555         return TA_RAS_BLOCK__UMC;
0556     case AMDGPU_RAS_BLOCK__SDMA:
0557         return TA_RAS_BLOCK__SDMA;
0558     case AMDGPU_RAS_BLOCK__GFX:
0559         return TA_RAS_BLOCK__GFX;
0560     case AMDGPU_RAS_BLOCK__MMHUB:
0561         return TA_RAS_BLOCK__MMHUB;
0562     case AMDGPU_RAS_BLOCK__ATHUB:
0563         return TA_RAS_BLOCK__ATHUB;
0564     case AMDGPU_RAS_BLOCK__PCIE_BIF:
0565         return TA_RAS_BLOCK__PCIE_BIF;
0566     case AMDGPU_RAS_BLOCK__HDP:
0567         return TA_RAS_BLOCK__HDP;
0568     case AMDGPU_RAS_BLOCK__XGMI_WAFL:
0569         return TA_RAS_BLOCK__XGMI_WAFL;
0570     case AMDGPU_RAS_BLOCK__DF:
0571         return TA_RAS_BLOCK__DF;
0572     case AMDGPU_RAS_BLOCK__SMN:
0573         return TA_RAS_BLOCK__SMN;
0574     case AMDGPU_RAS_BLOCK__SEM:
0575         return TA_RAS_BLOCK__SEM;
0576     case AMDGPU_RAS_BLOCK__MP0:
0577         return TA_RAS_BLOCK__MP0;
0578     case AMDGPU_RAS_BLOCK__MP1:
0579         return TA_RAS_BLOCK__MP1;
0580     case AMDGPU_RAS_BLOCK__FUSE:
0581         return TA_RAS_BLOCK__FUSE;
0582     case AMDGPU_RAS_BLOCK__MCA:
0583         return TA_RAS_BLOCK__MCA;
0584     default:
             /* unmapped id: report it once, then fall back to UMC */
0585         WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
0586         return TA_RAS_BLOCK__UMC;
0587     }
0588 }
0589 
0590 static inline enum ta_ras_error_type
0591 amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
0592     switch (error) {
0593     case AMDGPU_RAS_ERROR__NONE:
0594         return TA_RAS_ERROR__NONE;
0595     case AMDGPU_RAS_ERROR__PARITY:
0596         return TA_RAS_ERROR__PARITY;
0597     case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
0598         return TA_RAS_ERROR__SINGLE_CORRECTABLE;
0599     case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
0600         return TA_RAS_ERROR__MULTI_UNCORRECTABLE;
0601     case AMDGPU_RAS_ERROR__POISON:
0602         return TA_RAS_ERROR__POISON;
0603     default:
0604         WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error);
0605         return TA_RAS_ERROR__NONE;
0606     }
0607 }
0608 
0609 /* called in ip_init and ip_fini */
0610 int amdgpu_ras_init(struct amdgpu_device *adev);
0611 int amdgpu_ras_late_init(struct amdgpu_device *adev);
0612 int amdgpu_ras_fini(struct amdgpu_device *adev);
0613 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
0614 
     /*
      * Per-block entry points: the ras_common_if argument (ras_block / head)
      * names the block being operated on.
      */
0615 int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
0616             struct ras_common_if *ras_block);
0617 
0618 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
0619               struct ras_common_if *ras_block);
0620 
0621 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
0622         struct ras_common_if *head, bool enable);
0623 
0624 int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
0625         struct ras_common_if *head, bool enable);
0626 
0627 int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
0628         struct ras_common_if *head);
0629 
0630 int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
0631         struct ras_common_if *head);
0632 
0633 void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
0634 
0635 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
0636         struct ras_query_if *info);
0637 
     /* Takes the bare block id rather than a ras_common_if. */
0638 int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
0639         enum amdgpu_ras_block block);
0640 
0641 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
0642         struct ras_inject_if *info);
0643 
0644 int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
0645         struct ras_common_if *head);
0646 
0647 int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
0648         struct ras_common_if *head);
0649 
0650 int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
0651         struct ras_dispatch_if *info);
0652 
     /* NOTE(review): the result when no object matches 'head' is not visible here - check for NULL. */
0653 struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
0654         struct ras_common_if *head);
0655 
     /* Defined outside this header; read and reset by the two inline helpers that follow. */
0656 extern atomic_t amdgpu_ras_in_intr;
0657 
0658 static inline bool amdgpu_ras_intr_triggered(void)
0659 {
         /* True while the global amdgpu_ras_in_intr atomic reads non-zero. */
0660     return atomic_read(&amdgpu_ras_in_intr) != 0;
0661 }
0662 
0663 static inline void amdgpu_ras_intr_cleared(void)
0664 {
         /* Reset the global amdgpu_ras_in_intr atomic to 0. */
0665     atomic_set(&amdgpu_ras_in_intr, 0);
0666 }
0667 
0668 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
0669 
0670 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
0671 
0672 bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
0673 
0674 void amdgpu_release_ras_context(struct amdgpu_device *adev);
0675 
0676 int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
0677 
     /* Returns a string for the block named by ras_block; NOTE(review): ownership of the string is not visible here. */
0678 const char *get_ras_block_str(struct ras_common_if *ras_block);
0679 
0680 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
0681 
     /* 'block' is a plain unsigned int; NOTE(review): presumably an enum amdgpu_ras_block value - confirm. */
0682 int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
0683 
0684 int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
0685 
     /* Accessors for the struct amdgpu_ras context associated with adev. */
0686 struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
0687 
0688 int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
0689 
0690 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
0691                 struct amdgpu_ras_block_object *ras_block_obj);
0692 void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
0693 #endif