Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2021 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 #include "amdgpu_ras.h"
0024 #include "amdgpu.h"
0025 #include "amdgpu_mca.h"
0026 
0027 #include "umc/umc_6_7_0_offset.h"
0028 #include "umc/umc_6_7_0_sh_mask.h"
0029 
0030 void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
0031                           uint64_t mc_status_addr,
0032                           unsigned long *error_count)
0033 {
0034     uint64_t mc_status = RREG64_PCIE(mc_status_addr);
0035 
0036     if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
0037         REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
0038         *error_count += 1;
0039 }
0040 
0041 void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
0042                         uint64_t mc_status_addr,
0043                         unsigned long *error_count)
0044 {
0045     uint64_t mc_status = RREG64_PCIE(mc_status_addr);
0046 
0047     if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
0048         (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
0049         REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
0050         REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
0051         REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
0052         REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
0053         *error_count += 1;
0054 }
0055 
/**
 * amdgpu_mca_reset_error_count - zero the status register
 * @adev: amdgpu device pointer; not referenced by name in this body
 *        (NOTE(review): presumably consumed by WREG64_PCIE - confirm)
 * @mc_status_addr: address of the status register to overwrite
 *
 * Writes a 64-bit zero to @mc_status_addr with WREG64_PCIE. Nothing is read
 * back and no software counter is modified here.
 */
void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
                                  uint64_t mc_status_addr)
{
    const uint64_t cleared_status = 0x0ULL;

    WREG64_PCIE(mc_status_addr, cleared_status);
}
0061 
0062 void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
0063                       uint64_t mc_status_addr,
0064                       void *ras_error_status)
0065 {
0066     struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
0067 
0068     amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
0069     amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
0070 
0071     amdgpu_mca_reset_error_count(adev, mc_status_addr);
0072 }