#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"

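/*
 * HQD register dump helper: DUMP_REG() records one register as a
 * (byte offset, value) pair in the caller-provided (*dump) array,
 * bounds-checked against HQD_N_REGS.
 */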
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

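/*
 * Return the dword register offset of the RLC (user-mode queue) register
 * block for the given SDMA engine/queue pair. Arcturus has eight SDMA
 * engines, each with its own register aperture, so the base is looked up
 * per engine and the per-queue stride (RLC1 - RLC0) is added for the
 * requested queue.
 */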
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
		break;
	case 4:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
		break;
	case 5:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
		break;
	case 6:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
		break;
	case 7:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

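/*
 * Load an SDMA user-queue MQD into its RLC queue registers and enable the
 * ring buffer: disable the RB, wait up to 2 s for the queue context to go
 * idle, program the doorbell and read/write pointers (fetching the write
 * pointer from user space when possible, otherwise falling back to the
 * saved read pointer), set the ring base and rptr-report addresses, then
 * re-enable the RB.
 */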
int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
	       m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
	       m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
	       m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
	       m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
	       m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

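/*
 * Dump the RLC queue registers of one SDMA engine/queue pair into a
 * kmalloc'ed array of (offset, value) pairs. The caller is responsible
 * for freeing *dump.
 */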
int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

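/*
 * An SDMA user queue is considered occupied if its ring buffer is still
 * enabled in RB_CNTL.
 */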
bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
				void *mqd)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

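/*
 * Tear down an SDMA user queue: disable the ring buffer, wait up to
 * @utimeout milliseconds for the queue context to drain to idle, clear the
 * doorbell and save the current read pointer back into the MQD so the
 * queue state can be restored later.
 */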
int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

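/*
 * KFD-to-KGD interface for Arcturus: compute (GFX v9) HQD handling is shared
 * with the common gfx_v9 implementation, while the SDMA HQD callbacks use
 * the Arcturus-specific versions above, which cover all eight SDMA engines.
 */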
const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base =
				kgd_gfx_v9_set_vm_context_page_table_base,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};