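/*
 * kfd2kgd interface implementation for GFX7 (CIK) hardware: helpers used by
 * the KFD compute driver to program compute (HQD) and SDMA queues, PASID to
 * VMID mappings and related per-VMID state through the amdgpu driver.
 */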

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

enum {
	MAX_TRAPID = 8,
	MAX_WATCH_ADDRESSES = 4
};
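
/*
 * lock_srbm() selects the MEC/pipe/queue/VMID register bank through
 * SRBM_GFX_CNTL and keeps srbm_mutex held until unlock_srbm() restores the
 * default bank, so banked register accesses in between are serialized.
 */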
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(adev);
}

static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
					unsigned int vmid)
{
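	/*
	 * The per-VMID bit in ATC_VMID_PASID_MAPPING_UPDATE_STATUS may read
	 * as 0 either because an update is still in flight or because
	 * software already acknowledged a previous one, so always wait for
	 * the bit to assert and then clear it by writing it back.
	 */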
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
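
	/* Mirror the mapping for the interrupt handler (IH) block. */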
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
			m->sdma_engine_id, m->sdma_queue_id, retval);

	return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}

static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct cik_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);
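
	/*
	 * The HQD registers from CP_MQD_BASE_ADDR up to CP_MQD_CONTROL are
	 * contiguous and mirror the MQD layout starting at
	 * cp_mqd_base_addr_lo, so they can be programmed in one loop.
	 */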
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
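
	/*
	 * Enable the queue doorbell so subsequent user-space doorbell writes
	 * reach the hardware queue.
	 */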
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
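
	/*
	 * read_user_wptr() copies from user space and may fault, taking mm
	 * locks; drop the SRBM lock around it so those locks are never
	 * acquired while srbm_mutex is held.
	 */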
	release_queue(adev);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(adev, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}

static int kgd_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			uint32_t __user *wptr, struct mm_struct *mm)
{
	struct cik_sdma_rlc_registers *m;
	unsigned long end_jiffies;
	uint32_t sdma_rlc_reg_offset;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
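
	/*
	 * Disable the ring buffer while reprogramming and wait for the RLC
	 * queue context to go idle before loading the new state.
	 */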
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
			m->sdma_rlc_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
			m->sdma_rlc_rb_rptr);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
			m->sdma_rlc_virtual_addr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdma_rlc_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdma_rlc_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdma_rlc_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(adev, pipe_id, queue_id);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}
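
	/*
	 * If the IQ timer is active with a wait time at or near zero, issuing
	 * a dequeue request is not safe. Poll (with interrupts and preemption
	 * disabled) until the timer clears or its wait time is large enough,
	 * and make sure no dequeue request is still pending before issuing a
	 * new one.
	 */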
	local_irq_save(flags);
	preempt_disable();
	retry = 5000;
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3)
				break;
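
			/*
			 * A wait time of 3 is already safe for the CP, but an
			 * MMIO read/write takes close to a microsecond, so
			 * require at least 10 to leave extra margin.
			 */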
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out\n");
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
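
	/*
	 * Disable the ring buffer and wait for the RLC queue context to go
	 * idle; the current read pointer is saved back into the MQD below.
	 */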
	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static int kgd_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);
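
	/* Restore GRBM_GFX_INDEX to broadcast mode before releasing the lock. */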
	data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

	WREG32(mmGRBM_GFX_INDEX, data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static void set_scratch_backing_va(struct amdgpu_device *adev,
					uint64_t va, uint32_t vmid)
{
	lock_srbm(adev, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(adev);
}

static void set_vm_context_page_table_base(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
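
	/* KFD VMIDs start at 8 and map onto the VM_CONTEXT8+ registers. */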
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
		lower_32_bits(page_table_base));
}
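
/*
 * Read the VMID recorded by the most recent VM protection fault from
 * VM_CONTEXT1_PROTECTION_FAULT_STATUS.
 */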
static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
{
	uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);

	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}

const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.wave_control_execute = kgd_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
	.set_scratch_backing_va = set_scratch_backing_va,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
};