/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

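/* Select the target MEC/pipe/queue/VMID via GRBM while holding the SRBM
 * mutex; unlock_srbm() restores the default selection and releases it.
 */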
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
		      uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
			  uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
	WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
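	/* APE1 no longer exists on GFX10, so sh_mem_ape1_base and
	 * sh_mem_ape1_limit are not programmed here.
	 */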

	unlock_srbm(adev);
}

static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
				      unsigned int vmid)
{
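	/*
	 * Encode the PASID with the VALID bit set; a PASID of 0 clears
	 * the VMID-to-PASID mapping.
	 */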
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);

	pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

#if 0
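	/* Optionally wait for the ATHUB mapping update to complete and then
	 * clear the per-VMID status bit; this path is currently compiled out.
	 */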
	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	pr_debug("ATHUB mapping update finished\n");
	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);
#endif
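	/* Mirror the VMID-to-PASID mapping for the IH block. */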
	pr_debug("update mapping for IH block and mmhub");
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	return 0;
}
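/* Enable time-stamp and opcode-error interrupts for the given compute pipe.
 * Note that the CP_INT_CNTL_RING0 field masks are reused here for
 * CPC_INT_CNTL.
 */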
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
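		/* Note that mmSDMA0_RLC0_RB_CNTL (not mmSDMA1_RLC0_RB_CNTL) is
		 * subtracted for the second engine as well; on gfx10 the
		 * mmSDMA1_* register offsets are defined relative to the SDMA0
		 * base.
		 */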
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
	};

	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, retval);

	return retval;
}

#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
{
	uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
			mmTCP_WATCH0_ADDR_H;

	pr_debug("kfd: reg watch base address: 0x%x\n", retval);

	return retval;
}
#endif

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(adev, pipe_id, queue_id);

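	/* The HQD registers, starting at CP_MQD_BASE_ADDR, mirror the layout
	 * of the corresponding MQD fields starting at cp_mqd_base_addr_lo.
	 */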
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);

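	/* Activate doorbell logic before triggering the WPTR poll. */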
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	if (wptr) {
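		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This requires
		 * seeding CP_HQD_PQ_WPTR_LO/HI with a value close to
		 * the real wptr: guess it from the saved rptr and the
		 * saved wptr's upper bits, adding the queue size if
		 * the wptr has wrapped past the rptr.
		 */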
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
			     lower_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			     upper_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
			     lower_32_bits((uint64_t)wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
			     upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
			     (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}
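	/* Start the EOP fetcher. */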
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
		     REG_SET_FIELD(m->cp_hqd_eop_rptr,
				   CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}

static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v10_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* queue_sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* vmid */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: compute */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

static int kgd_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr);	\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
	       m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
	       m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
	       m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
	       m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
	       m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
	       m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
		    high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
			   enum kfd_preempt_type reset_type,
			   unsigned int utimeout, uint32_t pipe_id,
			   uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

#if 0
	unsigned long flags;
	int retry;
#endif

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

#if 0
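	/* Disabled workaround: wait for the HQD IQ timer to stop processing
	 * and for any pending dequeue request to clear before issuing a new
	 * dequeue request, bounding each wait with a retry count.
	 */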
	local_irq_save(flags);
	preempt_disable();
	retry = 5000;
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3)
				break;

			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();
#endif

	WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
	       RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
	       SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int kgd_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static void set_vm_context_page_table_base(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

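	/* SDMA is on gfxhub as well for Navi1x series, so programming the GFX
	 * hub covers SDMA too.
	 */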
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void program_trap_handler_settings(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	lock_srbm(adev, 0, 0, 0, vmid);
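	/*
	 * Program TBA registers
	 */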
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
			lower_32_bits(tba_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
			upper_32_bits(tba_addr >> 8) |
			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));

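	/*
	 * Program TMA registers
	 */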
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
			lower_32_bits(tma_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
			upper_32_bits(tma_addr >> 8));

	unlock_srbm(adev);
}

const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hiq_mqd_load = kgd_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.wave_control_execute = kgd_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
			get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.program_trap_handler_settings = program_trap_handler_settings,
};