#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "athub/athub_2_1_0_offset.h"
#include "athub/athub_2_1_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

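/*
 * Dequeue request types written to CP_HQD_DEQUEUE_REQUEST when a compute
 * queue is preempted or destroyed (see hqd_destroy_v10_3()).
 */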
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

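/*
 * lock_srbm()/unlock_srbm() serialize access to the SRBM-indexed
 * (per MEC/pipe/queue/VMID) register space: take adev->srbm_mutex, select
 * the target instance with nv_grbm_select(), and restore the default
 * selection on unlock. Every lock_srbm() below is paired with an
 * unlock_srbm().
 */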
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
		      uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
			  uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

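/*
 * Program the per-VMID shader memory configuration. Only SH_MEM_CONFIG and
 * SH_MEM_BASES are written; the APE1 aperture parameters are accepted for
 * interface compatibility but are not programmed on this generation.
 */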
static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
					  uint32_t sh_mem_config,
					  uint32_t sh_mem_ape1_base,
					  uint32_t sh_mem_ape1_limit,
					  uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
	WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(adev);
}

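/*
 * Map a VMID to a PASID for interrupt handling. Only the IH block's
 * VMID-to-PASID LUT is programmed here; unlike the GFX9/GFX10.1 paths,
 * no ATC mapping registers are written on GFX10.3 parts.
 */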
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
					unsigned int vmid)
{
	uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

	pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
		 vmid, pasid);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);

	return 0;
}

static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

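/*
 * Return the absolute register offset (in dwords) of the RLC queue register
 * block for the given SDMA engine and queue, suitable for direct use with
 * RREG32()/WREG32(). Unknown engine ids fall back to engine 0 with a warning.
 */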
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

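/*
 * Load a compute MQD into the HQD registers and activate the queue. The MQD
 * fields starting at cp_mqd_base_addr_lo mirror the CP_MQD_BASE_ADDR..
 * CP_HQD_PQ_WPTR_HI register range, so the HQD is programmed with a simple
 * copy loop.
 */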
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(adev, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_PQ_WPTR_HI */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering the WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This requires
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
		       lower_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
		       upper_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		       lower_32_bits((uint64_t)wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}

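/*
 * Map the HIQ by submitting a MAP_QUEUES packet on the KIQ ring rather than
 * programming the HQD registers directly.
 */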
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
			      uint32_t pipe_id, uint32_t queue_id,
			      uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v10_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) |
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: HIQ */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

static int hqd_dump_v10_3(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr);	\
	} while (0)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

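/*
 * Load an SDMA RLC queue from its MQD: disable the ring buffer, wait for the
 * engine context to drain, program the doorbell and ring pointers, then
 * re-enable the ring buffer.
 */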
static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
			       uint32_t engine_id, uint32_t queue_id,
			       uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

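/*
 * A compute queue is reported as occupied when the HQD is active and its
 * PQ base matches the queue address KFD is asking about.
 */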
static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
				  uint64_t queue_address, uint32_t pipe_id,
				  uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
		    high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
				       void *mqd)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

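/*
 * Preempt or reset a compute queue: translate the KFD preempt type into a
 * CP_HQD_DEQUEUE_REQUEST value, then poll CP_HQD_ACTIVE until the queue is
 * inactive or the caller-supplied timeout (in ms) expires.
 */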
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue pipe %d queue %d preemption failed\n",
					pipe_id, queue_id);
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
				  unsigned int utimeout)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

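/*
 * Execute an SQ_CMD (e.g. wave halt or kill) against the SE/SA/instance
 * selected by gfx_index_val, then restore GRBM_GFX_INDEX to broadcast mode.
 */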
static int wave_control_execute_v10_3(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base)
{
	/* SDMA shares the GFX hub on this generation, so programming the
	 * gfxhub page-table registers covers SDMA as well.
	 */
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	/*
	 * Program TBA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
			lower_32_bits(tba_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
			upper_32_bits(tba_addr >> 8) |
			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));

	/*
	 * Program TMA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
			lower_32_bits(tma_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
			upper_32_bits(tma_addr >> 8));

	unlock_srbm(adev);
}

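/*
 * The debug-trap and wave-launch-control helpers below are compiled out
 * (#if 0), together with their kfd2kgd_calls entries at the end of this file.
 */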
#if 0
uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev,
				uint32_t trap_debug_wave_launch_mode,
				uint32_t vmid)
{
	uint32_t data = 0;
	uint32_t orig_wave_cntl_value;
	uint32_t orig_stall_vmid;

	mutex_lock(&adev->grbm_idx_mutex);

	orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
			0,
			mmSPI_GDBG_WAVE_CNTL));
	orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
			SPI_GDBG_WAVE_CNTL,
			STALL_VMID);

	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	data = 0;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev)
{
	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev,
						uint32_t trap_override,
						uint32_t trap_mask)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	data = 0;
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
			EXCP_EN, trap_mask);
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
			REPLACE, trap_override);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
					uint8_t wave_launch_mode,
					uint32_t vmid)
{
	uint32_t data = 0;
	bool is_stall_mode;
	bool is_mode_set;

	is_stall_mode = (wave_launch_mode == 4);
	is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);

	mutex_lock(&adev->grbm_idx_mutex);

	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
			VMID_MASK, is_mode_set ? 1 << vmid : 0);
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
			MODE, is_mode_set ? wave_launch_mode : 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
			STALL_VMID, is_stall_mode ? 1 << vmid : 0);
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
			STALL_RA, is_stall_mode ? 1 : 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

/* Return the raw mmCP_IQ_WAIT_TIME2 value; callers combine it with
 * build_grace_period_packet_info_v10_3() below to adjust the SCH_WAVE
 * grace period.
 */
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
					uint32_t *wait_times)
{
	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}

void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
					uint32_t wait_times,
					uint32_t grace_period,
					uint32_t *reg_offset,
					uint32_t *reg_data)
{
	*reg_data = wait_times;

	*reg_data = REG_SET_FIELD(*reg_data,
			CP_IQ_WAIT_TIME2,
			SCH_WAVE,
			grace_period);

	*reg_offset = mmCP_IQ_WAIT_TIME2;
}
#endif

const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
	.program_sh_mem_settings = program_sh_mem_settings_v10_3,
	.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
	.init_interrupts = init_interrupts_v10_3,
	.hqd_load = hqd_load_v10_3,
	.hiq_mqd_load = hiq_mqd_load_v10_3,
	.hqd_sdma_load = hqd_sdma_load_v10_3,
	.hqd_dump = hqd_dump_v10_3,
	.hqd_sdma_dump = hqd_sdma_dump_v10_3,
	.hqd_is_occupied = hqd_is_occupied_v10_3,
	.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
	.hqd_destroy = hqd_destroy_v10_3,
	.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
	.wave_control_execute = wave_control_execute_v10_3,
	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
#if 0
	.enable_debug_trap = enable_debug_trap_v10_3,
	.disable_debug_trap = disable_debug_trap_v10_3,
	.set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
	.set_wave_launch_mode = set_wave_launch_mode_v10_3,
	.get_iq_wait_times = get_iq_wait_times_v10_3,
	.build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
#endif
};