#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "oss/osssys_6_0_0_offset.h"
#include "oss/osssys_6_0_0_sh_mask.h"
#include "soc15_common.h"
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
		      uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
			  uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
	WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);

	unlock_srbm(adev);
}

static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
				      unsigned int vmid)
{
	uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

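	/* Also map vmid -> pasid in the IH block's LUT so interrupt handling
	 * can resolve the owning process for this VMID.
	 */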
	pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
		 vmid, pasid);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);

	return 0;
}

static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
	       CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
	       CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
					unsigned int engine_id,
					unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
		break;
	default:
		BUG();
	}

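	/* Queues within an engine are laid out at a fixed register stride;
	 * step from queue 0 by (QUEUE1 - QUEUE0) registers per queue.
	 */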
	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
	return (struct v11_compute_mqd *)mqd;
}

static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v11_sdma_mqd *)mqd;
}

static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct v11_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(adev, pipe_id, queue_id);

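	/* VMID 0 is reserved for the HIQ; route the HIQ to this pipe/queue
	 * through RLC_CP_SCHEDULERS.
	 */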
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			 mec, pipe, queue_id);
		value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
	}

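	/* The HQD registers are contiguous in the MQD; program everything
	 * from CP_MQD_BASE_ADDR up to CP_HQD_PQ_WPTR_HI.
	 */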
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
		WREG32(reg, mqd_hqd[reg - hqd_base]);

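	/* Enable the doorbell before the WPTR poll below is armed. */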
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
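		/* The user-mode write pointer is not read directly here;
		 * instead the WPTR poll address is programmed so the CP can
		 * fetch it from memory. The HQD write pointer is seeded with
		 * a value derived from the saved RPTR and WPTR fields so it
		 * stays within one queue-size window of the real value.
		 */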
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uint64_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

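	/* Start the EOP fetcher (INIT_FETCHER) from the saved EOP read pointer. */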
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);

	release_queue(adev);

	return 0;
}

static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v11_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

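	/* Map the HIQ by submitting a MAP_QUEUES packet on the KIQ ring. */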
	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) |
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

static int hqd_dump_v11(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

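	/* Disable the ring buffer and wait for the SDMA queue to report idle
	 * before reprogramming it.
	 */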
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
	       m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
		if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
	       m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
	       m->sdmax_rlcx_rb_rptr_hi);

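	/* Program the write pointer from user memory if it is readable;
	 * otherwise fall back to the saved read pointer so the queue starts
	 * out empty.
	 */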
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
	       m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
	       m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
	       m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);

	return 0;
}

static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = regSDMA0_QUEUE0_RB_CNTL;
	     reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
	     reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
	     reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
	     reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
	     reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
			   enum kfd_preempt_type reset_type,
			   unsigned int utimeout, uint32_t pipe_id,
			   uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v11_compute_mqd *m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

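	/* Issue the dequeue request and wait for the HQD to become inactive. */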
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue pipe %d queue %d preemption failed\n",
			       pipe_id, queue_id);
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
	temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);

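	/* Wait for the SDMA queue to drain and report idle. */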
	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
		if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
	       RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
	       SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);

	return 0;
}

static int wave_control_execute_v11(struct amdgpu_device *adev,
				    uint32_t gfx_index_val,
				    uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);

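	/* Restore GRBM_GFX_INDEX to broadcast mode so later register writes
	 * reach all shader engines, shader arrays and instances again.
	 */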
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     SE_BROADCAST_WRITES, 1);

	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
					       uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

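	/* Compute VMIDs use the GFX hub on GFX11, so programming the gfxhub
	 * page table registers covers this VMID.
	 */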
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
	.program_sh_mem_settings = program_sh_mem_settings_v11,
	.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
	.init_interrupts = init_interrupts_v11,
	.hqd_load = hqd_load_v11,
	.hiq_mqd_load = hiq_mqd_load_v11,
	.hqd_sdma_load = hqd_sdma_load_v11,
	.hqd_dump = hqd_dump_v11,
	.hqd_sdma_dump = hqd_sdma_dump_v11,
	.hqd_is_occupied = hqd_is_occupied_v11,
	.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
	.hqd_destroy = hqd_destroy_v11,
	.hqd_sdma_destroy = hqd_sdma_destroy_v11,
	.wave_control_execute = wave_control_execute_v11,
	.get_atc_vmid_pasid_mapping_info = NULL,
	.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
};