/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "gfx_v9_0.h"
#include "amdgpu_amdkfd_gfx_v9.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

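/*
 * SRBM selection helpers: take the SRBM mutex and program GRBM to point
 * register accesses at a specific MEC/pipe/queue/VMID, then restore the
 * broadcast (0, 0, 0, 0) selection on release.
 */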
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

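/* Program per-VMID SH_MEM configuration (shader memory aperture setup) */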
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(adev);
}

int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
					unsigned int vmid)
{
	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * The mapping must be programmed twice: the ATC_VMID0..15 registers
	 * cover the GFX hub, the ATC_VMID16..31 registers cover the MM hub,
	 * and the IH block keeps its own VMID-to-PASID look-up tables.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

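/*
 * Translate an SDMA engine/queue pair into the register offset of that
 * queue's RLC (per-queue) register block.
 */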
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

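/*
 * Load a compute HQD from its MQD: program the HQD registers from the MQD
 * image, arm doorbell/WPTR polling and mark the queue active.
 */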
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This requires
		 * rearming the fetcher (CP_HQD_PQ_WPTR_POLL_CNTL1).
		 */

		/* Reconstruct a plausible 64-bit write pointer before the
		 * poll result arrives: keep the low bits within one queue
		 * wrap of the saved read pointer and carry over the saved
		 * high bits of the write pointer.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher before enabling the queue */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(adev);

	return 0;
}

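/*
 * Load the HIQ MQD by submitting a MAP_QUEUES packet on the KIQ ring instead
 * of programming the HQD registers directly.
 */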
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) |
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

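/*
 * Load an SDMA queue from its MQD: quiesce the RLC queue, restore ring
 * buffer and doorbell state, then re-enable the ring.
 */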
static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

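/*
 * Check whether the HQD at (pipe, queue) is active and currently bound to
 * the given ring-buffer base address.
 */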
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
		    high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

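/*
 * Destroy (preempt) a compute HQD: issue the requested dequeue type and
 * poll CP_HQD_ACTIVE until the queue drains or the timeout expires.
 */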
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

0573
0574 static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
0575 unsigned int utimeout)
0576 {
0577 struct v9_sdma_mqd *m;
0578 uint32_t sdma_rlc_reg_offset;
0579 uint32_t temp;
0580 unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
0581
0582 m = get_sdma_mqd(mqd);
0583 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
0584 m->sdma_queue_id);
0585
0586 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
0587 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
0588 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
0589
0590 while (true) {
0591 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
0592 if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
0593 break;
0594 if (time_after(jiffies, end_jiffies)) {
0595 pr_err("SDMA RLC not idle in %s\n", __func__);
0596 return -ETIME;
0597 }
0598 usleep_range(500, 1000);
0599 }
0600
0601 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
0602 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
0603 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
0604 SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
0605
0606 m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
0607 m->sdmax_rlcx_rb_rptr_hi =
0608 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
0609
0610 return 0;
0611 }
0612
0613 bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
0614 uint8_t vmid, uint16_t *p_pasid)
0615 {
0616 uint32_t value;
0617
0618 value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
0619 + vmid);
0620 *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
0621
0622 return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
0623 }
0624
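/*
 * Broadcast an SQ_CMD to the waves selected by gfx_index_val, then restore
 * GRBM_GFX_INDEX to broadcast mode.
 */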
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

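/* Program the page-table base for a KFD-owned VMID on both the GFX and MM hubs */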
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);

	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_lock(&adev->srbm_mutex);
	mutex_lock(&adev->grbm_idx_mutex);
}

static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
{
	mutex_unlock(&adev->grbm_idx_mutex);
	mutex_unlock(&adev->srbm_mutex);
}

/**
 * get_wave_count: Read device registers to get number of waves in flight for
 * a particular queue. The method also returns the VMID associated with the
 * queue.
 *
 * @adev: Handle of device whose registers are to be read
 * @queue_idx: Index of queue in the queue-map bit-field
 * @wave_cnt: Output parameter updated with number of waves in flight
 * @vmid: Output parameter updated with VMID of queue whose wave count
 *        is being collected
 */
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int *wave_cnt, int *vmid)
{
	int pipe_idx;
	int queue_slot;
	unsigned int reg_val;

	/*
	 * Program GRBM to select the MEC1 pipe/queue derived from queue_idx,
	 * then read the per-queue SPI_CSQ_WF_ACTIVE_COUNT register. If waves
	 * are in flight, also read CP_HQD_VMID to learn the owning VMID.
	 */
	*vmid = 0xFF;
	*wave_cnt = 0;
	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
	reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
			 queue_slot);
	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
	if (*wave_cnt != 0)
		*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
}

/**
 * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
 * shader engine and aggregates the number of waves that are in flight for the
 * process whose pasid is provided as a parameter. The process could have ZERO
 * or more queues running and submitting waves to compute units.
 *
 * @adev: Handle of device from which to get number of waves in flight
 * @pasid: Identifies the process for which this query call is invoked
 * @pasid_wave_cnt: Output parameter updated with number of waves in flight
 *                  that belong to the process with the given pasid
 * @max_waves_per_cu: Output parameter updated with maximum number of waves
 *                    possible per Compute Unit
 *
 * Note: It's possible that the device has too many queues (oversubscription),
 * in which case a VMID could be remapped to a different PASID between the
 * moment the wave count is read and the moment the owning PASID is looked up.
 * In that case the reported wave count can be slightly inaccurate.
 *
 * The registers that provide the waves in flight are:
 *
 *  SPI_CSQ_WF_ACTIVE_STATUS - bit map of queue slots per pipe; a set bit
 *    means the slotted queue has waves in flight.
 *
 *  SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx - number of waves in flight for the
 *    queue at the given slot.
 *
 *  CP_HQD_VMID - VMID of the queue, read only when the wave count is
 *    non-zero and then matched against the PASID via the IH_VMID_0_LUT table.
 */
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu)
{
	int qidx;
	int vmid;
	int se_idx;
	int sh_idx;
	int se_cnt;
	int sh_cnt;
	int wave_cnt;
	int queue_map;
	int pasid_tmp;
	int max_queue_cnt;
	int vmid_wave_cnt = 0;
	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);

	lock_spi_csq_mutexes(adev);
	soc15_grbm_select(adev, 1, 0, 0, 0);

	/*
	 * Iterate through the shader engines and arrays of the device
	 * to get the number of waves in flight.
	 */
	bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
			  KGD_MAX_QUEUES);
	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
			adev->gfx.mec.num_queue_per_pipe;
	sh_cnt = adev->gfx.config.max_sh_per_se;
	se_cnt = adev->gfx.config.max_shader_engines;
	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
			queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);

			/*
			 * Assumption: queue map lays out MEC1 queue slots
			 * pipe by pipe, i.e. all queues of pipe 0 come
			 * first, followed by the queues of pipe 1, and so
			 * on. A set bit means the slot has active waves.
			 */
			for (qidx = 0; qidx < max_queue_cnt; qidx++) {

				/* Skip queue slots that are not reserved
				 * for compute (KFD) use.
				 */
				if (!test_bit(qidx, cp_queue_bitmap))
					continue;

				if (!(queue_map & (1 << qidx)))
					continue;

				/* Get number of waves in flight and aggregate them */
				get_wave_count(adev, qidx, &wave_cnt, &vmid);
				if (wave_cnt != 0) {
					pasid_tmp =
					  RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
						 mmIH_VMID_0_LUT) + vmid);
					if (pasid_tmp == pasid)
						vmid_wave_cnt += wave_cnt;
				}
			}
		}
	}

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	soc15_grbm_select(adev, 0, 0, 0, 0);
	unlock_spi_csq_mutexes(adev);

	/* Update the output parameters and return */
	*pasid_wave_cnt = vmid_wave_cnt;
	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
				adev->gfx.cu_info.max_waves_per_simd;
}

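/* Program the trap handler (TBA) and trap memory (TMA) base addresses for a VMID */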
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	/*
	 * Program TBA registers
	 */
	WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
			lower_32_bits(tba_addr >> 8));
	WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
			upper_32_bits(tba_addr >> 8));

	/*
	 * Program TMA registers
	 */
	WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
			lower_32_bits(tma_addr >> 8));
	WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
			upper_32_bits(tma_addr >> 8));

	unlock_srbm(adev);
}

const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
};