#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v9_structs.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

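/*
 * Fill the per-SE static CU masks in the MQD from a user-supplied CU mask.
 * mqd_symmetrically_map_cu_mask() spreads the enabled CUs evenly across the
 * shader engines, so e.g. a mask enabling half of the CUs disables the same
 * number of CUs on every SE rather than idling whole engines.
 */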
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct mqd_update_info *minfo)
{
	struct v9_mqd *m;
	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};

	if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
	    !minfo->cu_mask.ptr)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];
	m->compute_static_thread_mgmt_se4 = se_mask[4];
	m->compute_static_thread_mgmt_se5 = se_mask[5];
	m->compute_static_thread_mgmt_se6 = se_mask[6];
	m->compute_static_thread_mgmt_se7 = se_mask[7];

	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3,
		m->compute_static_thread_mgmt_se4,
		m->compute_static_thread_mgmt_se5,
		m->compute_static_thread_mgmt_se6,
		m->compute_static_thread_mgmt_se7);
}

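/* Translate the queue priority into the CP pipe and queue priority fields. */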
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
		struct queue_properties *q)
{
	int retval;
	struct kfd_mem_obj *mqd_mem_obj = NULL;

	/*
	 * From V9, for CWSR, the control stack is located on the next page
	 * boundary after the MQD, so allocate both together with the GTT
	 * allocation function instead of the sub-allocation function.
	 */
	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
		if (!mqd_mem_obj)
			return NULL;
		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev,
			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
			&(mqd_mem_obj->gtt_mem),
			&(mqd_mem_obj->gpu_addr),
			(void *)&(mqd_mem_obj->cpu_ptr), true);
	} else {
		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
				&mqd_mem_obj);
	}

	if (retval) {
		kfree(mqd_mem_obj);
		return NULL;
	}

	return mqd_mem_obj;
}

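/*
 * One-time MQD initialization: zero the descriptor, program the fields that
 * do not change over the queue's lifetime (header, static CU masks, MQD base
 * address, quantum, CWSR save area), then let update_mqd() fill in the
 * per-queue runtime state.
 */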
static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v9_mqd *m;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v9_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (q->tba_addr) {
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	mm->update_mqd(mm, m, q, NULL);
}

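/* Program the HQD registers from the MQD through the kfd2kgd interface. */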
static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms);
}

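/*
 * Program the per-queue runtime state: ring buffer base and size, read and
 * write pointer addresses, doorbell, EOP buffer and the AQL-specific control
 * bits. Called at queue creation and on every queue property update.
 */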
static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct v9_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control =
		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;

	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 */
	m->cp_hqd_eop_control = min(0xA,
		order_base_2(q->eop_ring_buffer_size / 4) - 1);
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |= 1 <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, minfo);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

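/*
 * Return the doorbell ID recorded in the MQD; only wired up for the HIQ
 * manager below.
 */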
static uint32_t read_doorbell_id(void *mqd)
{
	struct v9_mqd *m = (struct v9_mqd *)mqd;

	return m->queue_doorbell_id0;
}

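/*
 * Copy the control stack out to user space. The used sizes are derived from
 * the MQD: the control stack fills downward, so its used size is the buffer
 * size minus the current offset, and the workgroup save area in use runs
 * from the end of the control stack to cp_hqd_wg_state_offset.
 */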
static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v9_mqd *m;

	/* Control stack is located one page after MQD. */
	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
		return -EFAULT;

	return 0;
}

static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
	struct v9_mqd *m = get_mqd(mqd);

	*ctl_stack_size = m->cp_hqd_cntl_stack_size;
}

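/* Snapshot the MQD and its control stack for checkpoint/restore. */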
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
	struct v9_mqd *m;

	/* Control stack is located one page after MQD. */
	void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
}

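/*
 * Rebuild an MQD and control stack from a checkpointed image. The doorbell
 * offset is reprogrammed because the restored queue may get a different
 * doorbell, and the queue comes back inactive.
 */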
static void restore_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_mqd *m;
	void *ctl_stack;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	/* Control stack is located one page after MQD. */
	ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);

	m->cp_hqd_pq_doorbell_control =
		qp->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	qp->is_active = 0;
}

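/* The HIQ uses the same MQD as a compute queue, plus privileged-state bits. */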
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}

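/* SDMA MQDs carry no static init state; update_mqd_sdma() sets everything. */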
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v9_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q, NULL);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

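/*
 * Program the SDMA RLC ring: ring size, VMID and read-pointer writeback in
 * RB_CNTL, plus ring base, read pointer address and doorbell offset.
 */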
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct queue_properties *q,
		struct mqd_update_info *minfo)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void checkpoint_mqd_sdma(struct mqd_manager *mm,
				void *mqd,
				void *mqd_dst,
				void *ctl_stack_dst)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);

	/* SDMA queues have no control stack, so ctl_stack_dst is unused. */
	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
}

static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	m->sdmax_rlcx_doorbell_offset =
		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	qp->is_active = 0;
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_sdma_mqd), false);
	return 0;
}

#endif

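/*
 * Instantiate an MQD manager for GFXv9 and wire up the function table for
 * the requested queue type. CP (user compute), HIQ, DIQ and SDMA queues
 * share most operations but differ in how their MQDs are allocated and
 * loaded.
 */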
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->get_wave_state = get_wave_state;
		mqd->get_checkpoint_info = get_checkpoint_info;
		mqd->checkpoint_mqd = checkpoint_mqd;
		mqd->restore_mqd = restore_mqd;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = kfd_load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
		mqd->is_occupied = kfd_is_occupied_sdma;
		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
		mqd->restore_mqd = restore_mqd_sdma;
		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}