#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/mm_types.h>

#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "vi_structs.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"

#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8

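/*
 * The MQD (memory queue descriptor) is the in-memory image of a queue's
 * hardware state that the CP/SDMA firmware loads when the queue is mapped
 * onto a hardware slot. The helpers below just reinterpret the opaque MQD
 * pointer as the VI-specific compute or SDMA layout.
 */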
static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}

static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct mqd_update_info *minfo)
{
	struct vi_mqd *m;
	uint32_t se_mask[4] = {0};

	if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
	    !minfo->cu_mask.ptr)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

static void set_priority(struct vi_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
					struct queue_properties *q)
{
	struct kfd_mem_obj *mqd_mem_obj;

	if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
			&mqd_mem_obj))
		return NULL;

	return mqd_mem_obj;
}

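/*
 * init_mqd() zeroes the freshly allocated MQD, programs the fields that do
 * not change over the queue's lifetime (default CU masks, MQD base address,
 * quantum, priority, trap handler and CWSR state) and then defers the
 * per-queue fields to update_mqd().
 */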
static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct vi_mqd *m;

	m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct vi_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	/* Enable all CUs by default; update_cu_mask() may narrow this later. */
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
			MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	set_priority(m, q);
	m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_iq_rptr = 1;

	if (q->tba_addr) {
		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	mm->update_mqd(mm, m, q, NULL);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointers count 64-byte packets while the HQD register
	 * counts dwords, hence the shift of 4; PM4 queues need no conversion.
	 */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);

	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, wptr_mask, mms);
}

static void __update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q, struct mqd_update_info *minfo,
			unsigned int mtype, unsigned int atc_bit)
{
	struct vi_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
			mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;

	m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
			3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
			mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;

	/*
	 * Cap the encoded EOP buffer size: the field is clamped to 0xA,
	 * i.e. an EOP ring of at most 0x800 dwords.
	 */
	m->cp_hqd_eop_control |= min(0xA,
		order_base_2(q->eop_ring_buffer_size / 4) - 1);
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
			mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control =
			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;

	update_cu_mask(mm, mqd, minfo);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	__update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
}

static uint32_t read_doorbell_id(void *mqd)
{
	struct vi_mqd *m = (struct vi_mqd *)mqd;

	return m->queue_doorbell_id0;
}

static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}

static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct vi_mqd *m;

	m = get_mqd(mqd);

	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	/*
	 * Nothing is copied to ctl_stack here: on GFXv8 the control stack
	 * lives inside the context save area, which user mode can already
	 * read directly.
	 */

	return 0;
}

static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
	/* There is no kernel-mode copy of the control stack to checkpoint. */
	*ctl_stack_size = 0;
}

static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
	struct vi_mqd *m;

	m = get_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct vi_mqd));
}

static void restore_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	uint64_t addr;
	struct vi_mqd *m;

	m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	m->cp_hqd_pq_doorbell_control =
		qp->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	qp->is_active = 0;
}

static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct vi_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}

static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}

static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct vi_sdma_mqd *m;

	m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct vi_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q, NULL);
}

static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct vi_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell =
		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;

	m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void checkpoint_mqd_sdma(struct mqd_manager *mm,
				void *mqd,
				void *mqd_dst,
				void *ctl_stack_dst)
{
	struct vi_sdma_mqd *m;

	m = get_sdma_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct vi_sdma_mqd));
}

static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	uint64_t addr;
	struct vi_sdma_mqd *m;

	m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	m->sdmax_rlcx_doorbell =
		qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	qp->is_active = 0;
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct vi_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct vi_sdma_mqd), false);
	return 0;
}

#endif

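/*
 * mqd_manager_init_vi() builds the per-queue-type ops table (allocate, init,
 * load, update, destroy, checkpoint/restore, ...) for GFXv8 devices. The
 * Tonga variant below reuses it and only swaps in update_mqd_tonga() for
 * compute (CP) queues, which programs the uncached MTYPE and leaves the ATC
 * bits clear.
 */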
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->get_wave_state = get_wave_state;
		mqd->get_checkpoint_info = get_checkpoint_info;
		mqd->checkpoint_mqd = checkpoint_mqd;
		mqd->restore_mqd = restore_mqd;
		mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd_hiq;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd_hiq;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = kfd_load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
		mqd->is_occupied = kfd_is_occupied_sdma;
		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
		mqd->restore_mqd = restore_mqd_sdma;
		mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}

struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
			struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	mqd = mqd_manager_init_vi(type, dev);
	if (!mqd)
		return NULL;
	if (type == KFD_MQD_TYPE_CP)
		mqd->update_mqd = update_mqd_tonga;
	return mqd;
}