0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include <linux/printk.h>
0025 #include <linux/slab.h>
0026 #include <linux/uaccess.h>
0027 #include "kfd_priv.h"
0028 #include "kfd_mqd_manager.h"
0029 #include "v11_structs.h"
0030 #include "gc/gc_11_0_0_offset.h"
0031 #include "gc/gc_11_0_0_sh_mask.h"
0032 #include "amdgpu_amdkfd.h"
0033
/* Reinterpret an opaque MQD pointer as a v11 compute MQD. */
static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
	struct v11_compute_mqd *m = mqd;

	return m;
}
0038
/* Reinterpret an opaque MQD pointer as a v11 SDMA MQD. */
static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v11_sdma_mqd *m = mqd;

	return m;
}
0043
0044 static void update_cu_mask(struct mqd_manager *mm, void *mqd,
0045 struct mqd_update_info *minfo)
0046 {
0047 struct v11_compute_mqd *m;
0048 uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
0049
0050 if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
0051 !minfo->cu_mask.ptr)
0052 return;
0053
0054 mqd_symmetrically_map_cu_mask(mm,
0055 minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
0056
0057 m = get_mqd(mqd);
0058 m->compute_static_thread_mgmt_se0 = se_mask[0];
0059 m->compute_static_thread_mgmt_se1 = se_mask[1];
0060 m->compute_static_thread_mgmt_se2 = se_mask[2];
0061 m->compute_static_thread_mgmt_se3 = se_mask[3];
0062 m->compute_static_thread_mgmt_se4 = se_mask[4];
0063 m->compute_static_thread_mgmt_se5 = se_mask[5];
0064 m->compute_static_thread_mgmt_se6 = se_mask[6];
0065 m->compute_static_thread_mgmt_se7 = se_mask[7];
0066
0067 pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
0068 m->compute_static_thread_mgmt_se0,
0069 m->compute_static_thread_mgmt_se1,
0070 m->compute_static_thread_mgmt_se2,
0071 m->compute_static_thread_mgmt_se3,
0072 m->compute_static_thread_mgmt_se4,
0073 m->compute_static_thread_mgmt_se5,
0074 m->compute_static_thread_mgmt_se6,
0075 m->compute_static_thread_mgmt_se7);
0076 }
0077
0078 static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q)
0079 {
0080 m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
0081 m->cp_hqd_queue_priority = q->priority;
0082 }
0083
0084 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
0085 struct queue_properties *q)
0086 {
0087 struct kfd_mem_obj *mqd_mem_obj;
0088 int size;
0089
0090
0091
0092
0093
0094 if (kfd->shared_resources.enable_mes)
0095 size = PAGE_SIZE;
0096 else
0097 size = sizeof(struct v11_compute_mqd);
0098
0099 if (kfd_gtt_sa_allocate(kfd, size, &mqd_mem_obj))
0100 return NULL;
0101
0102 return mqd_mem_obj;
0103 }
0104
/*
 * init_mqd() - Initialize a freshly allocated v11 compute MQD.
 * @mm:          MQD manager for this device/queue type.
 * @mqd:         Out: CPU pointer to the initialized MQD.
 * @mqd_mem_obj: Backing GTT allocation (CPU and GPU addresses).
 * @gart_addr:   Out (optional): GPU address of the MQD.
 * @q:           Queue properties used to program the MQD.
 *
 * Zeroes the allocation, programs static defaults, then calls
 * mm->update_mqd() to fill in the queue-specific registers.
 */
static void init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	uint64_t addr;
	struct v11_compute_mqd *m;
	int size;

	m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	/* Mirror the allocation size chosen in allocate_mqd(): a full page
	 * when MES is enabled, otherwise just the MQD struct. */
	if (mm->dev->shared_resources.enable_mes)
		size = PAGE_SIZE;
	else
		size = sizeof(struct v11_compute_mqd);

	memset(m, 0, size);

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	/* Enable every CU on every shader engine by default;
	 * update_cu_mask() narrows these when a CU mask is supplied. */
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;

	/* Request MQD preload; 0x55 is the preload size used for v11. */
	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	/* GPU address of this MQD itself, split into 32-bit halves. */
	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (mm->dev->cwsr_enabled) {
		/* Compute wave save/restore: enable queue-switch mode and
		 * point the HQD at the user-allocated save/restore area. */
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		/* Offset and wg_state start equal to the stack size, i.e.
		 * an empty control stack (it is filled downward on save). */
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	mm->update_mqd(mm, m, q, NULL);
}
0169
0170 static int load_mqd(struct mqd_manager *mm, void *mqd,
0171 uint32_t pipe_id, uint32_t queue_id,
0172 struct queue_properties *p, struct mm_struct *mms)
0173 {
0174 int r = 0;
0175
0176 uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
0177
0178 r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
0179 (uint32_t __user *)p->write_ptr,
0180 wptr_shift, 0, mms);
0181 return r;
0182 }
0183
0184 static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
0185 uint32_t pipe_id, uint32_t queue_id,
0186 struct queue_properties *p, struct mm_struct *mms)
0187 {
0188 return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
0189 queue_id, p->doorbell_off);
0190 }
0191
/*
 * update_mqd() - Program queue-specific compute MQD registers from @q.
 * @mm:    MQD manager.
 * @mqd:   Compute MQD to update.
 * @q:     Queue properties (ring address/size, pointers, doorbell, ...).
 * @minfo: Optional update info (CU mask); may be NULL.
 *
 * Also recomputes q->is_active via QUEUE_IS_ACTIVE().
 */
static void update_mqd(struct mqd_manager *mm, void *mqd,
		       struct queue_properties *q,
		       struct mqd_update_info *minfo)
{
	struct v11_compute_mqd *m;

	m = get_mqd(mqd);

	/* Queue size field is log2(size in dwords) - 1:
	 * ffs(x) - 1 gives log2 for a power of two, minus 1 more for the
	 * register encoding. */
	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |=
			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	/* Ring base is programmed as a 256-byte-aligned address (>> 8). */
	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;

	/* EOP buffer size field uses the same log2-minus-1 encoding,
	 * capped at 0xA. */
	m->cp_hqd_eop_control = min(0xA,
		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		/* AQL-specific behavior: hardware does not update rptr in the
		 * MQD, wptr is slot based, and queue-full detection is on. */
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
		m->cp_hqd_pq_doorbell_control |=
			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->cwsr_enabled)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, minfo);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}
0255
0256 static uint32_t read_doorbell_id(void *mqd)
0257 {
0258 struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
0259
0260 return m->queue_doorbell_id0;
0261 }
0262
0263 static int destroy_mqd(struct mqd_manager *mm, void *mqd,
0264 enum kfd_preempt_type type,
0265 unsigned int timeout, uint32_t pipe_id,
0266 uint32_t queue_id)
0267 {
0268 return mm->dev->kfd2kgd->hqd_destroy
0269 (mm->dev->adev, mqd, type, timeout,
0270 pipe_id, queue_id);
0271 }
0272
0273 static void free_mqd(struct mqd_manager *mm, void *mqd,
0274 struct kfd_mem_obj *mqd_mem_obj)
0275 {
0276 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
0277 }
0278
0279 static bool is_occupied(struct mqd_manager *mm, void *mqd,
0280 uint64_t queue_address, uint32_t pipe_id,
0281 uint32_t queue_id)
0282 {
0283 return mm->dev->kfd2kgd->hqd_is_occupied(
0284 mm->dev->adev, queue_address,
0285 pipe_id, queue_id);
0286 }
0287
/*
 * get_wave_state() - Report control-stack and save-area usage for this queue.
 * @mm:                   MQD manager (unused here).
 * @mqd:                  Compute MQD to read the sizes from.
 * @ctl_stack:            User buffer for the control stack.
 * @ctl_stack_used_size:  Out: bytes of control stack in use.
 * @save_area_used_size:  Out: bytes of wave-state save area in use.
 *
 * NOTE(review): nothing is copied into @ctl_stack here — presumably on v11
 * the control stack already lives in the user-mode CWSR save area, so only
 * the usage sizes need to be reported. Confirm against the CWSR design.
 */
static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v11_compute_mqd *m;

	m = get_mqd(mqd);

	/* The control stack fills downward from cp_hqd_cntl_stack_size to
	 * cp_hqd_cntl_stack_offset, so the difference is the used portion. */
	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	/* The wave-state save area sits after the control stack; the used
	 * part ends at cp_hqd_wg_state_offset. */
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	return 0;
}
0324
0325 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
0326 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
0327 struct queue_properties *q)
0328 {
0329 struct v11_compute_mqd *m;
0330
0331 init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
0332
0333 m = get_mqd(*mqd);
0334
0335 m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
0336 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
0337 }
0338
0339 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
0340 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
0341 struct queue_properties *q)
0342 {
0343 struct v11_sdma_mqd *m;
0344
0345 m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
0346
0347 memset(m, 0, sizeof(struct v11_sdma_mqd));
0348
0349 *mqd = m;
0350 if (gart_addr)
0351 *gart_addr = mqd_mem_obj->gpu_addr;
0352
0353 mm->update_mqd(mm, m, q, NULL);
0354 }
0355
0356 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
0357 uint32_t pipe_id, uint32_t queue_id,
0358 struct queue_properties *p, struct mm_struct *mms)
0359 {
0360 return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
0361 (uint32_t __user *)p->write_ptr,
0362 mms);
0363 }
0364
0365 #define SDMA_RLC_DUMMY_DEFAULT 0xf
0366
/*
 * update_mqd_sdma() - Program SDMA queue registers in the MQD from @q.
 * @mm:    MQD manager (unused here).
 * @mqd:   SDMA MQD to update.
 * @q:     Queue properties (ring address/size, pointers, doorbell, ids).
 * @minfo: Unused for SDMA.
 *
 * Also recomputes q->is_active via QUEUE_IS_ACTIVE().
 */
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct queue_properties *q,
		struct mqd_update_info *minfo)
{
	struct v11_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	/* RB_SIZE is log2(size in dwords); also program VMID and enable
	 * read-pointer writeback with a timer value of 6. */
	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
		<< SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	/* Ring base is programmed 256-byte aligned (>> 8). */
	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}
0395
0396
0397
0398
0399
0400 static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
0401 enum kfd_preempt_type type,
0402 unsigned int timeout, uint32_t pipe_id,
0403 uint32_t queue_id)
0404 {
0405 return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
0406 }
0407
0408 static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
0409 uint64_t queue_address, uint32_t pipe_id,
0410 uint32_t queue_id)
0411 {
0412 return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
0413 }
0414
0415 #if defined(CONFIG_DEBUG_FS)
0416
0417 static int debugfs_show_mqd(struct seq_file *m, void *data)
0418 {
0419 seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
0420 data, sizeof(struct v11_compute_mqd), false);
0421 return 0;
0422 }
0423
0424 static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
0425 {
0426 seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
0427 data, sizeof(struct v11_sdma_mqd), false);
0428 return 0;
0429 }
0430
0431 #endif
0432
/*
 * mqd_manager_init_v11() - Create an MQD manager for a v11 queue type.
 * @type: Queue type (CP, HIQ, DIQ or SDMA).
 * @dev:  KFD device the manager belongs to.
 *
 * Allocates the manager and wires up the type-specific function pointers.
 * Returns NULL on invalid type or allocation failure; the caller owns the
 * returned manager and frees it with kfree().
 */
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		/* Regular user compute queues. */
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v11_compute_mqd);
		mqd->get_wave_state = get_wave_state;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_HIQ:
		/* Hardware interface queue: HIQ-specific alloc/init/load,
		 * and it exposes the doorbell id the HW assigned. */
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = hiq_load_mqd_kiq;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v11_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_DIQ:
		/* Debug interface queue: HIQ-style init, regular alloc/free. */
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v11_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		/* SDMA user queues use the SDMA-specific helpers throughout. */
		pr_debug("%s@%i\n", __func__, __LINE__);
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = destroy_mqd_sdma;
		mqd->is_occupied = is_occupied_sdma;
		mqd->mqd_size = sizeof(struct v11_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}