#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "mes_api_def.h"

#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

0044 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
0045 u32 pasid, unsigned int vmid);
0046
0047 static int execute_queues_cpsch(struct device_queue_manager *dqm,
0048 enum kfd_unmap_queues_filter filter,
0049 uint32_t filter_param);
0050 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
0051 enum kfd_unmap_queues_filter filter,
0052 uint32_t filter_param, bool reset);
0053
0054 static int map_queues_cpsch(struct device_queue_manager *dqm);
0055
0056 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
0057 struct queue *q);
0058
0059 static inline void deallocate_hqd(struct device_queue_manager *dqm,
0060 struct queue *q);
0061 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
0062 static int allocate_sdma_queue(struct device_queue_manager *dqm,
0063 struct queue *q, const uint32_t *restore_sdma_id);
0064 static void kfd_process_hw_exception(struct work_struct *work);
0065
0066 static inline
0067 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
0068 {
0069 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
0070 return KFD_MQD_TYPE_SDMA;
0071 return KFD_MQD_TYPE_CP;
0072 }
0073
0074 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
0075 {
0076 int i;
0077 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
0078 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
0079
0080
0081 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
0082 if (test_bit(pipe_offset + i,
0083 dqm->dev->shared_resources.cp_queue_bitmap))
0084 return true;
0085 return false;
0086 }
0087
0088 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
0089 {
0090 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
0091 KGD_MAX_QUEUES);
0092 }
0093
0094 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
0095 {
0096 return dqm->dev->shared_resources.num_queue_per_pipe;
0097 }
0098
0099 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
0100 {
0101 return dqm->dev->shared_resources.num_pipe_per_mec;
0102 }
0103
0104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
0105 {
0106 return kfd_get_num_sdma_engines(dqm->dev) +
0107 kfd_get_num_xgmi_sdma_engines(dqm->dev);
0108 }
0109
0110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
0111 {
0112 return kfd_get_num_sdma_engines(dqm->dev) *
0113 dqm->dev->device_info.num_sdma_queues_per_engine;
0114 }
0115
0116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
0117 {
0118 return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
0119 dqm->dev->device_info.num_sdma_queues_per_engine;
0120 }
0121
0122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
0123 {
0124 return dqm->dev->device_info.reserved_sdma_queues_bitmap;
0125 }
0126
0127 void program_sh_mem_settings(struct device_queue_manager *dqm,
0128 struct qcm_process_device *qpd)
0129 {
0130 return dqm->dev->kfd2kgd->program_sh_mem_settings(
0131 dqm->dev->adev, qpd->vmid,
0132 qpd->sh_mem_config,
0133 qpd->sh_mem_ape1_base,
0134 qpd->sh_mem_ape1_limit,
0135 qpd->sh_mem_bases);
0136 }
0137
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	/*
	 * Mark the HWS as hung; queue map/unmap paths check this flag and
	 * fail fast with -EIO instead of waiting on dead firmware.
	 */
	dqm->is_hws_hang = true;

	/*
	 * It's possible we're detecting a HWS hang in the middle of a
	 * GPU reset. No need to schedule another reset in that case.
	 */
	if (!dqm->is_resetting)
		schedule_work(&dqm->hw_exception_work);
}
0152
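/*
 * Map a KFD queue type onto the queue type understood by the MES
 * firmware scheduler. Only compute and SDMA queues can be handed to MES.
 */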
0153 static int convert_to_mes_queue_type(int queue_type)
0154 {
0155 int mes_queue_type;
0156
0157 switch (queue_type) {
0158 case KFD_QUEUE_TYPE_COMPUTE:
0159 mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
0160 break;
0161 case KFD_QUEUE_TYPE_SDMA:
0162 mes_queue_type = MES_QUEUE_TYPE_SDMA;
0163 break;
0164 default:
0165 WARN(1, "Invalid queue type %d", queue_type);
0166 mes_queue_type = -EINVAL;
0167 break;
0168 }
0169
0170 return mes_queue_type;
0171 }
0172
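/*
 * Hand a user queue to the MES firmware scheduler: fill in a
 * mes_add_queue_input descriptor from the queue and process state and
 * submit it under the MES lock. A failure here is treated as an
 * unrecoverable scheduler hang.
 */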
0173 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
0174 struct qcm_process_device *qpd)
0175 {
0176 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
0177 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
0178 struct mes_add_queue_input queue_input;
0179 int r, queue_type;
0180 uint64_t wptr_addr_off;
0181
0182 if (dqm->is_hws_hang)
0183 return -EIO;
0184
0185 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
0186 queue_input.process_id = qpd->pqm->process->pasid;
0187 queue_input.page_table_base_addr = qpd->page_table_base;
0188 queue_input.process_va_start = 0;
0189 queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
0190
0191 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;
0192 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
0193 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM;
0194 queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
0195 queue_input.inprocess_gang_priority = q->properties.priority;
0196 queue_input.gang_global_priority_level =
0197 AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
0198 queue_input.doorbell_offset = q->properties.doorbell_off;
0199 queue_input.mqd_addr = q->gart_mqd_addr;
0200 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
0201
0202 if (q->wptr_bo) {
0203 wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
0204 queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
0205 }
0206
0207 queue_input.is_kfd_process = 1;
0208 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
0209 queue_input.queue_size = q->properties.queue_size >> 2;
0210
0211 queue_input.paging = false;
0212 queue_input.tba_addr = qpd->tba_addr;
0213 queue_input.tma_addr = qpd->tma_addr;
0214
0215 queue_type = convert_to_mes_queue_type(q->properties.type);
0216 if (queue_type < 0) {
0217 pr_err("Queue type not supported with MES, queue:%d\n",
0218 q->properties.type);
0219 return -EINVAL;
0220 }
0221 queue_input.queue_type = (uint32_t)queue_type;
0222
0223 if (q->gws) {
0224 queue_input.gws_base = 0;
0225 queue_input.gws_size = qpd->num_gws;
0226 }
0227
0228 amdgpu_mes_lock(&adev->mes);
0229 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
0230 amdgpu_mes_unlock(&adev->mes);
0231 if (r) {
0232 pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
0233 q->properties.doorbell_off);
0234 pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
0235 kfd_hws_hang(dqm);
0236 }
0237
0238 return r;
0239 }
0240
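/*
 * Ask the MES firmware scheduler to remove a previously added queue,
 * identified by its doorbell offset and gang context address.
 */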
0241 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
0242 struct qcm_process_device *qpd)
0243 {
0244 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
0245 int r;
0246 struct mes_remove_queue_input queue_input;
0247
0248 if (dqm->is_hws_hang)
0249 return -EIO;
0250
0251 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
0252 queue_input.doorbell_offset = q->properties.doorbell_off;
0253 queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
0254
0255 amdgpu_mes_lock(&adev->mes);
0256 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
0257 amdgpu_mes_unlock(&adev->mes);
0258
0259 if (r) {
0260 pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
0261 q->properties.doorbell_off);
0262 pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
0263 kfd_hws_hang(dqm);
0264 }
0265
0266 return r;
0267 }
0268
0269 static int remove_all_queues_mes(struct device_queue_manager *dqm)
0270 {
0271 struct device_process_node *cur;
0272 struct qcm_process_device *qpd;
0273 struct queue *q;
0274 int retval = 0;
0275
0276 list_for_each_entry(cur, &dqm->queues, list) {
0277 qpd = cur->qpd;
0278 list_for_each_entry(q, &qpd->queues_list, list) {
0279 if (q->properties.is_active) {
0280 retval = remove_queue_mes(dqm, q, qpd);
0281 if (retval) {
					pr_err("%s: Failed to remove queue %d for dev %d\n",
					       __func__,
					       q->properties.queue_id,
					       dqm->dev->id);
0286 return retval;
0287 }
0288 }
0289 }
0290 }
0291
0292 return retval;
0293 }
0294
0295 static void increment_queue_count(struct device_queue_manager *dqm,
0296 struct qcm_process_device *qpd,
0297 struct queue *q)
0298 {
0299 dqm->active_queue_count++;
0300 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
0301 q->properties.type == KFD_QUEUE_TYPE_DIQ)
0302 dqm->active_cp_queue_count++;
0303
0304 if (q->properties.is_gws) {
0305 dqm->gws_queue_count++;
0306 qpd->mapped_gws_queue = true;
0307 }
0308 }
0309
0310 static void decrement_queue_count(struct device_queue_manager *dqm,
0311 struct qcm_process_device *qpd,
0312 struct queue *q)
0313 {
0314 dqm->active_queue_count--;
0315 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
0316 q->properties.type == KFD_QUEUE_TYPE_DIQ)
0317 dqm->active_cp_queue_count--;
0318
0319 if (q->properties.is_gws) {
0320 dqm->gws_queue_count--;
0321 qpd->mapped_gws_queue = false;
0322 }
0323 }
0324
0325
0326
0327
0328
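/*
 * Assign a doorbell ID to the queue. When restoring from a checkpoint
 * (restore_id != NULL), the previously used ID must still be usable,
 * otherwise the restore fails with -EINVAL.
 */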
0329 static int allocate_doorbell(struct qcm_process_device *qpd,
0330 struct queue *q,
0331 uint32_t const *restore_id)
0332 {
0333 struct kfd_dev *dev = qpd->dqm->dev;
0334
0335 if (!KFD_IS_SOC15(dev)) {
0336
0337
0338
0339
0340 if (restore_id && *restore_id != q->properties.queue_id)
0341 return -EINVAL;
0342
0343 q->doorbell_id = q->properties.queue_id;
0344 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0345 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
0346
0347
0348
0349
0350
0351
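		/* For SDMA queues on SOC15 with 8-byte doorbells, use static
		 * doorbell assignments based on the engine and queue ID.
		 * Doorbells for queues (2*i) and (2*i+1) of an engine are
		 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET apart.
		 */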
0352 uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
0353 uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
0354 + (q->properties.sdma_queue_id & 1)
0355 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
0356 + (q->properties.sdma_queue_id >> 1);
0357
0358 if (restore_id && *restore_id != valid_id)
0359 return -EINVAL;
0360 q->doorbell_id = valid_id;
0361 } else {
0362
0363 if (restore_id) {
0364
0365 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
0366 return -EINVAL;
0367
0368 q->doorbell_id = *restore_id;
0369 } else {
0370
0371 unsigned int found;
0372
0373 found = find_first_zero_bit(qpd->doorbell_bitmap,
0374 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
0375 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
0376 pr_debug("No doorbells available");
0377 return -EBUSY;
0378 }
0379 set_bit(found, qpd->doorbell_bitmap);
0380 q->doorbell_id = found;
0381 }
0382 }
0383
0384 q->properties.doorbell_off =
0385 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
0386 q->doorbell_id);
0387 return 0;
0388 }
0389
0390 static void deallocate_doorbell(struct qcm_process_device *qpd,
0391 struct queue *q)
0392 {
0393 unsigned int old;
0394 struct kfd_dev *dev = qpd->dqm->dev;
0395
0396 if (!KFD_IS_SOC15(dev) ||
0397 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0398 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
0399 return;
0400
0401 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
0402 WARN_ON(!old);
0403 }
0404
0405 static void program_trap_handler_settings(struct device_queue_manager *dqm,
0406 struct qcm_process_device *qpd)
0407 {
0408 if (dqm->dev->kfd2kgd->program_trap_handler_settings)
0409 dqm->dev->kfd2kgd->program_trap_handler_settings(
0410 dqm->dev->adev, qpd->vmid,
0411 qpd->tba_addr, qpd->tma_addr);
0412 }
0413
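/*
 * Pick a free VMID from the KFD range, bind it to the process PASID and
 * program the per-VMID state (memory apertures, trap handler, page table
 * base) needed before the process's first queue can run.
 */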
0414 static int allocate_vmid(struct device_queue_manager *dqm,
0415 struct qcm_process_device *qpd,
0416 struct queue *q)
0417 {
0418 int allocated_vmid = -1, i;
0419
0420 for (i = dqm->dev->vm_info.first_vmid_kfd;
0421 i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
0422 if (!dqm->vmid_pasid[i]) {
0423 allocated_vmid = i;
0424 break;
0425 }
0426 }
0427
0428 if (allocated_vmid < 0) {
0429 pr_err("no more vmid to allocate\n");
0430 return -ENOSPC;
0431 }
0432
0433 pr_debug("vmid allocated: %d\n", allocated_vmid);
0434
0435 dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
0436
0437 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
0438
0439 qpd->vmid = allocated_vmid;
0440 q->properties.vmid = allocated_vmid;
0441
0442 program_sh_mem_settings(dqm, qpd);
0443
0444 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
0445 program_trap_handler_settings(dqm, qpd);
0446
0447
0448
0449
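	/* qpd->page_table_base was set earlier in register_process();
	 * program it into the VM context for this VMID here.
	 */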
0450 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
0451 qpd->vmid,
0452 qpd->page_table_base);
0453
0454 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
0455
0456 if (dqm->dev->kfd2kgd->set_scratch_backing_va)
0457 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
0458 qpd->sh_hidden_private_base, qpd->vmid);
0459
0460 return 0;
0461 }
0462
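/*
 * Submit a release_mem packet through the process's kernel IB to flush
 * texture caches; used where the CP doesn't flush TC when a queue is
 * unmapped (see deallocate_vmid).
 */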
0463 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
0464 struct qcm_process_device *qpd)
0465 {
0466 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
0467 int ret;
0468
0469 if (!qpd->ib_kaddr)
0470 return -ENOMEM;
0471
0472 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
0473 if (ret)
0474 return ret;
0475
0476 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
0477 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
0478 pmf->release_mem_size / sizeof(uint32_t));
0479 }
0480
0481 static void deallocate_vmid(struct device_queue_manager *dqm,
0482 struct qcm_process_device *qpd,
0483 struct queue *q)
0484 {
0485
0486 if (q->device->adev->asic_type == CHIP_HAWAII)
0487 if (flush_texture_cache_nocpsch(q->device, qpd))
0488 pr_err("Failed to flush TC\n");
0489
0490 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
0491
0492
0493 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
0494 dqm->vmid_pasid[qpd->vmid] = 0;
0495
0496 qpd->vmid = 0;
0497 q->properties.vmid = 0;
0498 }
0499
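/*
 * Create a user mode queue on the non-HWS path: the driver picks the
 * HQD or SDMA slot itself and, for active queues, loads the MQD directly.
 */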
0500 static int create_queue_nocpsch(struct device_queue_manager *dqm,
0501 struct queue *q,
0502 struct qcm_process_device *qpd,
0503 const struct kfd_criu_queue_priv_data *qd,
0504 const void *restore_mqd, const void *restore_ctl_stack)
0505 {
0506 struct mqd_manager *mqd_mgr;
0507 int retval;
0508
0509 dqm_lock(dqm);
0510
0511 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
0512 pr_warn("Can't create new usermode queue because %d queues were already created\n",
0513 dqm->total_queue_count);
0514 retval = -EPERM;
0515 goto out_unlock;
0516 }
0517
0518 if (list_empty(&qpd->queues_list)) {
0519 retval = allocate_vmid(dqm, qpd, q);
0520 if (retval)
0521 goto out_unlock;
0522 }
0523 q->properties.vmid = qpd->vmid;
0524
0525
0526
0527
0528
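	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but doesn't map them.
	 */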
0529 q->properties.is_evicted = !!qpd->evicted;
0530
0531 q->properties.tba_addr = qpd->tba_addr;
0532 q->properties.tma_addr = qpd->tma_addr;
0533
0534 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
0535 q->properties.type)];
0536 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
0537 retval = allocate_hqd(dqm, q);
0538 if (retval)
0539 goto deallocate_vmid;
0540 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
0541 q->pipe, q->queue);
0542 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0543 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
0544 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
0545 if (retval)
0546 goto deallocate_vmid;
0547 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
0548 }
0549
0550 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
0551 if (retval)
0552 goto out_deallocate_hqd;
0553
0554
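	/* Temporarily release dqm lock to avoid a circular lock dependency */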
0555 dqm_unlock(dqm);
0556 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
0557 dqm_lock(dqm);
0558
0559 if (!q->mqd_mem_obj) {
0560 retval = -ENOMEM;
0561 goto out_deallocate_doorbell;
0562 }
0563
0564 if (qd)
0565 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
0566 &q->properties, restore_mqd, restore_ctl_stack,
0567 qd->ctl_stack_size);
0568 else
0569 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
0570 &q->gart_mqd_addr, &q->properties);
0571
0572 if (q->properties.is_active) {
0573 if (!dqm->sched_running) {
0574 WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
0575 goto add_queue_to_list;
0576 }
0577
0578 if (WARN(q->process->mm != current->mm,
0579 "should only run in user thread"))
0580 retval = -EFAULT;
0581 else
0582 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
0583 q->queue, &q->properties, current->mm);
0584 if (retval)
0585 goto out_free_mqd;
0586 }
0587
0588 add_queue_to_list:
0589 list_add(&q->list, &qpd->queues_list);
0590 qpd->queue_count++;
0591 if (q->properties.is_active)
0592 increment_queue_count(dqm, qpd, q);
0593
0594
0595
0596
0597
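	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */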
0598 dqm->total_queue_count++;
0599 pr_debug("Total of %d queues are accountable so far\n",
0600 dqm->total_queue_count);
0601 goto out_unlock;
0602
0603 out_free_mqd:
0604 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
0605 out_deallocate_doorbell:
0606 deallocate_doorbell(qpd, q);
0607 out_deallocate_hqd:
0608 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
0609 deallocate_hqd(dqm, q);
0610 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0611 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
0612 deallocate_sdma_queue(dqm, q);
0613 deallocate_vmid:
0614 if (list_empty(&qpd->queues_list))
0615 deallocate_vmid(dqm, qpd, q);
0616 out_unlock:
0617 dqm_unlock(dqm);
0618 return retval;
0619 }
0620
0621 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
0622 {
0623 bool set;
0624 int pipe, bit, i;
0625
0626 set = false;
0627
0628 for (pipe = dqm->next_pipe_to_allocate, i = 0;
0629 i < get_pipes_per_mec(dqm);
0630 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
0631
0632 if (!is_pipe_enabled(dqm, 0, pipe))
0633 continue;
0634
0635 if (dqm->allocated_queues[pipe] != 0) {
0636 bit = ffs(dqm->allocated_queues[pipe]) - 1;
0637 dqm->allocated_queues[pipe] &= ~(1 << bit);
0638 q->pipe = pipe;
0639 q->queue = bit;
0640 set = true;
0641 break;
0642 }
0643 }
0644
0645 if (!set)
0646 return -EBUSY;
0647
0648 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
0649
0650 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
0651
0652 return 0;
0653 }
0654
0655 static inline void deallocate_hqd(struct device_queue_manager *dqm,
0656 struct queue *q)
0657 {
0658 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
0659 }
0660
0661 #define SQ_IND_CMD_CMD_KILL 0x00000003
0662 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001
0663
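/*
 * Kill all wavefronts of a process: find the VMID currently mapped to its
 * PASID and broadcast an SQ KILL command to every SE/SH instance.
 */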
0664 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
0665 {
0666 int status = 0;
0667 unsigned int vmid;
0668 uint16_t queried_pasid;
0669 union SQ_CMD_BITS reg_sq_cmd;
0670 union GRBM_GFX_INDEX_BITS reg_gfx_index;
0671 struct kfd_process_device *pdd;
0672 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
0673 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
0674
0675 reg_sq_cmd.u32All = 0;
0676 reg_gfx_index.u32All = 0;
0677
0678 pr_debug("Killing all process wavefronts\n");
0679
	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}
0684
0685
0686
0687
0688
0689
0690 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
0691 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
0692 (dev->adev, vmid, &queried_pasid);
0693
0694 if (status && queried_pasid == p->pasid) {
0695 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
0696 vmid, p->pasid);
0697 break;
0698 }
0699 }
0700
0701 if (vmid > last_vmid_to_scan) {
0702 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
0703 return -EFAULT;
0704 }
0705
0706
0707 pdd = kfd_get_process_device_data(dev, p);
0708 if (!pdd)
0709 return -EFAULT;
0710
0711 reg_gfx_index.bits.sh_broadcast_writes = 1;
0712 reg_gfx_index.bits.se_broadcast_writes = 1;
0713 reg_gfx_index.bits.instance_broadcast_writes = 1;
0714 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
0715 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
0716 reg_sq_cmd.bits.vm_id = vmid;
0717
0718 dev->kfd2kgd->wave_control_execute(dev->adev,
0719 reg_gfx_index.u32All,
0720 reg_sq_cmd.u32All);
0721
0722 return 0;
0723 }
0724
0725
0726
0727
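/* Access to the DQM has to be locked before calling
 * destroy_queue_nocpsch_locked, to avoid unsynchronized access.
 */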
0728 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
0729 struct qcm_process_device *qpd,
0730 struct queue *q)
0731 {
0732 int retval;
0733 struct mqd_manager *mqd_mgr;
0734
0735 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
0736 q->properties.type)];
0737
0738 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
0739 deallocate_hqd(dqm, q);
0740 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
0741 deallocate_sdma_queue(dqm, q);
0742 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
0743 deallocate_sdma_queue(dqm, q);
0744 else {
0745 pr_debug("q->properties.type %d is invalid\n",
0746 q->properties.type);
0747 return -EINVAL;
0748 }
0749 dqm->total_queue_count--;
0750
0751 deallocate_doorbell(qpd, q);
0752
0753 if (!dqm->sched_running) {
0754 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
0755 return 0;
0756 }
0757
0758 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
0759 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
0760 KFD_UNMAP_LATENCY_MS,
0761 q->pipe, q->queue);
0762 if (retval == -ETIME)
0763 qpd->reset_wavefronts = true;
0764
0765 list_del(&q->list);
0766 if (list_empty(&qpd->queues_list)) {
0767 if (qpd->reset_wavefronts) {
0768 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
0769 dqm->dev);
0770
0771
0772
0773 dbgdev_wave_reset_wavefronts(dqm->dev,
0774 qpd->pqm->process);
0775 qpd->reset_wavefronts = false;
0776 }
0777
0778 deallocate_vmid(dqm, qpd, q);
0779 }
0780 qpd->queue_count--;
0781 if (q->properties.is_active)
0782 decrement_queue_count(dqm, qpd, q);
0783
0784 return retval;
0785 }
0786
0787 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
0788 struct qcm_process_device *qpd,
0789 struct queue *q)
0790 {
0791 int retval;
0792 uint64_t sdma_val = 0;
0793 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
0794 struct mqd_manager *mqd_mgr =
0795 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
0796
0797
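	/* Get the SDMA queue stats before destroying the queue */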
0798 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
0799 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
0800 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
0801 &sdma_val);
0802 if (retval)
0803 pr_err("Failed to read SDMA queue counter for queue: %d\n",
0804 q->properties.queue_id);
0805 }
0806
0807 dqm_lock(dqm);
0808 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
0809 if (!retval)
0810 pdd->sdma_past_activity_counter += sdma_val;
0811 dqm_unlock(dqm);
0812
0813 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
0814
0815 return retval;
0816 }
0817
0818 static int update_queue(struct device_queue_manager *dqm, struct queue *q,
0819 struct mqd_update_info *minfo)
0820 {
0821 int retval = 0;
0822 struct mqd_manager *mqd_mgr;
0823 struct kfd_process_device *pdd;
0824 bool prev_active = false;
0825
0826 dqm_lock(dqm);
0827 pdd = kfd_get_process_device_data(q->device, q->process);
0828 if (!pdd) {
0829 retval = -ENODEV;
0830 goto out_unlock;
0831 }
0832 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
0833 q->properties.type)];
0834
0835
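	/* Save previous activity state for counters */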
0836 prev_active = q->properties.is_active;
0837
0838
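	/* Make sure the queue is unmapped before updating the MQD */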
0839 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
0840 if (!dqm->dev->shared_resources.enable_mes)
0841 retval = unmap_queues_cpsch(dqm,
0842 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
0843 else if (prev_active)
0844 retval = remove_queue_mes(dqm, q, &pdd->qpd);
0845
0846 if (retval) {
0847 pr_err("unmap queue failed\n");
0848 goto out_unlock;
0849 }
0850 } else if (prev_active &&
0851 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
0852 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0853 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
0854
0855 if (!dqm->sched_running) {
0856 WARN_ONCE(1, "Update non-HWS queue while stopped\n");
0857 goto out_unlock;
0858 }
0859
0860 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
0861 (dqm->dev->cwsr_enabled ?
0862 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
0863 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
0864 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
0865 if (retval) {
0866 pr_err("destroy mqd failed\n");
0867 goto out_unlock;
0868 }
0869 }
0870
0871 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
0872
0873
0874
0875
0876
0877
0878
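	/*
	 * Check the new active state against the previous one and adjust
	 * the counters accordingly; map_queues_cpsch uses
	 * dqm->active_queue_count to decide whether a new runlist must be
	 * uploaded.
	 */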
0879 if (q->properties.is_active && !prev_active) {
0880 increment_queue_count(dqm, &pdd->qpd, q);
0881 } else if (!q->properties.is_active && prev_active) {
0882 decrement_queue_count(dqm, &pdd->qpd, q);
0883 } else if (q->gws && !q->properties.is_gws) {
0884 if (q->properties.is_active) {
0885 dqm->gws_queue_count++;
0886 pdd->qpd.mapped_gws_queue = true;
0887 }
0888 q->properties.is_gws = true;
0889 } else if (!q->gws && q->properties.is_gws) {
0890 if (q->properties.is_active) {
0891 dqm->gws_queue_count--;
0892 pdd->qpd.mapped_gws_queue = false;
0893 }
0894 q->properties.is_gws = false;
0895 }
0896
0897 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
0898 if (!dqm->dev->shared_resources.enable_mes)
0899 retval = map_queues_cpsch(dqm);
0900 else if (q->properties.is_active)
0901 retval = add_queue_mes(dqm, q, &pdd->qpd);
0902 } else if (q->properties.is_active &&
0903 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
0904 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
0905 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
0906 if (WARN(q->process->mm != current->mm,
0907 "should only run in user thread"))
0908 retval = -EFAULT;
0909 else
0910 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
0911 q->pipe, q->queue,
0912 &q->properties, current->mm);
0913 }
0914
0915 out_unlock:
0916 dqm_unlock(dqm);
0917 return retval;
0918 }
0919
0920 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
0921 struct qcm_process_device *qpd)
0922 {
0923 struct queue *q;
0924 struct mqd_manager *mqd_mgr;
0925 struct kfd_process_device *pdd;
0926 int retval, ret = 0;
0927
0928 dqm_lock(dqm);
0929 if (qpd->evicted++ > 0)
0930 goto out;
0931
0932 pdd = qpd_to_pdd(qpd);
0933 pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
0934 pdd->process->pasid);
0935
0936 pdd->last_evict_timestamp = get_jiffies_64();
0937
0938
0939
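	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */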
0940 list_for_each_entry(q, &qpd->queues_list, list) {
0941 q->properties.is_evicted = true;
0942 if (!q->properties.is_active)
0943 continue;
0944
0945 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
0946 q->properties.type)];
0947 q->properties.is_active = false;
0948 decrement_queue_count(dqm, qpd, q);
0949
0950 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
0951 continue;
0952
0953 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
0954 (dqm->dev->cwsr_enabled ?
0955 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
0956 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
0957 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
0958 if (retval && !ret)
0959
0960
0961
0962 ret = retval;
0963 }
0964
0965 out:
0966 dqm_unlock(dqm);
0967 return ret;
0968 }
0969
0970 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
0971 struct qcm_process_device *qpd)
0972 {
0973 struct queue *q;
0974 struct kfd_process_device *pdd;
0975 int retval = 0;
0976
0977 dqm_lock(dqm);
0978 if (qpd->evicted++ > 0)
0979 goto out;
0980
0981 pdd = qpd_to_pdd(qpd);
0982 pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
0983 pdd->process->pasid);
0984
0985
0986
0987
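	/* Mark all queues as evicted and deactivate the ones that are
	 * currently active.
	 */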
0988 list_for_each_entry(q, &qpd->queues_list, list) {
0989 q->properties.is_evicted = true;
0990 if (!q->properties.is_active)
0991 continue;
0992
0993 q->properties.is_active = false;
0994 decrement_queue_count(dqm, qpd, q);
0995
0996 if (dqm->dev->shared_resources.enable_mes) {
0997 retval = remove_queue_mes(dqm, q, qpd);
0998 if (retval) {
0999 pr_err("Failed to evict queue %d\n",
1000 q->properties.queue_id);
1001 goto out;
1002 }
1003 }
1004 }
1005 pdd->last_evict_timestamp = get_jiffies_64();
1006 if (!dqm->dev->shared_resources.enable_mes)
1007 retval = execute_queues_cpsch(dqm,
1008 qpd->is_debug ?
1009 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
1010 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1011
1012 out:
1013 dqm_unlock(dqm);
1014 return retval;
1015 }
1016
1017 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1018 struct qcm_process_device *qpd)
1019 {
1020 struct mm_struct *mm = NULL;
1021 struct queue *q;
1022 struct mqd_manager *mqd_mgr;
1023 struct kfd_process_device *pdd;
1024 uint64_t pd_base;
1025 uint64_t eviction_duration;
1026 int retval, ret = 0;
1027
1028 pdd = qpd_to_pdd(qpd);
1029
1030 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1031
1032 dqm_lock(dqm);
1033 if (WARN_ON_ONCE(!qpd->evicted))
1034 goto out;
1035 if (qpd->evicted > 1) {
1036 qpd->evicted--;
1037 goto out;
1038 }
1039
1040 pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1041 pdd->process->pasid);
1042
1043
1044 qpd->page_table_base = pd_base;
1045 pr_debug("Updated PD address to 0x%llx\n", pd_base);
1046
1047 if (!list_empty(&qpd->queues_list)) {
1048 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1049 dqm->dev->adev,
1050 qpd->vmid,
1051 qpd->page_table_base);
1052 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1053 }
1054
1055
1056
1057
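	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */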
1058 mm = get_task_mm(pdd->process->lead_thread);
1059 if (!mm) {
1060 ret = -EFAULT;
1061 goto out;
1062 }
1063
1064
1065
1066
1067 list_for_each_entry(q, &qpd->queues_list, list) {
1068 q->properties.is_evicted = false;
1069 if (!QUEUE_IS_ACTIVE(q->properties))
1070 continue;
1071
1072 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1073 q->properties.type)];
1074 q->properties.is_active = true;
1075 increment_queue_count(dqm, qpd, q);
1076
1077 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1078 continue;
1079
1080 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1081 q->queue, &q->properties, mm);
1082 if (retval && !ret)
1083
1084
1085
1086 ret = retval;
1087 }
1088 qpd->evicted = 0;
1089 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1090 atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1091 out:
1092 if (mm)
1093 mmput(mm);
1094 dqm_unlock(dqm);
1095 return ret;
1096 }
1097
1098 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1099 struct qcm_process_device *qpd)
1100 {
1101 struct queue *q;
1102 struct kfd_process_device *pdd;
1103 uint64_t pd_base;
1104 uint64_t eviction_duration;
1105 int retval = 0;
1106
1107 pdd = qpd_to_pdd(qpd);
1108
1109 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1110
1111 dqm_lock(dqm);
1112 if (WARN_ON_ONCE(!qpd->evicted))
1113 goto out;
1114 if (qpd->evicted > 1) {
1115 qpd->evicted--;
1116 goto out;
1117 }
1118
1119 pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1120 pdd->process->pasid);
1121
1122
1123 qpd->page_table_base = pd_base;
1124 pr_debug("Updated PD address to 0x%llx\n", pd_base);
1125
1126
1127 list_for_each_entry(q, &qpd->queues_list, list) {
1128 q->properties.is_evicted = false;
1129 if (!QUEUE_IS_ACTIVE(q->properties))
1130 continue;
1131
1132 q->properties.is_active = true;
1133 increment_queue_count(dqm, &pdd->qpd, q);
1134
1135 if (dqm->dev->shared_resources.enable_mes) {
1136 retval = add_queue_mes(dqm, q, qpd);
1137 if (retval) {
1138 pr_err("Failed to restore queue %d\n",
1139 q->properties.queue_id);
1140 goto out;
1141 }
1142 }
1143 }
1144 if (!dqm->dev->shared_resources.enable_mes)
1145 retval = execute_queues_cpsch(dqm,
1146 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1147 qpd->evicted = 0;
1148 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1149 atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1150 out:
1151 dqm_unlock(dqm);
1152 return retval;
1153 }
1154
1155 static int register_process(struct device_queue_manager *dqm,
1156 struct qcm_process_device *qpd)
1157 {
1158 struct device_process_node *n;
1159 struct kfd_process_device *pdd;
1160 uint64_t pd_base;
1161 int retval;
1162
1163 n = kzalloc(sizeof(*n), GFP_KERNEL);
1164 if (!n)
1165 return -ENOMEM;
1166
1167 n->qpd = qpd;
1168
1169 pdd = qpd_to_pdd(qpd);
1170
1171 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1172
1173 dqm_lock(dqm);
1174 list_add(&n->list, &dqm->queues);
1175
1176
1177 qpd->page_table_base = pd_base;
1178 pr_debug("Updated PD address to 0x%llx\n", pd_base);
1179
1180 retval = dqm->asic_ops.update_qpd(dqm, qpd);
1181
1182 dqm->processes_count++;
1183
1184 dqm_unlock(dqm);
1185
1186
1187
1188
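	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */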
1189 kfd_inc_compute_active(dqm->dev);
1190
1191 return retval;
1192 }
1193
1194 static int unregister_process(struct device_queue_manager *dqm,
1195 struct qcm_process_device *qpd)
1196 {
1197 int retval;
1198 struct device_process_node *cur, *next;
1199
1200 pr_debug("qpd->queues_list is %s\n",
1201 list_empty(&qpd->queues_list) ? "empty" : "not empty");
1202
1203 retval = 0;
1204 dqm_lock(dqm);
1205
1206 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1207 if (qpd == cur->qpd) {
1208 list_del(&cur->list);
1209 kfree(cur);
1210 dqm->processes_count--;
1211 goto out;
1212 }
1213 }
1214
1215 retval = 1;
1216 out:
1217 dqm_unlock(dqm);
1218
1219
1220
1221
1222 if (!retval)
1223 kfd_dec_compute_active(dqm->dev);
1224
1225 return retval;
1226 }
1227
1228 static int
1229 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1230 unsigned int vmid)
1231 {
1232 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1233 dqm->dev->adev, pasid, vmid);
1234 }
1235
1236 static void init_interrupts(struct device_queue_manager *dqm)
1237 {
1238 unsigned int i;
1239
1240 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
1241 if (is_pipe_enabled(dqm, 0, i))
1242 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
1243 }
1244
1245 static int initialize_nocpsch(struct device_queue_manager *dqm)
1246 {
1247 int pipe, queue;
1248
1249 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1250
1251 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1252 sizeof(unsigned int), GFP_KERNEL);
1253 if (!dqm->allocated_queues)
1254 return -ENOMEM;
1255
1256 mutex_init(&dqm->lock_hidden);
1257 INIT_LIST_HEAD(&dqm->queues);
1258 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1259 dqm->active_cp_queue_count = 0;
1260 dqm->gws_queue_count = 0;
1261
1262 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1263 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1264
1265 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1266 if (test_bit(pipe_offset + queue,
1267 dqm->dev->shared_resources.cp_queue_bitmap))
1268 dqm->allocated_queues[pipe] |= 1 << queue;
1269 }
1270
1271 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1272
1273 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1274 dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
1275 pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1276
1277 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1278
1279 return 0;
1280 }
1281
1282 static void uninitialize(struct device_queue_manager *dqm)
1283 {
1284 int i;
1285
1286 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1287
1288 kfree(dqm->allocated_queues);
1289 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1290 kfree(dqm->mqd_mgrs[i]);
1291 mutex_destroy(&dqm->lock_hidden);
1292 }
1293
1294 static int start_nocpsch(struct device_queue_manager *dqm)
1295 {
1296 int r = 0;
1297
1298 pr_info("SW scheduler is used");
1299 init_interrupts(dqm);
1300
1301 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1302 r = pm_init(&dqm->packet_mgr, dqm);
1303 if (!r)
1304 dqm->sched_running = true;
1305
1306 return r;
1307 }
1308
1309 static int stop_nocpsch(struct device_queue_manager *dqm)
1310 {
1311 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1312 pm_uninit(&dqm->packet_mgr, false);
1313 dqm->sched_running = false;
1314
1315 return 0;
1316 }
1317
1318 static void pre_reset(struct device_queue_manager *dqm)
1319 {
1320 dqm_lock(dqm);
1321 dqm->is_resetting = true;
1322 dqm_unlock(dqm);
1323 }
1324
1325 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1326 struct queue *q, const uint32_t *restore_sdma_id)
1327 {
1328 int bit;
1329
1330 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1331 if (dqm->sdma_bitmap == 0) {
1332 pr_err("No more SDMA queue to allocate\n");
1333 return -ENOMEM;
1334 }
1335
1336 if (restore_sdma_id) {
1337
1338 if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
1339 pr_err("SDMA queue already in use\n");
1340 return -EBUSY;
1341 }
1342 dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1343 q->sdma_id = *restore_sdma_id;
1344 } else {
1345
1346 bit = __ffs64(dqm->sdma_bitmap);
1347 dqm->sdma_bitmap &= ~(1ULL << bit);
1348 q->sdma_id = bit;
1349 }
1350
1351 q->properties.sdma_engine_id = q->sdma_id %
1352 kfd_get_num_sdma_engines(dqm->dev);
1353 q->properties.sdma_queue_id = q->sdma_id /
1354 kfd_get_num_sdma_engines(dqm->dev);
1355 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1356 if (dqm->xgmi_sdma_bitmap == 0) {
1357 pr_err("No more XGMI SDMA queue to allocate\n");
1358 return -ENOMEM;
1359 }
1360 if (restore_sdma_id) {
1361
1362 if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
1363 pr_err("SDMA queue already in use\n");
1364 return -EBUSY;
1365 }
1366 dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1367 q->sdma_id = *restore_sdma_id;
1368 } else {
1369 bit = __ffs64(dqm->xgmi_sdma_bitmap);
1370 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1371 q->sdma_id = bit;
1372 }
1373
1374
1375
1376
1377
1378
1379 q->properties.sdma_engine_id =
1380 kfd_get_num_sdma_engines(dqm->dev) +
1381 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1382 q->properties.sdma_queue_id = q->sdma_id /
1383 kfd_get_num_xgmi_sdma_engines(dqm->dev);
1384 }
1385
1386 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1387 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1388
1389 return 0;
1390 }
1391
1392 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1393 struct queue *q)
1394 {
1395 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1396 if (q->sdma_id >= get_num_sdma_queues(dqm))
1397 return;
1398 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1399 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1400 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1401 return;
1402 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1403 }
1404 }
1405
1406
1407
1408
1409
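/*
 * Tell the HW scheduler which VMIDs and CP queues it may use, based on
 * the resources amdgpu reserved for KFD.
 */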
1410 static int set_sched_resources(struct device_queue_manager *dqm)
1411 {
1412 int i, mec;
1413 struct scheduling_resources res;
1414
1415 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1416
1417 res.queue_mask = 0;
1418 for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1419 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1420 / dqm->dev->shared_resources.num_pipe_per_mec;
1421
1422 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1423 continue;
1424
1425
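		/* Only acquire queues from the first MEC */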
1426 if (mec > 0)
1427 continue;
1428
1429
1430
1431
1432
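		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so we need
		 * to make sure that the queue_mask is big enough.
		 */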
1433 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1434 pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1435 break;
1436 }
1437
1438 res.queue_mask |= 1ull
1439 << amdgpu_queue_mask_bit_to_set_resource_bit(
1440 dqm->dev->adev, i);
1441 }
1442 res.gws_mask = ~0ull;
1443 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1444
1445 pr_debug("Scheduling resources:\n"
1446 "vmid mask: 0x%8X\n"
1447 "queue mask: 0x%8llX\n",
1448 res.vmid_mask, res.queue_mask);
1449
1450 return pm_send_set_resources(&dqm->packet_mgr, &res);
1451 }
1452
1453 static int initialize_cpsch(struct device_queue_manager *dqm)
1454 {
1455 uint64_t num_sdma_queues;
1456 uint64_t num_xgmi_sdma_queues;
1457
1458 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1459
1460 mutex_init(&dqm->lock_hidden);
1461 INIT_LIST_HEAD(&dqm->queues);
1462 dqm->active_queue_count = dqm->processes_count = 0;
1463 dqm->active_cp_queue_count = 0;
1464 dqm->gws_queue_count = 0;
1465 dqm->active_runlist = false;
1466
1467 num_sdma_queues = get_num_sdma_queues(dqm);
1468 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1469 dqm->sdma_bitmap = ULLONG_MAX;
1470 else
1471 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1472
1473 dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
1474 pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1475
1476 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1477 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1478 dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1479 else
1480 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
1481
1482 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1483
1484 return 0;
1485 }
1486
1487 static int start_cpsch(struct device_queue_manager *dqm)
1488 {
1489 int retval;
1490
1491 retval = 0;
1492
1493 dqm_lock(dqm);
1494
1495 if (!dqm->dev->shared_resources.enable_mes) {
1496 retval = pm_init(&dqm->packet_mgr, dqm);
1497 if (retval)
1498 goto fail_packet_manager_init;
1499
1500 retval = set_sched_resources(dqm);
1501 if (retval)
1502 goto fail_set_sched_resources;
1503 }
1504 pr_debug("Allocating fence memory\n");
1505
1506
1507 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1508 &dqm->fence_mem);
1509
1510 if (retval)
1511 goto fail_allocate_vidmem;
1512
1513 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1514 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1515
1516 init_interrupts(dqm);
1517
1518
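	/* Clear hang status when the driver tries to start the HW scheduler */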
1519 dqm->is_hws_hang = false;
1520 dqm->is_resetting = false;
1521 dqm->sched_running = true;
1522 if (!dqm->dev->shared_resources.enable_mes)
1523 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1524 dqm_unlock(dqm);
1525
1526 return 0;
1527 fail_allocate_vidmem:
1528 fail_set_sched_resources:
1529 if (!dqm->dev->shared_resources.enable_mes)
1530 pm_uninit(&dqm->packet_mgr, false);
1531 fail_packet_manager_init:
1532 dqm_unlock(dqm);
1533 return retval;
1534 }
1535
1536 static int stop_cpsch(struct device_queue_manager *dqm)
1537 {
1538 bool hanging;
1539
1540 dqm_lock(dqm);
1541 if (!dqm->sched_running) {
1542 dqm_unlock(dqm);
1543 return 0;
1544 }
1545
1546 if (!dqm->is_hws_hang) {
1547 if (!dqm->dev->shared_resources.enable_mes)
1548 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
1549 else
1550 remove_all_queues_mes(dqm);
1551 }
1552
1553 hanging = dqm->is_hws_hang || dqm->is_resetting;
1554 dqm->sched_running = false;
1555
1556 if (!dqm->dev->shared_resources.enable_mes)
1557 pm_release_ib(&dqm->packet_mgr);
1558
1559 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1560 if (!dqm->dev->shared_resources.enable_mes)
1561 pm_uninit(&dqm->packet_mgr, hanging);
1562 dqm_unlock(dqm);
1563
1564 return 0;
1565 }
1566
1567 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1568 struct kernel_queue *kq,
1569 struct qcm_process_device *qpd)
1570 {
1571 dqm_lock(dqm);
1572 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1573 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1574 dqm->total_queue_count);
1575 dqm_unlock(dqm);
1576 return -EPERM;
1577 }
1578
1579
1580
1581
1582
1583 dqm->total_queue_count++;
1584 pr_debug("Total of %d queues are accountable so far\n",
1585 dqm->total_queue_count);
1586
1587 list_add(&kq->list, &qpd->priv_queue_list);
1588 increment_queue_count(dqm, qpd, kq->queue);
1589 qpd->is_debug = true;
1590 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1591 dqm_unlock(dqm);
1592
1593 return 0;
1594 }
1595
1596 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1597 struct kernel_queue *kq,
1598 struct qcm_process_device *qpd)
1599 {
1600 dqm_lock(dqm);
1601 list_del(&kq->list);
1602 decrement_queue_count(dqm, qpd, kq->queue);
1603 qpd->is_debug = false;
1604 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1605
1606
1607
1608
1609 dqm->total_queue_count--;
1610 pr_debug("Total of %d queues are accountable so far\n",
1611 dqm->total_queue_count);
1612 dqm_unlock(dqm);
1613 }
1614
1615 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1616 struct qcm_process_device *qpd,
1617 const struct kfd_criu_queue_priv_data *qd,
1618 const void *restore_mqd, const void *restore_ctl_stack)
1619 {
1620 int retval;
1621 struct mqd_manager *mqd_mgr;
1622
1623 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1624 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1625 dqm->total_queue_count);
1626 retval = -EPERM;
1627 goto out;
1628 }
1629
1630 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1631 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1632 dqm_lock(dqm);
1633 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1634 dqm_unlock(dqm);
1635 if (retval)
1636 goto out;
1637 }
1638
1639 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
1640 if (retval)
1641 goto out_deallocate_sdma_queue;
1642
1643 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1644 q->properties.type)];
1645
1646 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1647 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1648 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1649 q->properties.tba_addr = qpd->tba_addr;
1650 q->properties.tma_addr = qpd->tma_addr;
1651 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1652 if (!q->mqd_mem_obj) {
1653 retval = -ENOMEM;
1654 goto out_deallocate_doorbell;
1655 }
1656
1657 dqm_lock(dqm);
1658
1659
1660
1661
1662
1663 q->properties.is_evicted = !!qpd->evicted;
1664
1665 if (qd)
1666 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
1667 &q->properties, restore_mqd, restore_ctl_stack,
1668 qd->ctl_stack_size);
1669 else
1670 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1671 &q->gart_mqd_addr, &q->properties);
1672
1673 list_add(&q->list, &qpd->queues_list);
1674 qpd->queue_count++;
1675
1676 if (q->properties.is_active) {
1677 increment_queue_count(dqm, qpd, q);
1678
1679 if (!dqm->dev->shared_resources.enable_mes)
1680 retval = execute_queues_cpsch(dqm,
1681 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1682 else
1683 retval = add_queue_mes(dqm, q, qpd);
1684 if (retval)
1685 goto cleanup_queue;
1686 }
1687
1688
1689
1690
1691
1692 dqm->total_queue_count++;
1693
1694 pr_debug("Total of %d queues are accountable so far\n",
1695 dqm->total_queue_count);
1696
1697 dqm_unlock(dqm);
1698 return retval;
1699
1700 cleanup_queue:
1701 qpd->queue_count--;
1702 list_del(&q->list);
1703 if (q->properties.is_active)
1704 decrement_queue_count(dqm, qpd, q);
1705 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1706 dqm_unlock(dqm);
1707 out_deallocate_doorbell:
1708 deallocate_doorbell(qpd, q);
1709 out_deallocate_sdma_queue:
1710 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1711 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1712 dqm_lock(dqm);
1713 deallocate_sdma_queue(dqm, q);
1714 dqm_unlock(dqm);
1715 }
1716 out:
1717 return retval;
1718 }
1719
1720 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1721 uint64_t fence_value,
1722 unsigned int timeout_ms)
1723 {
1724 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1725
1726 while (*fence_addr != fence_value) {
1727 if (time_after(jiffies, end_jiffies)) {
1728 pr_err("qcm fence wait loop timeout expired\n");
1729
1730
1731
1732
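			/* When halt_if_hws_hang is set, park here so the
			 * hang state can be inspected instead of letting
			 * the caller proceed with recovery.
			 */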
1733 while (halt_if_hws_hang)
1734 schedule();
1735
1736 return -ETIME;
1737 }
1738 schedule();
1739 }
1740
1741 return 0;
1742 }
1743
1744
1745 static int map_queues_cpsch(struct device_queue_manager *dqm)
1746 {
1747 int retval;
1748
1749 if (!dqm->sched_running)
1750 return 0;
1751 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1752 return 0;
1753 if (dqm->active_runlist)
1754 return 0;
1755
1756 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1757 pr_debug("%s sent runlist\n", __func__);
1758 if (retval) {
1759 pr_err("failed to execute runlist\n");
1760 return retval;
1761 }
1762 dqm->active_runlist = true;
1763
1764 return retval;
1765 }
1766
1767
1768 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1769 enum kfd_unmap_queues_filter filter,
1770 uint32_t filter_param, bool reset)
1771 {
1772 int retval = 0;
1773 struct mqd_manager *mqd_mgr;
1774
1775 if (!dqm->sched_running)
1776 return 0;
1777 if (dqm->is_hws_hang || dqm->is_resetting)
1778 return -EIO;
1779 if (!dqm->active_runlist)
1780 return retval;
1781
1782 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1783 if (retval)
1784 return retval;
1785
1786 *dqm->fence_addr = KFD_FENCE_INIT;
1787 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1788 KFD_FENCE_COMPLETED);
1789
1790 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1791 queue_preemption_timeout_ms);
1792 if (retval) {
1793 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1794 kfd_hws_hang(dqm);
1795 return retval;
1796 }
1797
1798
1799
1800
1801
1802
1803
1804
1805
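	/* If a compute queue doesn't respond to the preemption request in
	 * time, the MEC firmware may abandon the unmap without reporting a
	 * timeout and instead log the doorbell of the offending queue in
	 * the HIQ MQD's queue_doorbell_id fields. Check those fields here
	 * to confirm the unmap really succeeded.
	 */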
1806 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1807 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1808 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1809 while (halt_if_hws_hang)
1810 schedule();
1811 return -ETIME;
1812 }
1813
1814 pm_release_ib(&dqm->packet_mgr);
1815 dqm->active_runlist = false;
1816
1817 return retval;
1818 }
1819
1820
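/* Forcibly unmap (with the reset flag set) all queues belonging to the
 * given PASID.
 */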
1821 static int reset_queues_cpsch(struct device_queue_manager *dqm,
1822 uint16_t pasid)
1823 {
1824 int retval;
1825
1826 dqm_lock(dqm);
1827
1828 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1829 pasid, true);
1830
1831 dqm_unlock(dqm);
1832 return retval;
1833 }
1834
1835
1836 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1837 enum kfd_unmap_queues_filter filter,
1838 uint32_t filter_param)
1839 {
1840 int retval;
1841
1842 if (dqm->is_hws_hang)
1843 return -EIO;
1844 retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
1845 if (retval)
1846 return retval;
1847
1848 return map_queues_cpsch(dqm);
1849 }
1850
1851 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1852 struct qcm_process_device *qpd,
1853 struct queue *q)
1854 {
1855 int retval;
1856 struct mqd_manager *mqd_mgr;
1857 uint64_t sdma_val = 0;
1858 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1859
1860
1861 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1862 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1863 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1864 &sdma_val);
1865 if (retval)
1866 pr_err("Failed to read SDMA queue counter for queue: %d\n",
1867 q->properties.queue_id);
1868 }
1869
1870 retval = 0;
1871
1872
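	/* Remove queue from list to prevent rescheduling after preemption */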
1873 dqm_lock(dqm);
1874
1875 if (qpd->is_debug) {
1876
1877
1878
1879
1880 retval = -EBUSY;
1881 goto failed_try_destroy_debugged_queue;
1882
1883 }
1884
1885 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1886 q->properties.type)];
1887
1888 deallocate_doorbell(qpd, q);
1889
1890 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1891 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1892 deallocate_sdma_queue(dqm, q);
1893 pdd->sdma_past_activity_counter += sdma_val;
1894 }
1895
1896 list_del(&q->list);
1897 qpd->queue_count--;
1898 if (q->properties.is_active) {
1899 if (!dqm->dev->shared_resources.enable_mes) {
1900 decrement_queue_count(dqm, qpd, q);
1901 retval = execute_queues_cpsch(dqm,
1902 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1903 if (retval == -ETIME)
1904 qpd->reset_wavefronts = true;
1905 } else {
1906 retval = remove_queue_mes(dqm, q, qpd);
1907 }
1908 }
1909
1910
1911
1912
1913
1914 dqm->total_queue_count--;
1915 pr_debug("Total of %d queues are accountable so far\n",
1916 dqm->total_queue_count);
1917
1918 dqm_unlock(dqm);
1919
1920
1921 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1922
1923 return retval;
1924
1925 failed_try_destroy_debugged_queue:
1926
1927 dqm_unlock(dqm);
1928 return retval;
1929 }
1930
/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL

/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
1938
1939 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1940 struct qcm_process_device *qpd,
1941 enum cache_policy default_policy,
1942 enum cache_policy alternate_policy,
1943 void __user *alternate_aperture_base,
1944 uint64_t alternate_aperture_size)
1945 {
1946 bool retval = true;
1947
1948 if (!dqm->asic_ops.set_cache_memory_policy)
1949 return retval;
1950
1951 dqm_lock(dqm);
1952
1953 if (alternate_aperture_size == 0) {
1954
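		/* base > limit disables APE1 */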
1955 qpd->sh_mem_ape1_base = 1;
1956 qpd->sh_mem_ape1_limit = 0;
1957 } else {
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
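		/* SH_MEM_APE1_BASE/LIMIT hold the aperture bounds shifted
		 * right by 16 bits; reject requests whose fixed low/high
		 * bits don't satisfy the HW's requirements.
		 */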
1968 uint64_t base = (uintptr_t)alternate_aperture_base;
1969 uint64_t limit = base + alternate_aperture_size - 1;
1970
1971 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1972 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1973 retval = false;
1974 goto out;
1975 }
1976
1977 qpd->sh_mem_ape1_base = base >> 16;
1978 qpd->sh_mem_ape1_limit = limit >> 16;
1979 }
1980
1981 retval = dqm->asic_ops.set_cache_memory_policy(
1982 dqm,
1983 qpd,
1984 default_policy,
1985 alternate_policy,
1986 alternate_aperture_base,
1987 alternate_aperture_size);
1988
1989 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1990 program_sh_mem_settings(dqm, qpd);
1991
1992 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1993 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1994 qpd->sh_mem_ape1_limit);
1995
1996 out:
1997 dqm_unlock(dqm);
1998 return retval;
1999 }
2000
2001 static int process_termination_nocpsch(struct device_queue_manager *dqm,
2002 struct qcm_process_device *qpd)
2003 {
2004 struct queue *q;
2005 struct device_process_node *cur, *next_dpn;
2006 int retval = 0;
2007 bool found = false;
2008
2009 dqm_lock(dqm);
2010
2011
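	/* Clear all user mode queues */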
2012 while (!list_empty(&qpd->queues_list)) {
2013 struct mqd_manager *mqd_mgr;
2014 int ret;
2015
2016 q = list_first_entry(&qpd->queues_list, struct queue, list);
2017 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2018 q->properties.type)];
2019 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2020 if (ret)
2021 retval = ret;
2022 dqm_unlock(dqm);
2023 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2024 dqm_lock(dqm);
2025 }
2026
2027
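	/* Unregister process */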
2028 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2029 if (qpd == cur->qpd) {
2030 list_del(&cur->list);
2031 kfree(cur);
2032 dqm->processes_count--;
2033 found = true;
2034 break;
2035 }
2036 }
2037
2038 dqm_unlock(dqm);
2039
2040
2041
2042
2043 if (found)
2044 kfd_dec_compute_active(dqm->dev);
2045
2046 return retval;
2047 }
2048
2049 static int get_wave_state(struct device_queue_manager *dqm,
2050 struct queue *q,
2051 void __user *ctl_stack,
2052 u32 *ctl_stack_used_size,
2053 u32 *save_area_used_size)
2054 {
2055 struct mqd_manager *mqd_mgr;
2056
2057 dqm_lock(dqm);
2058
2059 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2060
2061 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
2062 q->properties.is_active || !q->device->cwsr_enabled ||
2063 !mqd_mgr->get_wave_state) {
2064 dqm_unlock(dqm);
2065 return -EINVAL;
2066 }
2067
2068 dqm_unlock(dqm);
2069
2070
2071
2072
2073
2074
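	/*
	 * get_wave_state is outside the dqm lock to prevent circular
	 * locking; the queue is protected against destruction by the
	 * process lock.
	 */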
2075 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
2076 ctl_stack_used_size, save_area_used_size);
2077 }
2078
2079 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2080 const struct queue *q,
2081 u32 *mqd_size,
2082 u32 *ctl_stack_size)
2083 {
2084 struct mqd_manager *mqd_mgr;
2085 enum KFD_MQD_TYPE mqd_type =
2086 get_mqd_type_from_queue_type(q->properties.type);
2087
2088 dqm_lock(dqm);
2089 mqd_mgr = dqm->mqd_mgrs[mqd_type];
2090 *mqd_size = mqd_mgr->mqd_size;
2091 *ctl_stack_size = 0;
2092
2093 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
2094 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
2095
2096 dqm_unlock(dqm);
2097 }
2098
static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

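/* Process teardown on the CP scheduler (HWS) path: drain kernel queues,
 * unmap and release all user mode queues, unregister the process and
 * finally free the MQDs outside the DQM lock.
 */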
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

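/* Instantiate one MQD manager per MQD type via the ASIC-specific
 * mqd_manager_init callback; on failure, free whatever was created.
 */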
static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

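/* Allocate a single GTT buffer large enough to hold the MQDs of all
 * SDMA queues plus the HIQ MQD.
 */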
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_dev *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->device_info.num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

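/* Create the device queue manager for a KFD device: pick the scheduling
 * policy, fill in the policy-specific ops table, hook up the ASIC-specific
 * callbacks and allocate the MQD managers and HIQ/SDMA MQD memory.
 */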
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

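/* Evict all queues of the process identified by @pasid from this device. */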
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

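/* Deferred work to recover from a hardware exception by resetting the GPU. */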
static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

#if defined(CONFIG_DEBUG_FS)

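/* Print a register dump to the seq_file, starting a new line whenever the
 * register offsets stop being consecutive (up to eight values per line).
 */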
static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

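/* debugfs: dump the HQD registers of the HIQ, of every CP queue slot
 * reserved for KFD, and of every SDMA RLC queue.
 */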
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	if (!dqm->sched_running) {
		seq_puts(m, " Device is stopped\n");
		return 0;
	}

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
					&dump, &n_regs);
	if (!r) {
		seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
			   KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.cp_queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

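/* debugfs: deliberately hang the HW scheduler (via pm_debugfs_hang_hws())
 * and then trigger a runlist update so the hang is detected, exercising
 * the hang-detection and GPU reset paths.
 */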
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
	if (r) {
		dqm_unlock(dqm);
		return r;
	}
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif