0001 /*
0002  * Copyright 2019 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 
0024 #include "amdgpu_mes.h"
0025 #include "amdgpu.h"
0026 #include "soc15_common.h"
0027 #include "amdgpu_mes_ctx.h"
0028 
0029 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
0030 #define AMDGPU_ONE_DOORBELL_SIZE 8
0031 
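/*
 * Size in bytes of the doorbell range reserved for a single process:
 * AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS doorbells of
 * AMDGPU_ONE_DOORBELL_SIZE bytes each, rounded up to a whole page.
 */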
0032 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
0033 {
0034     return roundup(AMDGPU_ONE_DOORBELL_SIZE *
0035                AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
0036                PAGE_SIZE);
0037 }
0038 
0039 int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
0040                       unsigned int *doorbell_index)
0041 {
0042     int r = ida_simple_get(&adev->mes.doorbell_ida, 2,
0043                    adev->mes.max_doorbell_slices,
0044                    GFP_KERNEL);
0045     if (r > 0)
0046         *doorbell_index = r;
0047 
0048     return r;
0049 }
0050 
0051 void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
0052                       unsigned int doorbell_index)
0053 {
0054     if (doorbell_index)
0055         ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index);
0056 }
0057 
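/*
 * Convert a process doorbell slice index plus a per-process doorbell id
 * into a dword offset within the doorbell BAR; each doorbell is 64 bits
 * wide, i.e. two dwords.
 */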
0058 unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
0059                     struct amdgpu_device *adev,
0060                     uint32_t doorbell_index,
0061                     unsigned int doorbell_id)
0062 {
0063     return ((doorbell_index *
0064         amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) +
0065         doorbell_id * 2);
0066 }
0067 
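/*
 * Reserve a doorbell for a new queue in the process's doorbell bitmap.
 * SDMA queues start searching at the SDMA engine 0 doorbell index while
 * other queue types take the first free bit.  On success the dword
 * offset of the doorbell in the BAR is returned in *doorbell_index;
 * -ENOSPC is returned when the bitmap is exhausted.
 */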
0068 static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev,
0069                      struct amdgpu_mes_process *process,
0070                      int ip_type, uint64_t *doorbell_index)
0071 {
0072     unsigned int offset, found;
0073 
0074     if (ip_type == AMDGPU_RING_TYPE_SDMA) {
0075         offset = adev->doorbell_index.sdma_engine[0];
0076         found = find_next_zero_bit(process->doorbell_bitmap,
0077                        AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
0078                        offset);
0079     } else {
0080         found = find_first_zero_bit(process->doorbell_bitmap,
0081                         AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS);
0082     }
0083 
0084     if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) {
0085         DRM_WARN("No doorbell available\n");
0086         return -ENOSPC;
0087     }
0088 
0089     set_bit(found, process->doorbell_bitmap);
0090 
0091     *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
0092                 process->doorbell_index, found);
0093 
0094     return 0;
0095 }
0096 
0097 static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev,
0098                        struct amdgpu_mes_process *process,
0099                        uint32_t doorbell_index)
0100 {
0101     unsigned int old, doorbell_id;
0102 
0103     doorbell_id = doorbell_index -
0104         (process->doorbell_index *
0105          amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32);
0106     doorbell_id /= 2;
0107 
0108     old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap);
0109     WARN_ON(!old);
0110 }
0111 
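/*
 * Lay out the MES portion of the doorbell aperture: one page of
 * aggregated doorbells (one qword per priority level) is placed above
 * the statically assigned doorbell indices, and the remaining space is
 * divided into whole per-process doorbell slices.
 */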
0112 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
0113 {
0114     size_t doorbell_start_offset;
0115     size_t doorbell_aperture_size;
0116     size_t doorbell_process_limit;
0117     size_t aggregated_doorbell_start;
0118     int i;
0119 
0120     aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
0121     aggregated_doorbell_start =
0122         roundup(aggregated_doorbell_start, PAGE_SIZE);
0123 
0124     doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
0125     doorbell_start_offset =
0126         roundup(doorbell_start_offset,
0127             amdgpu_mes_doorbell_process_slice(adev));
0128 
0129     doorbell_aperture_size = adev->doorbell.size;
0130     doorbell_aperture_size =
0131             rounddown(doorbell_aperture_size,
0132                   amdgpu_mes_doorbell_process_slice(adev));
0133 
0134     if (doorbell_aperture_size > doorbell_start_offset)
0135         doorbell_process_limit =
0136             (doorbell_aperture_size - doorbell_start_offset) /
0137             amdgpu_mes_doorbell_process_slice(adev);
0138     else
0139         return -ENOSPC;
0140 
0141     adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
0142     adev->mes.max_doorbell_slices = doorbell_process_limit;
0143 
0144     /* allocate Qword range for aggregated doorbell */
0145     for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
0146         adev->mes.aggregated_doorbells[i] =
0147             aggregated_doorbell_start / sizeof(u32) + i * 2;
0148 
0149     DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
0150     return 0;
0151 }
0152 
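/*
 * Common software-side MES initialization: id allocators and locks, the
 * HQD masks describing which compute/gfx/SDMA queues MES may schedule
 * on, three writeback slots (scheduler context, query-status fence and
 * register read-back) and the doorbell layout.
 */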
0153 int amdgpu_mes_init(struct amdgpu_device *adev)
0154 {
0155     int i, r;
0156 
0157     adev->mes.adev = adev;
0158 
0159     idr_init(&adev->mes.pasid_idr);
0160     idr_init(&adev->mes.gang_id_idr);
0161     idr_init(&adev->mes.queue_id_idr);
0162     ida_init(&adev->mes.doorbell_ida);
0163     spin_lock_init(&adev->mes.queue_id_lock);
0164     spin_lock_init(&adev->mes.ring_lock);
0165     mutex_init(&adev->mes.mutex_hidden);
0166 
0167     adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
0168     adev->mes.vmid_mask_mmhub = 0xffffff00;
0169     adev->mes.vmid_mask_gfxhub = 0xffffff00;
0170 
0171     for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
0172         /* use only 1st MEC pipes */
0173         if (i >= 4)
0174             continue;
0175         adev->mes.compute_hqd_mask[i] = 0xc;
0176     }
0177 
0178     for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
0179         adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
0180 
0181     for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
0182         if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
0183             adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
0184         /* zero sdma_hqd_mask for non-existent engine */
0185         else if (adev->sdma.num_instances == 1)
0186             adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
0187         else
0188             adev->mes.sdma_hqd_mask[i] = 0xfc;
0189     }
0190 
0191     r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
0192     if (r) {
0193         dev_err(adev->dev,
0194             "(%d) sch_ctx_offs wb alloc failed\n", r);
0195         goto error_ids;
0196     }
0197     adev->mes.sch_ctx_gpu_addr =
0198         adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
0199     adev->mes.sch_ctx_ptr =
0200         (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
0201 
0202     r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
0203     if (r) {
0204         amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
0205         dev_err(adev->dev,
0206             "(%d) query_status_fence_offs wb alloc failed\n", r);
0207         goto error_ids;
0208     }
0209     adev->mes.query_status_fence_gpu_addr =
0210         adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
0211     adev->mes.query_status_fence_ptr =
0212         (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
0213 
0214     r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
0215     if (r) {
0216         amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
0217         amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
0218         dev_err(adev->dev,
0219             "(%d) read_val_offs alloc failed\n", r);
0220         goto error_ids;
0221     }
0222     adev->mes.read_val_gpu_addr =
0223         adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
0224     adev->mes.read_val_ptr =
0225         (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
0226 
0227     r = amdgpu_mes_doorbell_init(adev);
0228     if (r)
0229         goto error;
0230 
0231     return 0;
0232 
0233 error:
0234     amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
0235     amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
0236     amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
0237 error_ids:
0238     idr_destroy(&adev->mes.pasid_idr);
0239     idr_destroy(&adev->mes.gang_id_idr);
0240     idr_destroy(&adev->mes.queue_id_idr);
0241     ida_destroy(&adev->mes.doorbell_ida);
0242     mutex_destroy(&adev->mes.mutex_hidden);
0243     return r;
0244 }
0245 
0246 void amdgpu_mes_fini(struct amdgpu_device *adev)
0247 {
0248     amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
0249     amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
0250     amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
0251 
0252     idr_destroy(&adev->mes.pasid_idr);
0253     idr_destroy(&adev->mes.gang_id_idr);
0254     idr_destroy(&adev->mes.queue_id_idr);
0255     ida_destroy(&adev->mes.doorbell_ida);
0256     mutex_destroy(&adev->mes.mutex_hidden);
0257 }
0258 
0259 static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
0260 {
0261     amdgpu_bo_free_kernel(&q->mqd_obj,
0262                   &q->mqd_gpu_addr,
0263                   &q->mqd_cpu_ptr);
0264 }
0265 
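/*
 * Create the per-process MES state for @pasid: the queue doorbell
 * bitmap, the process context BO, an entry in the pasid idr and the
 * starting doorbell slice of the process.
 */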
0266 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
0267                   struct amdgpu_vm *vm)
0268 {
0269     struct amdgpu_mes_process *process;
0270     int r;
0271 
0272     /* allocate the mes process buffer */
0273     process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
0274     if (!process) {
0275         DRM_ERROR("no more memory to create mes process\n");
0276         return -ENOMEM;
0277     }
0278 
0279     process->doorbell_bitmap =
0280         kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
0281                      BITS_PER_BYTE), GFP_KERNEL);
0282     if (!process->doorbell_bitmap) {
0283         DRM_ERROR("failed to allocate doorbell bitmap\n");
0284         kfree(process);
0285         return -ENOMEM;
0286     }
0287 
0288     /* allocate the process context bo and map it */
0289     r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
0290                     AMDGPU_GEM_DOMAIN_GTT,
0291                     &process->proc_ctx_bo,
0292                     &process->proc_ctx_gpu_addr,
0293                     &process->proc_ctx_cpu_ptr);
0294     if (r) {
0295         DRM_ERROR("failed to allocate process context bo\n");
0296         goto clean_up_memory;
0297     }
0298     memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
0299 
0300     /*
0301      * Avoid taking any other locks under MES lock to avoid circular
0302      * lock dependencies.
0303      */
0304     amdgpu_mes_lock(&adev->mes);
0305 
0306     /* add the mes process to idr list */
0307     r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
0308               GFP_KERNEL);
0309     if (r < 0) {
0310         DRM_ERROR("failed to lock pasid=%d\n", pasid);
0311         goto clean_up_ctx;
0312     }
0313 
0314     /* allocate the starting doorbell index of the process */
0315     r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
0316     if (r < 0) {
0317         DRM_ERROR("failed to allocate doorbell for process\n");
0318         goto clean_up_pasid;
0319     }
0320 
0321     DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
0322 
0323     INIT_LIST_HEAD(&process->gang_list);
0324     process->vm = vm;
0325     process->pasid = pasid;
0326     process->process_quantum = adev->mes.default_process_quantum;
0327     process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
0328 
0329     amdgpu_mes_unlock(&adev->mes);
0330     return 0;
0331 
0332 clean_up_pasid:
0333     idr_remove(&adev->mes.pasid_idr, pasid);
0334 clean_up_ctx:
0335     amdgpu_mes_unlock(&adev->mes);
0336     amdgpu_bo_free_kernel(&process->proc_ctx_bo,
0337                   &process->proc_ctx_gpu_addr,
0338                   &process->proc_ctx_cpu_ptr);
0339 clean_up_memory:
0340     kfree(process->doorbell_bitmap);
0341     kfree(process);
0342     return r;
0343 }
0344 
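/*
 * Destroy a process: under the MES lock every queue of every gang is
 * removed from the hardware and unregistered, then the doorbells and
 * idr entries are released and all MQDs, gang contexts and the process
 * context are freed outside the lock.
 */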
0345 void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
0346 {
0347     struct amdgpu_mes_process *process;
0348     struct amdgpu_mes_gang *gang, *tmp1;
0349     struct amdgpu_mes_queue *queue, *tmp2;
0350     struct mes_remove_queue_input queue_input;
0351     unsigned long flags;
0352     int r;
0353 
0354     /*
0355      * Avoid taking any other locks under MES lock to avoid circular
0356      * lock dependencies.
0357      */
0358     amdgpu_mes_lock(&adev->mes);
0359 
0360     process = idr_find(&adev->mes.pasid_idr, pasid);
0361     if (!process) {
0362         DRM_WARN("pasid %d doesn't exist\n", pasid);
0363         amdgpu_mes_unlock(&adev->mes);
0364         return;
0365     }
0366 
0367     /* Remove all queues from hardware */
0368     list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
0369         list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
0370             spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
0371             idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
0372             spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0373 
0374             queue_input.doorbell_offset = queue->doorbell_off;
0375             queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
0376 
0377             r = adev->mes.funcs->remove_hw_queue(&adev->mes,
0378                                  &queue_input);
0379             if (r)
0380                 DRM_WARN("failed to remove hardware queue\n");
0381         }
0382 
0383         idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
0384     }
0385 
0386     amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
0387     idr_remove(&adev->mes.pasid_idr, pasid);
0388     amdgpu_mes_unlock(&adev->mes);
0389 
0390     /* free all memory allocated by the process */
0391     list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
0392         /* free all queues in the gang */
0393         list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
0394             amdgpu_mes_queue_free_mqd(queue);
0395             list_del(&queue->list);
0396             kfree(queue);
0397         }
0398         amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
0399                       &gang->gang_ctx_gpu_addr,
0400                       &gang->gang_ctx_cpu_ptr);
0401         list_del(&gang->list);
0402         kfree(gang);
0403 
0404     }
0405     amdgpu_bo_free_kernel(&process->proc_ctx_bo,
0406                   &process->proc_ctx_gpu_addr,
0407                   &process->proc_ctx_cpu_ptr);
0408     kfree(process->doorbell_bitmap);
0409     kfree(process);
0410 }
0411 
0412 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
0413             struct amdgpu_mes_gang_properties *gprops,
0414             int *gang_id)
0415 {
0416     struct amdgpu_mes_process *process;
0417     struct amdgpu_mes_gang *gang;
0418     int r;
0419 
0420     /* allocate the mes gang buffer */
0421     gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
0422     if (!gang) {
0423         return -ENOMEM;
0424     }
0425 
0426     /* allocate the gang context bo and map it to cpu space */
0427     r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
0428                     AMDGPU_GEM_DOMAIN_GTT,
0429                     &gang->gang_ctx_bo,
0430                     &gang->gang_ctx_gpu_addr,
0431                     &gang->gang_ctx_cpu_ptr);
0432     if (r) {
0433         DRM_ERROR("failed to allocate gang context bo\n");
0434         goto clean_up_mem;
0435     }
0436     memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
0437 
0438     /*
0439      * Avoid taking any other locks under MES lock to avoid circular
0440      * lock dependencies.
0441      */
0442     amdgpu_mes_lock(&adev->mes);
0443 
0444     process = idr_find(&adev->mes.pasid_idr, pasid);
0445     if (!process) {
0446         DRM_ERROR("pasid %d doesn't exist\n", pasid);
0447         r = -EINVAL;
0448         goto clean_up_ctx;
0449     }
0450 
0451     /* add the mes gang to idr list */
0452     r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
0453               GFP_KERNEL);
0454     if (r < 0) {
0455         DRM_ERROR("failed to allocate idr for gang\n");
0456         goto clean_up_ctx;
0457     }
0458 
0459     gang->gang_id = r;
0460     *gang_id = r;
0461 
0462     INIT_LIST_HEAD(&gang->queue_list);
0463     gang->process = process;
0464     gang->priority = gprops->priority;
0465     gang->gang_quantum = gprops->gang_quantum ?
0466         gprops->gang_quantum : adev->mes.default_gang_quantum;
0467     gang->global_priority_level = gprops->global_priority_level;
0468     gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
0469     list_add_tail(&gang->list, &process->gang_list);
0470 
0471     amdgpu_mes_unlock(&adev->mes);
0472     return 0;
0473 
0474 clean_up_ctx:
0475     amdgpu_mes_unlock(&adev->mes);
0476     amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
0477                   &gang->gang_ctx_gpu_addr,
0478                   &gang->gang_ctx_cpu_ptr);
0479 clean_up_mem:
0480     kfree(gang);
0481     return r;
0482 }
0483 
0484 int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
0485 {
0486     struct amdgpu_mes_gang *gang;
0487 
0488     /*
0489      * Avoid taking any other locks under MES lock to avoid circular
0490      * lock dependencies.
0491      */
0492     amdgpu_mes_lock(&adev->mes);
0493 
0494     gang = idr_find(&adev->mes.gang_id_idr, gang_id);
0495     if (!gang) {
0496         DRM_ERROR("gang id %d doesn't exist\n", gang_id);
0497         amdgpu_mes_unlock(&adev->mes);
0498         return -EINVAL;
0499     }
0500 
0501     if (!list_empty(&gang->queue_list)) {
0502         DRM_ERROR("queue list is not empty\n");
0503         amdgpu_mes_unlock(&adev->mes);
0504         return -EBUSY;
0505     }
0506 
0507     idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
0508     list_del(&gang->list);
0509     amdgpu_mes_unlock(&adev->mes);
0510 
0511     amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
0512                   &gang->gang_ctx_gpu_addr,
0513                   &gang->gang_ctx_cpu_ptr);
0514 
0515     kfree(gang);
0516 
0517     return 0;
0518 }
0519 
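/*
 * Ask the MES firmware to suspend every gang of every known process.
 * Failures are reported but do not stop the walk.
 */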
0520 int amdgpu_mes_suspend(struct amdgpu_device *adev)
0521 {
0522     struct idr *idp;
0523     struct amdgpu_mes_process *process;
0524     struct amdgpu_mes_gang *gang;
0525     struct mes_suspend_gang_input input;
0526     int r, pasid;
0527 
0528     /*
0529      * Avoid taking any other locks under MES lock to avoid circular
0530      * lock dependencies.
0531      */
0532     amdgpu_mes_lock(&adev->mes);
0533 
0534     idp = &adev->mes.pasid_idr;
0535 
0536     idr_for_each_entry(idp, process, pasid) {
0537         list_for_each_entry(gang, &process->gang_list, list) {
0538             r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
0539             if (r)
0540                 DRM_ERROR("failed to suspend pasid %d gangid %d",
0541                      pasid, gang->gang_id);
0542         }
0543     }
0544 
0545     amdgpu_mes_unlock(&adev->mes);
0546     return 0;
0547 }
0548 
0549 int amdgpu_mes_resume(struct amdgpu_device *adev)
0550 {
0551     struct idr *idp;
0552     struct amdgpu_mes_process *process;
0553     struct amdgpu_mes_gang *gang;
0554     struct mes_resume_gang_input input;
0555     int r, pasid;
0556 
0557     /*
0558      * Avoid taking any other locks under MES lock to avoid circular
0559      * lock dependencies.
0560      */
0561     amdgpu_mes_lock(&adev->mes);
0562 
0563     idp = &adev->mes.pasid_idr;
0564 
0565     idr_for_each_entry(idp, process, pasid) {
0566         list_for_each_entry(gang, &process->gang_list, list) {
0567             r = adev->mes.funcs->resume_gang(&adev->mes, &input);
0568             if (r)
0569                 DRM_ERROR("failed to resume pasid %d gangid %d",
0570                      pasid, gang->gang_id);
0571         }
0572     }
0573 
0574     amdgpu_mes_unlock(&adev->mes);
0575     return 0;
0576 }
0577 
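/*
 * Allocate and clear the MQD BO for a queue.  On success the BO is left
 * reserved; amdgpu_mes_queue_init_mqd() fills it in and drops the
 * reservation.
 */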
0578 static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
0579                      struct amdgpu_mes_queue *q,
0580                      struct amdgpu_mes_queue_properties *p)
0581 {
0582     struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
0583     u32 mqd_size = mqd_mgr->mqd_size;
0584     int r;
0585 
0586     r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
0587                     AMDGPU_GEM_DOMAIN_GTT,
0588                     &q->mqd_obj,
0589                     &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
0590     if (r) {
0591         dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
0592         return r;
0593     }
0594     memset(q->mqd_cpu_ptr, 0, mqd_size);
0595 
0596     r = amdgpu_bo_reserve(q->mqd_obj, false);
0597     if (unlikely(r != 0))
0598         goto clean_up;
0599 
0600     return 0;
0601 
0602 clean_up:
0603     amdgpu_bo_free_kernel(&q->mqd_obj,
0604                   &q->mqd_gpu_addr,
0605                   &q->mqd_cpu_ptr);
0606     return r;
0607 }
0608 
0609 static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
0610                      struct amdgpu_mes_queue *q,
0611                      struct amdgpu_mes_queue_properties *p)
0612 {
0613     struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
0614     struct amdgpu_mqd_prop mqd_prop = {0};
0615 
0616     mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
0617     mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
0618     mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
0619     mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
0620     mqd_prop.queue_size = p->queue_size;
0621     mqd_prop.use_doorbell = true;
0622     mqd_prop.doorbell_index = p->doorbell_off;
0623     mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
0624     mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
0625     mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
0626     mqd_prop.hqd_active = false;
0627 
0628     mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
0629 
0630     amdgpu_bo_unreserve(q->mqd_obj);
0631 }
0632 
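/*
 * Create a hardware queue managed by MES: allocate and initialize the
 * MQD, pick a queue id and a doorbell for the gang's process, then hand
 * the queue to the firmware through the add_hw_queue backend call.
 */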
0633 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
0634                 struct amdgpu_mes_queue_properties *qprops,
0635                 int *queue_id)
0636 {
0637     struct amdgpu_mes_queue *queue;
0638     struct amdgpu_mes_gang *gang;
0639     struct mes_add_queue_input queue_input;
0640     unsigned long flags;
0641     int r;
0642 
0643     /* allocate the mes queue buffer */
0644     queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
0645     if (!queue) {
0646         DRM_ERROR("Failed to allocate memory for queue\n");
0647         return -ENOMEM;
0648     }
0649 
0650     /* Allocate the queue mqd */
0651     r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
0652     if (r)
0653         goto clean_up_memory;
0654 
0655     /*
0656      * Avoid taking any other locks under MES lock to avoid circular
0657      * lock dependencies.
0658      */
0659     amdgpu_mes_lock(&adev->mes);
0660 
0661     gang = idr_find(&adev->mes.gang_id_idr, gang_id);
0662     if (!gang) {
0663         DRM_ERROR("gang id %d doesn't exist\n", gang_id);
0664         r = -EINVAL;
0665         goto clean_up_mqd;
0666     }
0667 
0668     /* add the mes queue to idr list */
0669     spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
0670     r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
0671               GFP_ATOMIC);
0672     if (r < 0) {
0673         spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0674         goto clean_up_mqd;
0675     }
0676     spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0677     *queue_id = queue->queue_id = r;
0678 
0679     /* allocate a doorbell index for the queue */
0680     r = amdgpu_mes_queue_doorbell_get(adev, gang->process,
0681                       qprops->queue_type,
0682                       &qprops->doorbell_off);
0683     if (r)
0684         goto clean_up_queue_id;
0685 
0686     /* initialize the queue mqd */
0687     amdgpu_mes_queue_init_mqd(adev, queue, qprops);
0688 
0689     /* add hw queue to mes */
0690     queue_input.process_id = gang->process->pasid;
0691 
0692     queue_input.page_table_base_addr =
0693         adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
0694         adev->gmc.vram_start;
0695 
0696     queue_input.process_va_start = 0;
0697     queue_input.process_va_end =
0698         (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
0699     queue_input.process_quantum = gang->process->process_quantum;
0700     queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
0701     queue_input.gang_quantum = gang->gang_quantum;
0702     queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
0703     queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
0704     queue_input.gang_global_priority_level = gang->global_priority_level;
0705     queue_input.doorbell_offset = qprops->doorbell_off;
0706     queue_input.mqd_addr = queue->mqd_gpu_addr;
0707     queue_input.wptr_addr = qprops->wptr_gpu_addr;
0708     queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
0709     queue_input.queue_type = qprops->queue_type;
0710     queue_input.paging = qprops->paging;
0711     queue_input.is_kfd_process = 0;
0712 
0713     r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
0714     if (r) {
0715         DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
0716               qprops->doorbell_off);
0717         goto clean_up_doorbell;
0718     }
0719 
0720     DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
0721           "queue type=%d, doorbell=0x%llx\n",
0722           gang->process->pasid, gang_id, qprops->queue_type,
0723           qprops->doorbell_off);
0724 
0725     queue->ring = qprops->ring;
0726     queue->doorbell_off = qprops->doorbell_off;
0727     queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
0728     queue->queue_type = qprops->queue_type;
0729     queue->paging = qprops->paging;
0730     queue->gang = gang;
0731     queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
0732     list_add_tail(&queue->list, &gang->queue_list);
0733 
0734     amdgpu_mes_unlock(&adev->mes);
0735     return 0;
0736 
0737 clean_up_doorbell:
0738     amdgpu_mes_queue_doorbell_free(adev, gang->process,
0739                        qprops->doorbell_off);
0740 clean_up_queue_id:
0741     spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
0742     idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
0743     spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0744 clean_up_mqd:
0745     amdgpu_mes_unlock(&adev->mes);
0746     amdgpu_mes_queue_free_mqd(queue);
0747 clean_up_memory:
0748     kfree(queue);
0749     return r;
0750 }
0751 
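/*
 * Remove a MES-managed hardware queue: drop it from the queue idr, ask
 * the firmware to unmap it, then release its doorbell and free the MQD.
 */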
0752 int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
0753 {
0754     unsigned long flags;
0755     struct amdgpu_mes_queue *queue;
0756     struct amdgpu_mes_gang *gang;
0757     struct mes_remove_queue_input queue_input;
0758     int r;
0759 
0760     /*
0761      * Avoid taking any other locks under MES lock to avoid circular
0762      * lock dependencies.
0763      */
0764     amdgpu_mes_lock(&adev->mes);
0765 
0766     /* remove the mes queue from idr list */
0767     spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
0768 
0769     queue = idr_find(&adev->mes.queue_id_idr, queue_id);
0770     if (!queue) {
0771         spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0772         amdgpu_mes_unlock(&adev->mes);
0773         DRM_ERROR("queue id %d doesn't exist\n", queue_id);
0774         return -EINVAL;
0775     }
0776 
0777     idr_remove(&adev->mes.queue_id_idr, queue_id);
0778     spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
0779 
0780     DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
0781           queue->doorbell_off);
0782 
0783     gang = queue->gang;
0784     queue_input.doorbell_offset = queue->doorbell_off;
0785     queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
0786 
0787     r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
0788     if (r)
0789         DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
0790               queue_id);
0791 
0792     list_del(&queue->list);
0793     amdgpu_mes_queue_doorbell_free(adev, gang->process,
0794                        queue->doorbell_off);
0795     amdgpu_mes_unlock(&adev->mes);
0796 
0797     amdgpu_mes_queue_free_mqd(queue);
0798     kfree(queue);
0799     return 0;
0800 }
0801 
0802 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
0803                   struct amdgpu_ring *ring,
0804                   enum amdgpu_unmap_queues_action action,
0805                   u64 gpu_addr, u64 seq)
0806 {
0807     struct mes_unmap_legacy_queue_input queue_input;
0808     int r;
0809 
0810     queue_input.action = action;
0811     queue_input.queue_type = ring->funcs->type;
0812     queue_input.doorbell_offset = ring->doorbell_index;
0813     queue_input.pipe_id = ring->pipe;
0814     queue_input.queue_id = ring->queue;
0815     queue_input.trail_fence_addr = gpu_addr;
0816     queue_input.trail_fence_data = seq;
0817 
0818     r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
0819     if (r)
0820         DRM_ERROR("failed to unmap legacy queue\n");
0821 
0822     return r;
0823 }
0824 
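/*
 * Read a register through the MES firmware.  The MES_MISC_OP_READ_REG
 * op writes the register value into the read_val writeback slot, which
 * is then returned here; 0 is returned if the backend has no misc_op
 * hook or the read fails.
 */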
0825 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
0826 {
0827     struct mes_misc_op_input op_input;
0828     int r, val = 0;
0829 
0830     op_input.op = MES_MISC_OP_READ_REG;
0831     op_input.read_reg.reg_offset = reg;
0832     op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
0833 
0834     if (!adev->mes.funcs->misc_op) {
0835         DRM_ERROR("mes rreg is not supported!\n");
0836         goto error;
0837     }
0838 
0839     r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
0840     if (r)
0841         DRM_ERROR("failed to read reg (0x%x)\n", reg);
0842     else
0843         val = *(adev->mes.read_val_ptr);
0844 
0845 error:
0846     return val;
0847 }
0848 
0849 int amdgpu_mes_wreg(struct amdgpu_device *adev,
0850             uint32_t reg, uint32_t val)
0851 {
0852     struct mes_misc_op_input op_input;
0853     int r;
0854 
0855     op_input.op = MES_MISC_OP_WRITE_REG;
0856     op_input.write_reg.reg_offset = reg;
0857     op_input.write_reg.reg_value = val;
0858 
0859     if (!adev->mes.funcs->misc_op) {
0860         DRM_ERROR("mes wreg is not supported!\n");
0861         r = -EINVAL;
0862         goto error;
0863     }
0864 
0865     r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
0866     if (r)
0867         DRM_ERROR("failed to write reg (0x%x)\n", reg);
0868 
0869 error:
0870     return r;
0871 }
0872 
0873 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
0874                   uint32_t reg0, uint32_t reg1,
0875                   uint32_t ref, uint32_t mask)
0876 {
0877     struct mes_misc_op_input op_input;
0878     int r;
0879 
0880     op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
0881     op_input.wrm_reg.reg0 = reg0;
0882     op_input.wrm_reg.reg1 = reg1;
0883     op_input.wrm_reg.ref = ref;
0884     op_input.wrm_reg.mask = mask;
0885 
0886     if (!adev->mes.funcs->misc_op) {
0887         DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
0888         r = -EINVAL;
0889         goto error;
0890     }
0891 
0892     r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
0893     if (r)
0894         DRM_ERROR("failed to reg_write_reg_wait\n");
0895 
0896 error:
0897     return r;
0898 }
0899 
0900 int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
0901             uint32_t val, uint32_t mask)
0902 {
0903     struct mes_misc_op_input op_input;
0904     int r;
0905 
0906     op_input.op = MES_MISC_OP_WRM_REG_WAIT;
0907     op_input.wrm_reg.reg0 = reg;
0908     op_input.wrm_reg.ref = val;
0909     op_input.wrm_reg.mask = mask;
0910 
0911     if (!adev->mes.funcs->misc_op) {
0912         DRM_ERROR("mes reg wait is not supported!\n");
0913         r = -EINVAL;
0914         goto error;
0915     }
0916 
0917     r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
0918     if (r)
0919         DRM_ERROR("failed to reg_wait\n");
0920 
0921 error:
0922     return r;
0923 }
0924 
0925 static void
0926 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
0927                    struct amdgpu_ring *ring,
0928                    struct amdgpu_mes_queue_properties *props)
0929 {
0930     props->queue_type = ring->funcs->type;
0931     props->hqd_base_gpu_addr = ring->gpu_addr;
0932     props->rptr_gpu_addr = ring->rptr_gpu_addr;
0933     props->wptr_gpu_addr = ring->wptr_gpu_addr;
0934     props->wptr_mc_addr =
0935         ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
0936     props->queue_size = ring->ring_size;
0937     props->eop_gpu_addr = ring->eop_gpu_addr;
0938     props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
0939     props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
0940     props->paging = false;
0941     props->ring = ring;
0942 }
0943 
0944 #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng)            \
0945 do {                                    \
0946        if (id_offs < AMDGPU_MES_CTX_MAX_OFFS)               \
0947         return offsetof(struct amdgpu_mes_ctx_meta_data,    \
0948                 _eng[ring->idx].slots[id_offs]);        \
0949        else if (id_offs == AMDGPU_MES_CTX_RING_OFFS)            \
0950         return offsetof(struct amdgpu_mes_ctx_meta_data,        \
0951                 _eng[ring->idx].ring);                  \
0952        else if (id_offs == AMDGPU_MES_CTX_IB_OFFS)          \
0953         return offsetof(struct amdgpu_mes_ctx_meta_data,        \
0954                 _eng[ring->idx].ib);                    \
0955        else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS)         \
0956         return offsetof(struct amdgpu_mes_ctx_meta_data,        \
0957                 _eng[ring->idx].padding);               \
0958 } while(0)
0959 
0960 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
0961 {
0962     switch (ring->funcs->type) {
0963     case AMDGPU_RING_TYPE_GFX:
0964         DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
0965         break;
0966     case AMDGPU_RING_TYPE_COMPUTE:
0967         DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
0968         break;
0969     case AMDGPU_RING_TYPE_SDMA:
0970         DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
0971         break;
0972     default:
0973         break;
0974     }
0975 
0976     WARN_ON(1);
0977     return -EINVAL;
0978 }
0979 
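/*
 * Create a kernel amdgpu_ring that is backed by a MES hardware queue in
 * the given gang.  The ring borrows the ring functions of the first
 * ring of the requested type and is torn down again with
 * amdgpu_mes_remove_ring().
 */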
0980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
0981             int queue_type, int idx,
0982             struct amdgpu_mes_ctx_data *ctx_data,
0983             struct amdgpu_ring **out)
0984 {
0985     struct amdgpu_ring *ring;
0986     struct amdgpu_mes_gang *gang;
0987     struct amdgpu_mes_queue_properties qprops = {0};
0988     int r, queue_id, pasid;
0989 
0990     /*
0991      * Avoid taking any other locks under MES lock to avoid circular
0992      * lock dependencies.
0993      */
0994     amdgpu_mes_lock(&adev->mes);
0995     gang = idr_find(&adev->mes.gang_id_idr, gang_id);
0996     if (!gang) {
0997         DRM_ERROR("gang id %d doesn't exist\n", gang_id);
0998         amdgpu_mes_unlock(&adev->mes);
0999         return -EINVAL;
1000     }
1001     pasid = gang->process->pasid;
1002 
1003     ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1004     if (!ring) {
1005         amdgpu_mes_unlock(&adev->mes);
1006         return -ENOMEM;
1007     }
1008 
1009     ring->ring_obj = NULL;
1010     ring->use_doorbell = true;
1011     ring->is_mes_queue = true;
1012     ring->mes_ctx = ctx_data;
1013     ring->idx = idx;
1014     ring->no_scheduler = true;
1015 
1016     if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1017         int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1018                       compute[ring->idx].mec_hpd);
1019         ring->eop_gpu_addr =
1020             amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1021     }
1022 
1023     switch (queue_type) {
1024     case AMDGPU_RING_TYPE_GFX:
1025         ring->funcs = adev->gfx.gfx_ring[0].funcs;
1026         break;
1027     case AMDGPU_RING_TYPE_COMPUTE:
1028         ring->funcs = adev->gfx.compute_ring[0].funcs;
1029         break;
1030     case AMDGPU_RING_TYPE_SDMA:
1031         ring->funcs = adev->sdma.instance[0].ring.funcs;
1032         break;
1033     default:
1034         BUG();
1035     }
1036 
1037     r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1038                  AMDGPU_RING_PRIO_DEFAULT, NULL);
1039     if (r)
1040         goto clean_up_memory;
1041 
1042     amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1043 
1044     dma_fence_wait(gang->process->vm->last_update, false);
1045     dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1046     amdgpu_mes_unlock(&adev->mes);
1047 
1048     r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1049     if (r)
1050         goto clean_up_ring;
1051 
1052     ring->hw_queue_id = queue_id;
1053     ring->doorbell_index = qprops.doorbell_off;
1054 
1055     if (queue_type == AMDGPU_RING_TYPE_GFX)
1056         sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
1057     else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1058         sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1059             queue_id);
1060     else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1061         sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1062             queue_id);
1063     else
1064         BUG();
1065 
1066     *out = ring;
1067     return 0;
1068 
1069 clean_up_ring:
1070     amdgpu_ring_fini(ring);
1071 clean_up_memory:
1072     kfree(ring);
1073     amdgpu_mes_unlock(&adev->mes);
1074     return r;
1075 }
1076 
1077 void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
1078                 struct amdgpu_ring *ring)
1079 {
1080     if (!ring)
1081         return;
1082 
1083     amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
1084     amdgpu_ring_fini(ring);
1085     kfree(ring);
1086 }
1087 
1088 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
1089                            enum amdgpu_mes_priority_level prio)
1090 {
1091     return adev->mes.aggregated_doorbells[prio];
1092 }
1093 
1094 int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
1095                    struct amdgpu_mes_ctx_data *ctx_data)
1096 {
1097     int r;
1098 
1099     r = amdgpu_bo_create_kernel(adev,
1100                 sizeof(struct amdgpu_mes_ctx_meta_data),
1101                 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1102                 &ctx_data->meta_data_obj,
1103                 &ctx_data->meta_data_mc_addr,
1104                 &ctx_data->meta_data_ptr);
1105     if (!ctx_data->meta_data_obj)
1106         return -ENOMEM;
1107 
1108     memset(ctx_data->meta_data_ptr, 0,
1109            sizeof(struct amdgpu_mes_ctx_meta_data));
1110 
1111     return 0;
1112 }
1113 
1114 void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
1115 {
1116     if (ctx_data->meta_data_obj)
1117         amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
1118                       &ctx_data->meta_data_mc_addr,
1119                       &ctx_data->meta_data_ptr);
1120 }
1121 
1122 int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
1123                  struct amdgpu_vm *vm,
1124                  struct amdgpu_mes_ctx_data *ctx_data)
1125 {
1126     struct amdgpu_bo_va *bo_va;
1127     struct ww_acquire_ctx ticket;
1128     struct list_head list;
1129     struct amdgpu_bo_list_entry pd;
1130     struct ttm_validate_buffer csa_tv;
1131     struct amdgpu_sync sync;
1132     int r;
1133 
1134     amdgpu_sync_create(&sync);
1135     INIT_LIST_HEAD(&list);
1136     INIT_LIST_HEAD(&csa_tv.head);
1137 
1138     csa_tv.bo = &ctx_data->meta_data_obj->tbo;
1139     csa_tv.num_shared = 1;
1140 
1141     list_add(&csa_tv.head, &list);
1142     amdgpu_vm_get_pd_bo(vm, &list, &pd);
1143 
1144     r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
1145     if (r) {
1146         DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
1147         return r;
1148     }
1149 
1150     bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
1151     if (!bo_va) {
1152         ttm_eu_backoff_reservation(&ticket, &list);
1153         DRM_ERROR("failed to create bo_va for meta data BO\n");
1154         return -ENOMEM;
1155     }
1156 
1157     r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
1158                  sizeof(struct amdgpu_mes_ctx_meta_data),
1159                  AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
1160                  AMDGPU_PTE_EXECUTABLE);
1161 
1162     if (r) {
1163         DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
1164         goto error;
1165     }
1166 
1167     r = amdgpu_vm_bo_update(adev, bo_va, false);
1168     if (r) {
1169         DRM_ERROR("failed to do vm_bo_update on meta data\n");
1170         goto error;
1171     }
1172     amdgpu_sync_fence(&sync, bo_va->last_pt_update);
1173 
1174     r = amdgpu_vm_update_pdes(adev, vm, false);
1175     if (r) {
1176         DRM_ERROR("failed to update pdes on meta data\n");
1177         goto error;
1178     }
1179     amdgpu_sync_fence(&sync, vm->last_update);
1180 
1181     amdgpu_sync_wait(&sync, false);
1182     ttm_eu_backoff_reservation(&ticket, &list);
1183 
1184     amdgpu_sync_free(&sync);
1185     ctx_data->meta_data_va = bo_va;
1186     return 0;
1187 
1188 error:
1189     amdgpu_vm_bo_del(adev, bo_va);
1190     ttm_eu_backoff_reservation(&ticket, &list);
1191     amdgpu_sync_free(&sync);
1192     return r;
1193 }
1194 
1195 int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
1196                    struct amdgpu_mes_ctx_data *ctx_data)
1197 {
1198     struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
1199     struct amdgpu_bo *bo = ctx_data->meta_data_obj;
1200     struct amdgpu_vm *vm = bo_va->base.vm;
1201     struct amdgpu_bo_list_entry vm_pd;
1202     struct list_head list, duplicates;
1203     struct dma_fence *fence = NULL;
1204     struct ttm_validate_buffer tv;
1205     struct ww_acquire_ctx ticket;
1206     long r = 0;
1207 
1208     INIT_LIST_HEAD(&list);
1209     INIT_LIST_HEAD(&duplicates);
1210 
1211     tv.bo = &bo->tbo;
1212     tv.num_shared = 2;
1213     list_add(&tv.head, &list);
1214 
1215     amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
1216 
1217     r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
1218     if (r) {
1219         dev_err(adev->dev, "leaking bo va because "
1220             "we failed to reserve the bo (%ld)\n", r);
1221         return r;
1222     }
1223 
1224     amdgpu_vm_bo_del(adev, bo_va);
1225     if (!amdgpu_vm_ready(vm))
1226         goto out_unlock;
1227 
1228     r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
1229     if (r)
1230         goto out_unlock;
1231     if (fence) {
1232         amdgpu_bo_fence(bo, fence, true);
1233         fence = NULL;
1234     }
1235 
1236     r = amdgpu_vm_clear_freed(adev, vm, &fence);
1237     if (r || !fence)
1238         goto out_unlock;
1239 
1240     dma_fence_wait(fence, false);
1241     amdgpu_bo_fence(bo, fence, true);
1242     dma_fence_put(fence);
1243 
1244 out_unlock:
1245     if (unlikely(r < 0))
1246         dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
1247     ttm_eu_backoff_reservation(&ticket, &list);
1248 
1249     return r;
1250 }
1251 
1252 static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
1253                       int pasid, int *gang_id,
1254                       int queue_type, int num_queue,
1255                       struct amdgpu_ring **added_rings,
1256                       struct amdgpu_mes_ctx_data *ctx_data)
1257 {
1258     struct amdgpu_ring *ring;
1259     struct amdgpu_mes_gang_properties gprops = {0};
1260     int r, j;
1261 
1262     /* create a gang for the process */
1263     gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1264     gprops.gang_quantum = adev->mes.default_gang_quantum;
1265     gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1266     gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1267     gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1268 
1269     r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
1270     if (r) {
1271         DRM_ERROR("failed to add gang\n");
1272         return r;
1273     }
1274 
1275     /* create queues for the gang */
1276     for (j = 0; j < num_queue; j++) {
1277         r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
1278                     ctx_data, &ring);
1279         if (r) {
1280             DRM_ERROR("failed to add ring\n");
1281             break;
1282         }
1283 
1284         DRM_INFO("ring %s was added\n", ring->name);
1285         added_rings[j] = ring;
1286     }
1287 
1288     return 0;
1289 }
1290 
1291 static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
1292 {
1293     struct amdgpu_ring *ring;
1294     int i, r;
1295 
1296     for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
1297         ring = added_rings[i];
1298         if (!ring)
1299             continue;
1300 
1301         r = amdgpu_ring_test_ring(ring);
1302         if (r) {
1303             DRM_DEV_ERROR(ring->adev->dev,
1304                       "ring %s test failed (%d)\n",
1305                       ring->name, r);
1306             return r;
1307         } else
1308             DRM_INFO("ring %s test pass\n", ring->name);
1309 
1310         r = amdgpu_ring_test_ib(ring, 1000 * 10);
1311         if (r) {
1312             DRM_DEV_ERROR(ring->adev->dev,
1313                       "ring %s ib test failed (%d)\n",
1314                       ring->name, r);
1315             return r;
1316         } else
1317             DRM_INFO("ring %s ib test pass\n", ring->name);
1318     }
1319 
1320     return 0;
1321 }
1322 
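/*
 * MES self test: create a temporary VM and process, add one gang per
 * queue type with several rings each, run ring and IB tests on them and
 * tear everything down again.  Failures are logged; the function always
 * returns 0.
 */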
1323 int amdgpu_mes_self_test(struct amdgpu_device *adev)
1324 {
1325     struct amdgpu_vm *vm = NULL;
1326     struct amdgpu_mes_ctx_data ctx_data = {0};
1327     struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
1328     int gang_ids[3] = {0};
1329     int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX,
1330                    AMDGPU_MES_CTX_MAX_GFX_RINGS},
1331                  { AMDGPU_RING_TYPE_COMPUTE,
1332                    AMDGPU_MES_CTX_MAX_COMPUTE_RINGS},
1333                  { AMDGPU_RING_TYPE_SDMA,
1334                    AMDGPU_MES_CTX_MAX_SDMA_RINGS } };
1335     int i, r, pasid, k = 0;
1336 
1337     pasid = amdgpu_pasid_alloc(16);
1338     if (pasid < 0) {
1339         dev_warn(adev->dev, "No more PASIDs available!");
1340         pasid = 0;
1341     }
1342 
1343     vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1344     if (!vm) {
1345         r = -ENOMEM;
1346         goto error_pasid;
1347     }
1348 
1349     r = amdgpu_vm_init(adev, vm);
1350     if (r) {
1351         DRM_ERROR("failed to initialize vm\n");
1352         goto error_pasid;
1353     }
1354 
1355     r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
1356     if (r) {
1357         DRM_ERROR("failed to alloc ctx meta data\n");
1358         goto error_fini;
1359     }
1360 
1361     ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
1362     r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
1363     if (r) {
1364         DRM_ERROR("failed to map ctx meta data\n");
1365         goto error_vm;
1366     }
1367 
1368     r = amdgpu_mes_create_process(adev, pasid, vm);
1369     if (r) {
1370         DRM_ERROR("failed to create MES process\n");
1371         goto error_vm;
1372     }
1373 
1374     for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
1375         /* On GFX v10.3, the firmware does not yet support mapping SDMA queues. */
1376         if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
1377             adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
1378             queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
1379             continue;
1380 
1381         r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
1382                                &gang_ids[i],
1383                                queue_types[i][0],
1384                                queue_types[i][1],
1385                                &added_rings[k],
1386                                &ctx_data);
1387         if (r)
1388             goto error_queues;
1389 
1390         k += queue_types[i][1];
1391     }
1392 
1393     /* start ring test and ib test for MES queues */
1394     amdgpu_mes_test_queues(added_rings);
1395 
1396 error_queues:
1397     /* remove all queues */
1398     for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
1399         if (!added_rings[i])
1400             continue;
1401         amdgpu_mes_remove_ring(adev, added_rings[i]);
1402     }
1403 
1404     for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
1405         if (!gang_ids[i])
1406             continue;
1407         amdgpu_mes_remove_gang(adev, gang_ids[i]);
1408     }
1409 
1410     amdgpu_mes_destroy_process(adev, pasid);
1411 
1412 error_vm:
1413     amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
1414 
1415 error_fini:
1416     amdgpu_vm_fini(adev, vm);
1417 
1418 error_pasid:
1419     if (pasid)
1420         amdgpu_pasid_free(pasid);
1421 
1422     amdgpu_mes_ctx_free_meta_data(&ctx_data);
1423     kfree(vm);
1424     return 0;
1425 }