/*
 * amdgpu <-> amdkfd GPUVM interface: KFD buffer object allocation and
 * memory-limit accounting, per-process compute VM setup, GPU mapping and
 * unmapping of BOs, and userptr eviction/restore handling.
 */

0022 #include <linux/dma-buf.h>
0023 #include <linux/list.h>
0024 #include <linux/pagemap.h>
0025 #include <linux/sched/mm.h>
0026 #include <linux/sched/task.h>
0027
0028 #include "amdgpu_object.h"
0029 #include "amdgpu_gem.h"
0030 #include "amdgpu_vm.h"
0031 #include "amdgpu_amdkfd.h"
0032 #include "amdgpu_dma_buf.h"
0033 #include <uapi/linux/kfd_ioctl.h>
0034 #include "amdgpu_xgmi.h"
0035 #include "kfd_smi_events.h"

/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate
 */
0040 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
0041
/*
 * Align VRAM availability to 2MB to avoid fragmentation caused by 4K
 * allocations in the tail 2MB BO chunk
 */
0046 #define VRAM_AVAILABLITY_ALIGN (1 << 21)

/* Impose limit on how much memory KFD can use */
0049 static struct {
0050 uint64_t max_system_mem_limit;
0051 uint64_t max_ttm_mem_limit;
0052 int64_t system_mem_used;
0053 int64_t ttm_mem_used;
0054 spinlock_t mem_limit_lock;
0055 } kfd_mem_limit;
0056
0057 static const char * const domain_bit_to_string[] = {
0058 "CPU",
0059 "GTT",
0060 "VRAM",
0061 "GDS",
0062 "GWS",
0063 "OA"
0064 };
0065
0066 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
0067
0068 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
0069
0070 static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
0071 struct kgd_mem *mem)
0072 {
0073 struct kfd_mem_attachment *entry;
0074
0075 list_for_each_entry(entry, &mem->attachments, list)
0076 if (entry->bo_va->base.vm == avm)
0077 return true;
0078
0079 return false;
0080 }
0081
/* Set memory usage limits. Current limits are
 *  System (TTM + userptr) memory - 15/16th of system RAM
 *  TTM memory - 3/8th of system RAM
 */
0086 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
0087 {
0088 struct sysinfo si;
0089 uint64_t mem;
0090
0091 si_meminfo(&si);
0092 mem = si.freeram - si.freehigh;
0093 mem *= si.mem_unit;
0094
0095 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
0096 kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
0097 kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
0098 pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
0099 (kfd_mem_limit.max_system_mem_limit >> 20),
0100 (kfd_mem_limit.max_ttm_mem_limit >> 20));
0101 }
0102
0103 void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
0104 {
0105 kfd_mem_limit.system_mem_used += size;
0106 }
0107
/* Estimate page table size needed to represent a given memory size
 *
 * The estimate reserves roughly one byte of page table per 16 KiB of
 * mapped memory (mem_size >> 14), with AMDGPU_VM_RESERVED_VRAM as the
 * lower bound.
 */
0119 #define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
0120

/**
 * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
 * of buffer.
 *
 * @adev: Device to which the allocated BO belongs
 * @size: Size of buffer, in bytes, encapsulated by the BO. This should be
 * equivalent to amdgpu_bo_size(BO)
 * @alloc_flag: Flags used in allocating the BO
 *
 * Return: -ENOMEM if the limit would be exceeded, zero otherwise
 */
0132 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
0133 uint64_t size, u32 alloc_flag)
0134 {
0135 uint64_t reserved_for_pt =
0136 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
0137 size_t system_mem_needed, ttm_mem_needed, vram_needed;
0138 int ret = 0;
0139
0140 system_mem_needed = 0;
0141 ttm_mem_needed = 0;
0142 vram_needed = 0;
0143 if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
0144 system_mem_needed = size;
0145 ttm_mem_needed = size;
0146 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
0147
0148
0149
0150
0151
0152 vram_needed = size;
0153 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
0154 system_mem_needed = size;
0155 } else if (!(alloc_flag &
0156 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
0157 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
0158 pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
0159 return -ENOMEM;
0160 }
0161
0162 spin_lock(&kfd_mem_limit.mem_limit_lock);
0163
0164 if (kfd_mem_limit.system_mem_used + system_mem_needed >
0165 kfd_mem_limit.max_system_mem_limit)
0166 pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
0167
0168 if ((kfd_mem_limit.system_mem_used + system_mem_needed >
0169 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
0170 (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
0171 kfd_mem_limit.max_ttm_mem_limit) ||
0172 (adev && adev->kfd.vram_used + vram_needed >
0173 adev->gmc.real_vram_size -
0174 atomic64_read(&adev->vram_pin_size) -
0175 reserved_for_pt)) {
0176 ret = -ENOMEM;
0177 goto release;
0178 }
0179

	/* Update memory accounting by decreasing available system
	 * memory, TTM memory and GPU memory as computed above
	 */
0183 WARN_ONCE(vram_needed && !adev,
0184 "adev reference can't be null when vram is used");
0185 if (adev) {
0186 adev->kfd.vram_used += vram_needed;
0187 adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
0188 }
0189 kfd_mem_limit.system_mem_used += system_mem_needed;
0190 kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
0191
0192 release:
0193 spin_unlock(&kfd_mem_limit.mem_limit_lock);
0194 return ret;
0195 }
0196
0197 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
0198 uint64_t size, u32 alloc_flag)
0199 {
0200 spin_lock(&kfd_mem_limit.mem_limit_lock);
0201
0202 if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
0203 kfd_mem_limit.system_mem_used -= size;
0204 kfd_mem_limit.ttm_mem_used -= size;
0205 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
0206 WARN_ONCE(!adev,
0207 "adev reference can't be null when alloc mem flags vram is set");
0208 if (adev) {
0209 adev->kfd.vram_used -= size;
0210 adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
0211 }
0212 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
0213 kfd_mem_limit.system_mem_used -= size;
0214 } else if (!(alloc_flag &
0215 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
0216 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
0217 pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
0218 goto release;
0219 }
0220 WARN_ONCE(adev && adev->kfd.vram_used < 0,
0221 "KFD VRAM memory accounting unbalanced");
0222 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
0223 "KFD TTM memory accounting unbalanced");
0224 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
0225 "KFD system memory accounting unbalanced");
0226
0227 release:
0228 spin_unlock(&kfd_mem_limit.mem_limit_lock);
0229 }
0230
0231 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
0232 {
0233 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
0234 u32 alloc_flags = bo->kfd_bo->alloc_flags;
0235 u64 size = amdgpu_bo_size(bo);
0236
0237 amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
0238
0239 kfree(bo->kfd_bo);
0240 }
0241

/**
 * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
 * about a USERPTR, DOORBELL or MMIO BO.
 *
 * @adev: Device to which the allocated BO belongs
 * @mem: BO of the peer device that is being DMA mapped. Provides parameters
 *	 for building the dmamap BO
 * @bo_out: Output parameter updated with handle of the dmamap BO
 */
0250 static int
0251 create_dmamap_sg_bo(struct amdgpu_device *adev,
0252 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
0253 {
0254 struct drm_gem_object *gem_obj;
0255 int ret, align;
0256
0257 ret = amdgpu_bo_reserve(mem->bo, false);
0258 if (ret)
0259 return ret;
0260
0261 align = 1;
0262 ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
0263 AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
0264 ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
0265
0266 amdgpu_bo_unreserve(mem->bo);
0267
0268 if (ret) {
0269 pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
0270 return -EINVAL;
0271 }
0272
0273 *bo_out = gem_to_amdgpu_bo(gem_obj);
0274 (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
0275 return ret;
0276 }
0277

/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
 *  reservation object.
 *
 * @bo: [IN] Remove eviction fence of this BO
 * @ef: [IN] This eviction fence is removed if it
 *  is present in the shared list.
 *
 * NOTE: Must be called with BO reserved i.e. bo->tbo.base.resv locked.
 */
0287 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
0288 struct amdgpu_amdkfd_fence *ef)
0289 {
0290 struct dma_fence *replacement;
0291
0292 if (!ef)
0293 return -EINVAL;
0294

	/* Replace any fence belonging to the eviction fence context with a
	 * signaled stub fence, which effectively removes the eviction fence
	 * from the reservation object.
	 */
0298 replacement = dma_fence_get_stub();
0299 dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
0300 replacement, DMA_RESV_USAGE_READ);
0301 dma_fence_put(replacement);
0302 return 0;
0303 }
0304
0305 int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
0306 {
0307 struct amdgpu_bo *root = bo;
0308 struct amdgpu_vm_bo_base *vm_bo;
0309 struct amdgpu_vm *vm;
0310 struct amdkfd_process_info *info;
0311 struct amdgpu_amdkfd_fence *ef;
0312 int ret;
0313
0314
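	/* Walk up the parent chain to find the root page directory BO */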
0315 while (root->parent)
0316 root = root->parent;
0317
0318 vm_bo = root->vm_bo;
0319 if (!vm_bo)
0320 return 0;
0321
0322 vm = vm_bo->vm;
0323 if (!vm)
0324 return 0;
0325
0326 info = vm->process_info;
0327 if (!info || !info->eviction_fence)
0328 return 0;
0329
0330 ef = container_of(dma_fence_get(&info->eviction_fence->base),
0331 struct amdgpu_amdkfd_fence, base);
0332
0333 BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
0334 ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
0335 dma_resv_unlock(bo->tbo.base.resv);
0336
0337 dma_fence_put(&ef->base);
0338 return ret;
0339 }
0340
0341 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
0342 bool wait)
0343 {
0344 struct ttm_operation_ctx ctx = { false, false };
0345 int ret;
0346
0347 if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
0348 "Called with userptr BO"))
0349 return -EINVAL;
0350
0351 amdgpu_bo_placement_from_domain(bo, domain);
0352
0353 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0354 if (ret)
0355 goto validate_fail;
0356 if (wait)
0357 amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
0358
0359 validate_fail:
0360 return ret;
0361 }
0362
0363 static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
0364 {
0365 return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
0366 }
0367

/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Validates all page table BOs of the VM in their allowed domains and
 * refreshes the cached page directory address (vm->pd_phys_addr).
 */
0375 static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
0376 {
0377 struct amdgpu_bo *pd = vm->root.bo;
0378 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
0379 int ret;
0380
0381 ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
0382 if (ret) {
0383 pr_err("failed to validate PT BOs\n");
0384 return ret;
0385 }
0386
0387 vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
0388
0389 return 0;
0390 }
0391
0392 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
0393 {
0394 struct amdgpu_bo *pd = vm->root.bo;
0395 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
0396 int ret;
0397
0398 ret = amdgpu_vm_update_pdes(adev, vm, false);
0399 if (ret)
0400 return ret;
0401
0402 return amdgpu_sync_fence(sync, vm->last_update);
0403 }
0404
0405 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
0406 {
0407 struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
0408 bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
0409 bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
0410 uint32_t mapping_flags;
0411 uint64_t pte_flags;
0412 bool snoop = false;
0413
0414 mapping_flags = AMDGPU_VM_PAGE_READABLE;
0415 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
0416 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
0417 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
0418 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
0419
0420 switch (adev->asic_type) {
0421 case CHIP_ARCTURUS:
0422 case CHIP_ALDEBARAN:
0423 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
0424 if (bo_adev == adev) {
0425 if (uncached)
0426 mapping_flags |= AMDGPU_VM_MTYPE_UC;
0427 else if (coherent)
0428 mapping_flags |= AMDGPU_VM_MTYPE_CC;
0429 else
0430 mapping_flags |= AMDGPU_VM_MTYPE_RW;
0431 if (adev->asic_type == CHIP_ALDEBARAN &&
0432 adev->gmc.xgmi.connected_to_cpu)
0433 snoop = true;
0434 } else {
0435 if (uncached || coherent)
0436 mapping_flags |= AMDGPU_VM_MTYPE_UC;
0437 else
0438 mapping_flags |= AMDGPU_VM_MTYPE_NC;
0439 if (amdgpu_xgmi_same_hive(adev, bo_adev))
0440 snoop = true;
0441 }
0442 } else {
0443 if (uncached || coherent)
0444 mapping_flags |= AMDGPU_VM_MTYPE_UC;
0445 else
0446 mapping_flags |= AMDGPU_VM_MTYPE_NC;
0447 snoop = true;
0448 }
0449 break;
0450 default:
0451 if (uncached || coherent)
0452 mapping_flags |= AMDGPU_VM_MTYPE_UC;
0453 else
0454 mapping_flags |= AMDGPU_VM_MTYPE_NC;
0455
0456 if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
0457 snoop = true;
0458 }
0459
0460 pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
0461 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
0462
0463 return pte_flags;
0464 }
0465

/**
 * create_sg_table() - Create a single-entry scatter-gather table
 * @addr: DMA or bus address the single sg entry points at
 * @size: Size of the address range in bytes
 *
 * DOORBELL and MMIO BOs are physically contiguous, so a single-entry
 * sg_table is sufficient to describe them.
 *
 * Return: Initialized sg_table, or NULL on allocation failure
 */
0480 static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
0481 {
0482 struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
0483
0484 if (!sg)
0485 return NULL;
0486 if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
0487 kfree(sg);
0488 return NULL;
0489 }
0490 sg_dma_address(sg->sgl) = addr;
0491 sg->sgl->length = size;
0492 #ifdef CONFIG_NEED_SG_DMA_LENGTH
0493 sg->sgl->dma_length = size;
0494 #endif
0495 return sg;
0496 }
0497
0498 static int
0499 kfd_mem_dmamap_userptr(struct kgd_mem *mem,
0500 struct kfd_mem_attachment *attachment)
0501 {
0502 enum dma_data_direction direction =
0503 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
0504 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
0505 struct ttm_operation_ctx ctx = {.interruptible = true};
0506 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0507 struct amdgpu_device *adev = attachment->adev;
0508 struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
0509 struct ttm_tt *ttm = bo->tbo.ttm;
0510 int ret;
0511
	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
		return -EINVAL;

	ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
	if (unlikely(!ttm->sg))
		return -ENOMEM;
0518
0519
0520 ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
0521 ttm->num_pages, 0,
0522 (u64)ttm->num_pages << PAGE_SHIFT,
0523 GFP_KERNEL);
0524 if (unlikely(ret))
0525 goto free_sg;
0526
0527 ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
0528 if (unlikely(ret))
0529 goto release_sg;
0530
0531 drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
0532 ttm->num_pages);
0533
0534 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
0535 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0536 if (ret)
0537 goto unmap_sg;
0538
0539 return 0;
0540
0541 unmap_sg:
0542 dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
0543 release_sg:
0544 pr_err("DMA map userptr failed: %d\n", ret);
0545 sg_free_table(ttm->sg);
0546 free_sg:
0547 kfree(ttm->sg);
0548 ttm->sg = NULL;
0549 return ret;
0550 }
0551
0552 static int
0553 kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
0554 {
0555 struct ttm_operation_ctx ctx = {.interruptible = true};
0556 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0557
0558 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
0559 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0560 }
0561

/**
 * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access a DOORBELL or MMIO BO
 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
 * @attachment: Virtual address attachment of the BO on the accessing device
 *
 * An access request from the device that owns a DOORBELL does not need DMA
 * mapping, because the request loops back without going through the PCIe
 * root complex; DMA mapping is needed only for peer-device access. MMIO
 * accesses always go through the root complex and are therefore always
 * DMA mapped.
 *
 * The DMA mapped address is wrapped in a single-entry sg_table and the
 * attachment BO is validated into GTT so page table entries can be built.
 *
 * Return: ZERO if successful, NON-ZERO otherwise
 */
0588 static int
0589 kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
0590 struct kfd_mem_attachment *attachment)
0591 {
0592 struct ttm_operation_ctx ctx = {.interruptible = true};
0593 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0594 struct amdgpu_device *adev = attachment->adev;
0595 struct ttm_tt *ttm = bo->tbo.ttm;
0596 enum dma_data_direction dir;
0597 dma_addr_t dma_addr;
0598 bool mmio;
0599 int ret;
0600
0601
0602 mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
0603 if (unlikely(ttm->sg)) {
0604 pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
0605 return -EINVAL;
0606 }
0607
0608 dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
0609 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
0610 dma_addr = mem->bo->tbo.sg->sgl->dma_address;
0611 pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
0612 pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
0613 dma_addr = dma_map_resource(adev->dev, dma_addr,
0614 mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
0615 ret = dma_mapping_error(adev->dev, dma_addr);
0616 if (unlikely(ret))
0617 return ret;
0618 pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
0619
0620 ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
0621 if (unlikely(!ttm->sg)) {
0622 ret = -ENOMEM;
0623 goto unmap_sg;
0624 }
0625
0626 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
0627 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0628 if (unlikely(ret))
0629 goto free_sg;
0630
0631 return ret;
0632
0633 free_sg:
0634 sg_free_table(ttm->sg);
0635 kfree(ttm->sg);
0636 ttm->sg = NULL;
0637 unmap_sg:
0638 dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
0639 dir, DMA_ATTR_SKIP_CPU_SYNC);
0640 return ret;
0641 }
0642
0643 static int
0644 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
0645 struct kfd_mem_attachment *attachment)
0646 {
0647 switch (attachment->type) {
0648 case KFD_MEM_ATT_SHARED:
0649 return 0;
0650 case KFD_MEM_ATT_USERPTR:
0651 return kfd_mem_dmamap_userptr(mem, attachment);
0652 case KFD_MEM_ATT_DMABUF:
0653 return kfd_mem_dmamap_dmabuf(attachment);
0654 case KFD_MEM_ATT_SG:
0655 return kfd_mem_dmamap_sg_bo(mem, attachment);
0656 default:
0657 WARN_ON_ONCE(1);
0658 }
0659 return -EINVAL;
0660 }
0661
0662 static void
0663 kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
0664 struct kfd_mem_attachment *attachment)
0665 {
0666 enum dma_data_direction direction =
0667 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
0668 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
0669 struct ttm_operation_ctx ctx = {.interruptible = false};
0670 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0671 struct amdgpu_device *adev = attachment->adev;
0672 struct ttm_tt *ttm = bo->tbo.ttm;
0673
0674 if (unlikely(!ttm->sg))
0675 return;
0676
0677 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
0678 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0679
0680 dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
0681 sg_free_table(ttm->sg);
0682 kfree(ttm->sg);
0683 ttm->sg = NULL;
0684 }
0685
0686 static void
0687 kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
0688 {
0689 struct ttm_operation_ctx ctx = {.interruptible = true};
0690 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0691
0692 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
0693 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0694 }
0695

/**
 * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of a DOORBELL or MMIO BO
 * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
 * @attachment: Virtual address attachment of the BO on the accessing device
 *
 * The method performs the following steps:
 *   - Signal TTM to mark the memory pointed to by the BO as GPU inaccessible
 *   - Unmap the DMA mapping and free the sg_table that encapsulated the
 *     peer device's DOORBELL or MMIO memory
 */
0712 static void
0713 kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
0714 struct kfd_mem_attachment *attachment)
0715 {
0716 struct ttm_operation_ctx ctx = {.interruptible = true};
0717 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0718 struct amdgpu_device *adev = attachment->adev;
0719 struct ttm_tt *ttm = bo->tbo.ttm;
0720 enum dma_data_direction dir;
0721
0722 if (unlikely(!ttm->sg)) {
0723 pr_err("SG Table of BO is UNEXPECTEDLY NULL");
0724 return;
0725 }
0726
0727 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
0728 ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0729
0730 dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
0731 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
0732 dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
0733 ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
0734 sg_free_table(ttm->sg);
0735 kfree(ttm->sg);
0736 ttm->sg = NULL;
0737 bo->tbo.sg = NULL;
0738 }
0739
0740 static void
0741 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
0742 struct kfd_mem_attachment *attachment)
0743 {
0744 switch (attachment->type) {
0745 case KFD_MEM_ATT_SHARED:
0746 break;
0747 case KFD_MEM_ATT_USERPTR:
0748 kfd_mem_dmaunmap_userptr(mem, attachment);
0749 break;
0750 case KFD_MEM_ATT_DMABUF:
0751 kfd_mem_dmaunmap_dmabuf(attachment);
0752 break;
0753 case KFD_MEM_ATT_SG:
0754 kfd_mem_dmaunmap_sg_bo(mem, attachment);
0755 break;
0756 default:
0757 WARN_ON_ONCE(1);
0758 }
0759 }
0760
0761 static int
0762 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
0763 struct amdgpu_bo **bo)
0764 {
0765 struct drm_gem_object *gobj;
0766 int ret;
0767
0768 if (!mem->dmabuf) {
0769 mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
0770 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
0771 DRM_RDWR : 0);
0772 if (IS_ERR(mem->dmabuf)) {
0773 ret = PTR_ERR(mem->dmabuf);
0774 mem->dmabuf = NULL;
0775 return ret;
0776 }
0777 }
0778
0779 gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
0780 if (IS_ERR(gobj))
0781 return PTR_ERR(gobj);
0782
0783 *bo = gem_to_amdgpu_bo(gobj);
0784 (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
0785
0786 return 0;
0787 }
0788

/* kfd_mem_attach - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 0. Create BO for DMA mapping, if needed
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a.  Validate new page tables and directories
 */
0802 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
0803 struct amdgpu_vm *vm, bool is_aql)
0804 {
0805 struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
0806 unsigned long bo_size = mem->bo->tbo.base.size;
0807 uint64_t va = mem->va;
0808 struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
0809 struct amdgpu_bo *bo[2] = {NULL, NULL};
0810 bool same_hive = false;
0811 int i, ret;
0812
0813 if (!va) {
0814 pr_err("Invalid VA when adding BO to VM\n");
0815 return -EINVAL;
0816 }

	/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
	 *
	 * The access path of MMIO and DOORBELL BOs is always over PCIe.
	 * In contrast, the access path of VRAM BOs depends upon the type of
	 * link that connects the peer device. Access over PCIe is allowed
	 * if the peer device has a large BAR, while access over xGMI is
	 * allowed for both small and large BAR configurations.
	 */
0826 if ((adev != bo_adev) &&
0827 ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
0828 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
0829 (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
0830 if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
0831 same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
0832 if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
0833 return -EINVAL;
0834 }
0835
0836 for (i = 0; i <= is_aql; i++) {
0837 attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
0838 if (unlikely(!attachment[i])) {
0839 ret = -ENOMEM;
0840 goto unwind;
0841 }
0842
0843 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
0844 va + bo_size, vm);
0845
0846 if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
0847 (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
0848 same_hive) {
			/* Mappings on the local GPU, or VRAM mappings within
			 * the same hive, or userptr BOs on direct-mapped
			 * system RAM can reuse the original BO and its DMA
			 * mapping.
			 */
0853 attachment[i]->type = KFD_MEM_ATT_SHARED;
0854 bo[i] = mem->bo;
0855 drm_gem_object_get(&bo[i]->tbo.base);
0856 } else if (i > 0) {
			/* Multiple mappings on the same GPU share the BO */
0858 attachment[i]->type = KFD_MEM_ATT_SHARED;
0859 bo[i] = bo[0];
0860 drm_gem_object_get(&bo[i]->tbo.base);
0861 } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
			/* Create an SG BO to DMA-map userptrs on other GPUs */
0863 attachment[i]->type = KFD_MEM_ATT_USERPTR;
0864 ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
0865 if (ret)
0866 goto unwind;
0867
0868 } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
0869 WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
0870 mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
0871 "Handing invalid SG BO in ATTACH request");
0872 attachment[i]->type = KFD_MEM_ATT_SG;
0873 ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
0874 if (ret)
0875 goto unwind;
0876
0877 } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
0878 mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
0879 attachment[i]->type = KFD_MEM_ATT_DMABUF;
0880 ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
0881 if (ret)
0882 goto unwind;
0883 pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
0884 } else {
0885 WARN_ONCE(true, "Handling invalid ATTACH request");
0886 ret = -EINVAL;
0887 goto unwind;
0888 }

		/* Add BO to VM internal data structures */
0891 ret = amdgpu_bo_reserve(bo[i], false);
0892 if (ret) {
0893 pr_debug("Unable to reserve BO during memory attach");
0894 goto unwind;
0895 }
0896 attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
0897 amdgpu_bo_unreserve(bo[i]);
0898 if (unlikely(!attachment[i]->bo_va)) {
0899 ret = -ENOMEM;
0900 pr_err("Failed to add BO object to VM. ret == %d\n",
0901 ret);
0902 goto unwind;
0903 }
0904 attachment[i]->va = va;
0905 attachment[i]->pte_flags = get_pte_flags(adev, mem);
0906 attachment[i]->adev = adev;
0907 list_add(&attachment[i]->list, &mem->attachments);
0908
0909 va += bo_size;
0910 }
0911
0912 return 0;
0913
0914 unwind:
0915 for (; i >= 0; i--) {
0916 if (!attachment[i])
0917 continue;
0918 if (attachment[i]->bo_va) {
0919 amdgpu_bo_reserve(bo[i], true);
0920 amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
0921 amdgpu_bo_unreserve(bo[i]);
0922 list_del(&attachment[i]->list);
0923 }
0924 if (bo[i])
0925 drm_gem_object_put(&bo[i]->tbo.base);
0926 kfree(attachment[i]);
0927 }
0928 return ret;
0929 }
0930
0931 static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
0932 {
0933 struct amdgpu_bo *bo = attachment->bo_va->base.bo;
0934
0935 pr_debug("\t remove VA 0x%llx in entry %p\n",
0936 attachment->va, attachment);
0937 amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
0938 drm_gem_object_put(&bo->tbo.base);
0939 list_del(&attachment->list);
0940 kfree(attachment);
0941 }
0942
0943 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
0944 struct amdkfd_process_info *process_info,
0945 bool userptr)
0946 {
0947 struct ttm_validate_buffer *entry = &mem->validate_list;
0948 struct amdgpu_bo *bo = mem->bo;
0949
0950 INIT_LIST_HEAD(&entry->head);
0951 entry->num_shared = 1;
0952 entry->bo = &bo->tbo;
0953 mutex_lock(&process_info->lock);
0954 if (userptr)
0955 list_add_tail(&entry->head, &process_info->userptr_valid_list);
0956 else
0957 list_add_tail(&entry->head, &process_info->kfd_bo_list);
0958 mutex_unlock(&process_info->lock);
0959 }
0960
0961 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
0962 struct amdkfd_process_info *process_info)
0963 {
0964 struct ttm_validate_buffer *bo_list_entry;
0965
0966 bo_list_entry = &mem->validate_list;
0967 mutex_lock(&process_info->lock);
0968 list_del(&bo_list_entry->head);
0969 mutex_unlock(&process_info->lock);
0970 }
0971

/* Initializes user pages. It registers the MMU notifier and validates
 * the userptr BO in the GTT domain.
 *
 * The BO must already be on the userptr_valid_list. Otherwise an
 * eviction and restore may happen that leaves the new BO unmapped
 * with the user mode queues running.
 *
 * Takes the process_info->lock to protect against concurrent restore
 * workers.
 *
 * Returns 0 for success, negative errno for errors.
 */
0984 static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
0985 bool criu_resume)
0986 {
0987 struct amdkfd_process_info *process_info = mem->process_info;
0988 struct amdgpu_bo *bo = mem->bo;
0989 struct ttm_operation_ctx ctx = { true, false };
0990 int ret = 0;
0991
0992 mutex_lock(&process_info->lock);
0993
0994 ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
0995 if (ret) {
0996 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
0997 goto out;
0998 }
0999
1000 ret = amdgpu_mn_register(bo, user_addr);
1001 if (ret) {
1002 pr_err("%s: Failed to register MMU notifier: %d\n",
1003 __func__, ret);
1004 goto out;
1005 }
1006
1007 if (criu_resume) {
		/* During a CRIU restore operation the userptr BO is validated
		 * later by the restore worker: mark it invalid here and defer
		 * getting and validating the user pages until CRIU resume
		 * unblocks MMU notifications.
		 */
1014 atomic_inc(&mem->invalid);
1015 mutex_unlock(&process_info->lock);
1016 return 0;
1017 }
1018
1019 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
1020 if (ret) {
1021 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
1022 goto unregister_out;
1023 }
1024
1025 ret = amdgpu_bo_reserve(bo, true);
1026 if (ret) {
1027 pr_err("%s: Failed to reserve BO\n", __func__);
1028 goto release_out;
1029 }
1030 amdgpu_bo_placement_from_domain(bo, mem->domain);
1031 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1032 if (ret)
1033 pr_err("%s: failed to validate BO\n", __func__);
1034 amdgpu_bo_unreserve(bo);
1035
1036 release_out:
1037 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1038 unregister_out:
1039 if (ret)
1040 amdgpu_mn_unregister(bo);
1041 out:
1042 mutex_unlock(&process_info->lock);
1043 return ret;
1044 }
1045

/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
1051 struct bo_vm_reservation_context {
1052 struct amdgpu_bo_list_entry kfd_bo;
1053 unsigned int n_vms;
1054 struct amdgpu_bo_list_entry *vm_pd;
1055 struct ww_acquire_ctx ticket;
1056 struct list_head list, duplicates;
1057 struct amdgpu_sync *sync;
1058 bool reserved;
1059 };
1060
1061 enum bo_vm_match {
1062 BO_VM_NOT_MAPPED = 0,
1063 BO_VM_MAPPED,
1064 BO_VM_ALL,
1065 };
1066

/**
 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
 * @mem: BO of vm that is used in map/unmap
 * @vm: Reserve VM context
 * @ctx: the struct that will be used in unreserve_bo_and_vms
 */
1073 static int reserve_bo_and_vm(struct kgd_mem *mem,
1074 struct amdgpu_vm *vm,
1075 struct bo_vm_reservation_context *ctx)
1076 {
1077 struct amdgpu_bo *bo = mem->bo;
1078 int ret;
1079
1080 WARN_ON(!vm);
1081
1082 ctx->reserved = false;
1083 ctx->n_vms = 1;
1084 ctx->sync = &mem->sync;
1085
1086 INIT_LIST_HEAD(&ctx->list);
1087 INIT_LIST_HEAD(&ctx->duplicates);
1088
1089 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
1090 if (!ctx->vm_pd)
1091 return -ENOMEM;
1092
1093 ctx->kfd_bo.priority = 0;
1094 ctx->kfd_bo.tv.bo = &bo->tbo;
1095 ctx->kfd_bo.tv.num_shared = 1;
1096 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
1097
1098 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
1099
1100 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
1101 false, &ctx->duplicates);
1102 if (ret) {
1103 pr_err("Failed to reserve buffers in ttm.\n");
1104 kfree(ctx->vm_pd);
1105 ctx->vm_pd = NULL;
1106 return ret;
1107 }
1108
1109 ctx->reserved = true;
1110 return 0;
1111 }
1112

/**
 * reserve_bo_and_cond_vms - reserve a BO and multiple VMs conditionally
 * @mem: BO to be reserved
 * @vm: Reserve only this VM, or all VMs the BO is attached to if NULL
 * @map_type: Only reserve VMs where the BO mapping state matches this type,
 * or BO_VM_ALL to ignore the mapping state
 * @ctx: the struct that will be used in unreserve_bo_and_vms
 */
1123 static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
1124 struct amdgpu_vm *vm, enum bo_vm_match map_type,
1125 struct bo_vm_reservation_context *ctx)
1126 {
1127 struct amdgpu_bo *bo = mem->bo;
1128 struct kfd_mem_attachment *entry;
1129 unsigned int i;
1130 int ret;
1131
1132 ctx->reserved = false;
1133 ctx->n_vms = 0;
1134 ctx->vm_pd = NULL;
1135 ctx->sync = &mem->sync;
1136
1137 INIT_LIST_HEAD(&ctx->list);
1138 INIT_LIST_HEAD(&ctx->duplicates);
1139
1140 list_for_each_entry(entry, &mem->attachments, list) {
1141 if ((vm && vm != entry->bo_va->base.vm) ||
1142 (entry->is_mapped != map_type
1143 && map_type != BO_VM_ALL))
1144 continue;
1145
1146 ctx->n_vms++;
1147 }
1148
1149 if (ctx->n_vms != 0) {
1150 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
1151 GFP_KERNEL);
1152 if (!ctx->vm_pd)
1153 return -ENOMEM;
1154 }
1155
1156 ctx->kfd_bo.priority = 0;
1157 ctx->kfd_bo.tv.bo = &bo->tbo;
1158 ctx->kfd_bo.tv.num_shared = 1;
1159 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
1160
1161 i = 0;
1162 list_for_each_entry(entry, &mem->attachments, list) {
1163 if ((vm && vm != entry->bo_va->base.vm) ||
1164 (entry->is_mapped != map_type
1165 && map_type != BO_VM_ALL))
1166 continue;
1167
1168 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
1169 &ctx->vm_pd[i]);
1170 i++;
1171 }
1172
1173 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
1174 false, &ctx->duplicates);
1175 if (ret) {
1176 pr_err("Failed to reserve buffers in ttm.\n");
1177 kfree(ctx->vm_pd);
1178 ctx->vm_pd = NULL;
1179 return ret;
1180 }
1181
1182 ctx->reserved = true;
1183 return 0;
1184 }
1185

/**
 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
 * @ctx: Reservation context to unreserve
 * @wait: Optionally wait for a sync object representing pending VM updates
 * @intr: Whether the wait is interruptible
 *
 * Also frees any resources allocated in reserve_bo_and_(cond_)vm(s).
 * Returns the status from amdgpu_sync_wait.
 */
1196 static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
1197 bool wait, bool intr)
1198 {
1199 int ret = 0;
1200
1201 if (wait)
1202 ret = amdgpu_sync_wait(ctx->sync, intr);
1203
1204 if (ctx->reserved)
1205 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
1206 kfree(ctx->vm_pd);
1207
1208 ctx->sync = NULL;
1209
1210 ctx->reserved = false;
1211 ctx->vm_pd = NULL;
1212
1213 return ret;
1214 }
1215
1216 static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
1217 struct kfd_mem_attachment *entry,
1218 struct amdgpu_sync *sync)
1219 {
1220 struct amdgpu_bo_va *bo_va = entry->bo_va;
1221 struct amdgpu_device *adev = entry->adev;
1222 struct amdgpu_vm *vm = bo_va->base.vm;
1223
1224 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
1225
1226 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
1227
1228 amdgpu_sync_fence(sync, bo_va->last_pt_update);
1229
1230 kfd_mem_dmaunmap_attachment(mem, entry);
1231 }
1232
1233 static int update_gpuvm_pte(struct kgd_mem *mem,
1234 struct kfd_mem_attachment *entry,
1235 struct amdgpu_sync *sync)
1236 {
1237 struct amdgpu_bo_va *bo_va = entry->bo_va;
1238 struct amdgpu_device *adev = entry->adev;
1239 int ret;
1240
1241 ret = kfd_mem_dmamap_attachment(mem, entry);
1242 if (ret)
1243 return ret;
1244
1245
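	/* Update the page tables */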
1246 ret = amdgpu_vm_bo_update(adev, bo_va, false);
1247 if (ret) {
1248 pr_err("amdgpu_vm_bo_update failed\n");
1249 return ret;
1250 }
1251
1252 return amdgpu_sync_fence(sync, bo_va->last_pt_update);
1253 }
1254
1255 static int map_bo_to_gpuvm(struct kgd_mem *mem,
1256 struct kfd_mem_attachment *entry,
1257 struct amdgpu_sync *sync,
1258 bool no_update_pte)
1259 {
1260 int ret;
1261
1262
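	/* Set virtual address for the allocation */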
1263 ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
1264 amdgpu_bo_size(entry->bo_va->base.bo),
1265 entry->pte_flags);
1266 if (ret) {
1267 pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
1268 entry->va, ret);
1269 return ret;
1270 }
1271
1272 if (no_update_pte)
1273 return 0;
1274
1275 ret = update_gpuvm_pte(mem, entry, sync);
1276 if (ret) {
1277 pr_err("update_gpuvm_pte() failed\n");
1278 goto update_gpuvm_pte_failed;
1279 }
1280
1281 return 0;
1282
1283 update_gpuvm_pte_failed:
1284 unmap_bo_from_gpuvm(mem, entry, sync);
1285 return ret;
1286 }
1287
1288 static int process_validate_vms(struct amdkfd_process_info *process_info)
1289 {
1290 struct amdgpu_vm *peer_vm;
1291 int ret;
1292
1293 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1294 vm_list_node) {
1295 ret = vm_validate_pt_pd_bos(peer_vm);
1296 if (ret)
1297 return ret;
1298 }
1299
1300 return 0;
1301 }
1302
1303 static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
1304 struct amdgpu_sync *sync)
1305 {
1306 struct amdgpu_vm *peer_vm;
1307 int ret;
1308
1309 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1310 vm_list_node) {
1311 struct amdgpu_bo *pd = peer_vm->root.bo;
1312
1313 ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
1314 AMDGPU_SYNC_NE_OWNER,
1315 AMDGPU_FENCE_OWNER_KFD);
1316 if (ret)
1317 return ret;
1318 }
1319
1320 return 0;
1321 }
1322
1323 static int process_update_pds(struct amdkfd_process_info *process_info,
1324 struct amdgpu_sync *sync)
1325 {
1326 struct amdgpu_vm *peer_vm;
1327 int ret;
1328
1329 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1330 vm_list_node) {
1331 ret = vm_update_pds(peer_vm, sync);
1332 if (ret)
1333 return ret;
1334 }
1335
1336 return 0;
1337 }
1338
1339 static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
1340 struct dma_fence **ef)
1341 {
1342 struct amdkfd_process_info *info = NULL;
1343 int ret;
1344
1345 if (!*process_info) {
1346 info = kzalloc(sizeof(*info), GFP_KERNEL);
1347 if (!info)
1348 return -ENOMEM;
1349
1350 mutex_init(&info->lock);
1351 INIT_LIST_HEAD(&info->vm_list_head);
1352 INIT_LIST_HEAD(&info->kfd_bo_list);
1353 INIT_LIST_HEAD(&info->userptr_valid_list);
1354 INIT_LIST_HEAD(&info->userptr_inval_list);
1355
1356 info->eviction_fence =
1357 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
1358 current->mm,
1359 NULL);
1360 if (!info->eviction_fence) {
1361 pr_err("Failed to create eviction fence\n");
1362 ret = -ENOMEM;
1363 goto create_evict_fence_fail;
1364 }
1365
1366 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
1367 atomic_set(&info->evicted_bos, 0);
1368 INIT_DELAYED_WORK(&info->restore_userptr_work,
1369 amdgpu_amdkfd_restore_userptr_worker);
1370
1371 *process_info = info;
1372 *ef = dma_fence_get(&info->eviction_fence->base);
1373 }
1374
1375 vm->process_info = *process_info;
1376
1377
1378 ret = amdgpu_bo_reserve(vm->root.bo, true);
1379 if (ret)
1380 goto reserve_pd_fail;
1381 ret = vm_validate_pt_pd_bos(vm);
1382 if (ret) {
1383 pr_err("validate_pt_pd_bos() failed\n");
1384 goto validate_pd_fail;
1385 }
1386 ret = amdgpu_bo_sync_wait(vm->root.bo,
1387 AMDGPU_FENCE_OWNER_KFD, false);
1388 if (ret)
1389 goto wait_pd_fail;
1390 ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
1391 if (ret)
1392 goto reserve_shared_fail;
1393 amdgpu_bo_fence(vm->root.bo,
1394 &vm->process_info->eviction_fence->base, true);
1395 amdgpu_bo_unreserve(vm->root.bo);
1396
1397
1398 mutex_lock(&vm->process_info->lock);
1399 list_add_tail(&vm->vm_list_node,
1400 &(vm->process_info->vm_list_head));
1401 vm->process_info->n_vms++;
1402 mutex_unlock(&vm->process_info->lock);
1403
1404 return 0;
1405
1406 reserve_shared_fail:
1407 wait_pd_fail:
1408 validate_pd_fail:
1409 amdgpu_bo_unreserve(vm->root.bo);
1410 reserve_pd_fail:
1411 vm->process_info = NULL;
1412 if (info) {
1413
1414 dma_fence_put(&info->eviction_fence->base);
1415 dma_fence_put(*ef);
1416 *ef = NULL;
1417 *process_info = NULL;
1418 put_pid(info->pid);
1419 create_evict_fence_fail:
1420 mutex_destroy(&info->lock);
1421 kfree(info);
1422 }
1423 return ret;
1424 }
1425

/**
 * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO
 * @bo: Handle of the buffer object being pinned
 * @domain: Domain into which the BO should be pinned
 *
 *   - USERPTR BOs are unpinnable and pinning them returns an error
 *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) have their
 *     pin count incremented
 *
 * Return: ZERO if successful in pinning, non-zero in case of error.
 */
1437 static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
1438 {
1439 int ret = 0;
1440
1441 ret = amdgpu_bo_reserve(bo, false);
1442 if (unlikely(ret))
1443 return ret;
1444
1445 ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
1446 if (ret)
1447 pr_err("Error in Pinning BO to domain: %d\n", domain);
1448
1449 amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
1450 amdgpu_bo_unreserve(bo);
1451
1452 return ret;
1453 }
1454

/**
 * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins a BO
 * @bo: Handle of the buffer object being unpinned
 *
 * Decrements the pin count of the BO. Calls to UNPIN must balance
 * calls to PIN.
 */
1463 static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
1464 {
1465 int ret = 0;
1466
1467 ret = amdgpu_bo_reserve(bo, false);
1468 if (unlikely(ret))
1469 return;
1470
1471 amdgpu_bo_unpin(bo);
1472 amdgpu_bo_unreserve(bo);
1473 }
1474
1475 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
1476 struct file *filp, u32 pasid,
1477 void **process_info,
1478 struct dma_fence **ef)
1479 {
1480 struct amdgpu_fpriv *drv_priv;
1481 struct amdgpu_vm *avm;
1482 int ret;
1483
1484 ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1485 if (ret)
1486 return ret;
1487 avm = &drv_priv->vm;
1488
	/* Already a compute VM? */
1490 if (avm->process_info)
1491 return -EINVAL;
1492
	/* Free the original amdgpu allocated pasid;
	 * it will be replaced with a KFD allocated pasid.
	 */
1496 if (avm->pasid) {
1497 amdgpu_pasid_free(avm->pasid);
1498 amdgpu_vm_set_pasid(adev, avm, 0);
1499 }
1500
	/* Convert VM into a compute VM */
1502 ret = amdgpu_vm_make_compute(adev, avm);
1503 if (ret)
1504 return ret;
1505
1506 ret = amdgpu_vm_set_pasid(adev, avm, pasid);
1507 if (ret)
1508 return ret;
1509
1510 ret = init_kfd_vm(avm, process_info, ef);
1511 if (ret)
1512 return ret;
1513
1514 amdgpu_vm_set_task_info(avm);
1515
1516 return 0;
1517 }
1518
1519 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
1520 struct amdgpu_vm *vm)
1521 {
1522 struct amdkfd_process_info *process_info = vm->process_info;
1523
1524 if (!process_info)
1525 return;
1526
	/* Update process info */
1528 mutex_lock(&process_info->lock);
1529 process_info->n_vms--;
1530 list_del(&vm->vm_list_node);
1531 mutex_unlock(&process_info->lock);
1532
1533 vm->process_info = NULL;
1534
	/* Release per-process resources when last compute VM is destroyed */
1536 if (!process_info->n_vms) {
1537 WARN_ON(!list_empty(&process_info->kfd_bo_list));
1538 WARN_ON(!list_empty(&process_info->userptr_valid_list));
1539 WARN_ON(!list_empty(&process_info->userptr_inval_list));
1540
1541 dma_fence_put(&process_info->eviction_fence->base);
1542 cancel_delayed_work_sync(&process_info->restore_userptr_work);
1543 put_pid(process_info->pid);
1544 mutex_destroy(&process_info->lock);
1545 kfree(process_info);
1546 }
1547 }
1548
1549 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
1550 void *drm_priv)
1551 {
1552 struct amdgpu_vm *avm;
1553
1554 if (WARN_ON(!adev || !drm_priv))
1555 return;
1556
1557 avm = drm_priv_to_vm(drm_priv);
1558
1559 pr_debug("Releasing process vm %p\n", avm);

	/* The pasid originally allocated by amdgpu was already freed when the
	 * VM was converted to a compute VM. The current pasid is managed by
	 * KFD and is released on KFD process destroy, not here.
	 */
1567 amdgpu_vm_release_compute(adev, avm);
1568 }
1569
1570 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
1571 {
1572 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
1573 struct amdgpu_bo *pd = avm->root.bo;
1574 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
1575
1576 if (adev->asic_type < CHIP_VEGA10)
1577 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1578 return avm->pd_phys_addr;
1579 }
1580
1581 void amdgpu_amdkfd_block_mmu_notifications(void *p)
1582 {
1583 struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
1584
1585 mutex_lock(&pinfo->lock);
1586 WRITE_ONCE(pinfo->block_mmu_notifications, true);
1587 mutex_unlock(&pinfo->lock);
1588 }
1589
1590 int amdgpu_amdkfd_criu_resume(void *p)
1591 {
1592 int ret = 0;
1593 struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
1594
1595 mutex_lock(&pinfo->lock);
1596 pr_debug("scheduling work\n");
1597 atomic_inc(&pinfo->evicted_bos);
1598 if (!READ_ONCE(pinfo->block_mmu_notifications)) {
1599 ret = -EINVAL;
1600 goto out_unlock;
1601 }
1602 WRITE_ONCE(pinfo->block_mmu_notifications, false);
1603 schedule_delayed_work(&pinfo->restore_userptr_work, 0);
1604
1605 out_unlock:
1606 mutex_unlock(&pinfo->lock);
1607 return ret;
1608 }
1609
1610 size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
1611 {
1612 uint64_t reserved_for_pt =
1613 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
1614 size_t available;
1615 spin_lock(&kfd_mem_limit.mem_limit_lock);
1616 available = adev->gmc.real_vram_size
1617 - adev->kfd.vram_used_aligned
1618 - atomic64_read(&adev->vram_pin_size)
1619 - reserved_for_pt;
1620 spin_unlock(&kfd_mem_limit.mem_limit_lock);
1621
1622 return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
1623 }
1624
1625 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1626 struct amdgpu_device *adev, uint64_t va, uint64_t size,
1627 void *drm_priv, struct kgd_mem **mem,
1628 uint64_t *offset, uint32_t flags, bool criu_resume)
1629 {
1630 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
1631 enum ttm_bo_type bo_type = ttm_bo_type_device;
1632 struct sg_table *sg = NULL;
1633 uint64_t user_addr = 0;
1634 struct amdgpu_bo *bo;
1635 struct drm_gem_object *gobj = NULL;
1636 u32 domain, alloc_domain;
1637 u64 alloc_flags;
1638 int ret;
1639
	/*
	 * Check on which domain to allocate BO
	 */
1643 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
1644 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1645 alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
1646 alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
1647 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
1648 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
1649 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
1650 alloc_flags = 0;
1651 } else {
1652 domain = AMDGPU_GEM_DOMAIN_GTT;
1653 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1654 alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
1655
1656 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
1657 if (!offset || !*offset)
1658 return -EINVAL;
1659 user_addr = untagged_addr(*offset);
1660 } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1661 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1662 bo_type = ttm_bo_type_sg;
1663 if (size > UINT_MAX)
1664 return -EINVAL;
1665 sg = create_sg_table(*offset, size);
1666 if (!sg)
1667 return -ENOMEM;
1668 } else {
1669 return -EINVAL;
1670 }
1671 }
1672
1673 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1674 if (!*mem) {
1675 ret = -ENOMEM;
1676 goto err;
1677 }
1678 INIT_LIST_HEAD(&(*mem)->attachments);
1679 mutex_init(&(*mem)->lock);
1680 (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1681
	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
1686 if ((*mem)->aql_queue)
1687 size = size >> 1;
1688
1689 (*mem)->alloc_flags = flags;
1690
1691 amdgpu_sync_create(&(*mem)->sync);
1692
1693 ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
1694 if (ret) {
1695 pr_debug("Insufficient memory\n");
1696 goto err_reserve_limit;
1697 }
1698
1699 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
1700 va, size, domain_string(alloc_domain));
1701
1702 ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
1703 bo_type, NULL, &gobj);
1704 if (ret) {
1705 pr_debug("Failed to create BO on domain %s. ret %d\n",
1706 domain_string(alloc_domain), ret);
1707 goto err_bo_create;
1708 }
1709 ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
1710 if (ret) {
1711 pr_debug("Failed to allow vma node access. ret %d\n", ret);
1712 goto err_node_allow;
1713 }
1714 bo = gem_to_amdgpu_bo(gobj);
1715 if (bo_type == ttm_bo_type_sg) {
1716 bo->tbo.sg = sg;
1717 bo->tbo.ttm->sg = sg;
1718 }
1719 bo->kfd_bo = *mem;
1720 (*mem)->bo = bo;
1721 if (user_addr)
1722 bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
1723
1724 (*mem)->va = va;
1725 (*mem)->domain = domain;
1726 (*mem)->mapped_to_gpu_memory = 0;
1727 (*mem)->process_info = avm->process_info;
1728 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
1729
1730 if (user_addr) {
1731 pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
1732 ret = init_user_pages(*mem, user_addr, criu_resume);
1733 if (ret)
1734 goto allocate_init_user_pages_failed;
1735 } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1736 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1737 ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
1738 if (ret) {
1739 pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
1740 goto err_pin_bo;
1741 }
1742 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
1743 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
1744 }
1745
1746 if (offset)
1747 *offset = amdgpu_bo_mmap_offset(bo);
1748
1749 return 0;
1750
1751 allocate_init_user_pages_failed:
1752 err_pin_bo:
1753 remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
1754 drm_vma_node_revoke(&gobj->vma_node, drm_priv);
1755 err_node_allow:
	/* Don't unreserve system mem limit twice */
1757 goto err_reserve_limit;
1758 err_bo_create:
1759 amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
1760 err_reserve_limit:
1761 mutex_destroy(&(*mem)->lock);
1762 if (gobj)
1763 drm_gem_object_put(gobj);
1764 else
1765 kfree(*mem);
1766 err:
1767 if (sg) {
1768 sg_free_table(sg);
1769 kfree(sg);
1770 }
1771 return ret;
1772 }
1773
1774 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1775 struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
1776 uint64_t *size)
1777 {
1778 struct amdkfd_process_info *process_info = mem->process_info;
1779 unsigned long bo_size = mem->bo->tbo.base.size;
1780 bool use_release_notifier = (mem->bo->kfd_bo == mem);
1781 struct kfd_mem_attachment *entry, *tmp;
1782 struct bo_vm_reservation_context ctx;
1783 struct ttm_validate_buffer *bo_list_entry;
1784 unsigned int mapped_to_gpu_memory;
1785 int ret;
1786 bool is_imported = false;
1787
1788 mutex_lock(&mem->lock);
1789
1790
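	/* Unpin MMIO/DOORBELL BOs that were pinned during allocation */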
1791 if (mem->alloc_flags &
1792 (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1793 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1794 amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
1795 }
1796
1797 mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
1798 is_imported = mem->is_imported;
1799 mutex_unlock(&mem->lock);
1800
1801
1802
1803
1804 if (mapped_to_gpu_memory > 0) {
1805 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1806 mem->va, bo_size);
1807 return -EBUSY;
1808 }
1809
1810
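	/* Make sure restore workers don't access the BO any more */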
1811 bo_list_entry = &mem->validate_list;
1812 mutex_lock(&process_info->lock);
1813 list_del(&bo_list_entry->head);
1814 mutex_unlock(&process_info->lock);
1815
1816
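	/* No more MMU notifiers */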
1817 amdgpu_mn_unregister(mem->bo);
1818
1819 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1820 if (unlikely(ret))
1821 return ret;
1822

	/* The eviction fence should already have been removed by the last
	 * unmap; remove it here in case the BO still holds it.
	 */
1827 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1828 process_info->eviction_fence);
1829 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
1830 mem->va + bo_size * (1 + mem->aql_queue));
1831
1832
1833 list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
1834 kfd_mem_detach(entry);
1835
1836 ret = unreserve_bo_and_vms(&ctx, false, false);
1837
1838
1839 amdgpu_sync_free(&mem->sync);
1840

	/* If the SG is not NULL, it's one we created for a doorbell or mmio
	 * BO. We need to free it.
	 */
1844 if (mem->bo->tbo.sg) {
1845 sg_free_table(mem->bo->tbo.sg);
1846 kfree(mem->bo->tbo.sg);
1847 }
1848

	/* Update the size of the BO being freed if it was allocated from
	 * VRAM and is not imported.
	 */
1852 if (size) {
1853 if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
1854 (!is_imported))
1855 *size = bo_size;
1856 else
1857 *size = 0;
1858 }
1859
1860
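	/* Free the BO */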
1861 drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
1862 if (mem->dmabuf)
1863 dma_buf_put(mem->dmabuf);
1864 mutex_destroy(&mem->lock);
1865
1866
1867
1868
1869
1870 drm_gem_object_put(&mem->bo->tbo.base);
1871
1872
1873
1874
1875
1876 if (!use_release_notifier)
1877 kfree(mem);
1878
1879 return ret;
1880 }
1881
1882 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1883 struct amdgpu_device *adev, struct kgd_mem *mem,
1884 void *drm_priv)
1885 {
1886 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
1887 int ret;
1888 struct amdgpu_bo *bo;
1889 uint32_t domain;
1890 struct kfd_mem_attachment *entry;
1891 struct bo_vm_reservation_context ctx;
1892 unsigned long bo_size;
1893 bool is_invalid_userptr = false;
1894
1895 bo = mem->bo;
1896 if (!bo) {
1897 pr_err("Invalid BO when mapping memory to GPU\n");
1898 return -EINVAL;
1899 }
1900

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
1905 mutex_lock(&mem->process_info->lock);

	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
1911 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1912 mmap_write_lock(current->mm);
1913 is_invalid_userptr = atomic_read(&mem->invalid);
1914 mmap_write_unlock(current->mm);
1915 }
1916
1917 mutex_lock(&mem->lock);
1918
1919 domain = mem->domain;
1920 bo_size = bo->tbo.base.size;
1921
1922 pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
1923 mem->va,
1924 mem->va + bo_size * (1 + mem->aql_queue),
1925 avm, domain_string(domain));
1926
1927 if (!kfd_mem_is_attached(avm, mem)) {
1928 ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
1929 if (ret)
1930 goto out;
1931 }
1932
1933 ret = reserve_bo_and_vm(mem, avm, &ctx);
1934 if (unlikely(ret))
1935 goto out;
1936

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain). In that case
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
1942 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
1943 bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
1944 is_invalid_userptr = true;
1945
1946 ret = vm_validate_pt_pd_bos(avm);
1947 if (unlikely(ret))
1948 goto out_unreserve;
1949
1950 if (mem->mapped_to_gpu_memory == 0 &&
1951 !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
1956 ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
1957 if (ret) {
1958 pr_debug("Validate failed\n");
1959 goto out_unreserve;
1960 }
1961 }
1962
1963 list_for_each_entry(entry, &mem->attachments, list) {
1964 if (entry->bo_va->base.vm != avm || entry->is_mapped)
1965 continue;
1966
1967 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
1968 entry->va, entry->va + bo_size, entry);
1969
1970 ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
1971 is_invalid_userptr);
1972 if (ret) {
1973 pr_err("Failed to map bo to gpuvm\n");
1974 goto out_unreserve;
1975 }
1976
1977 ret = vm_update_pds(avm, ctx.sync);
1978 if (ret) {
1979 pr_err("Failed to update page directories\n");
1980 goto out_unreserve;
1981 }
1982
1983 entry->is_mapped = true;
1984 mem->mapped_to_gpu_memory++;
1985 pr_debug("\t INC mapping count %d\n",
1986 mem->mapped_to_gpu_memory);
1987 }
1988
1989 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
1990 amdgpu_bo_fence(bo,
1991 &avm->process_info->eviction_fence->base,
1992 true);
1993 ret = unreserve_bo_and_vms(&ctx, false, false);
1994
1995 goto out;
1996
1997 out_unreserve:
1998 unreserve_bo_and_vms(&ctx, false, false);
1999 out:
2000 mutex_unlock(&mem->process_info->lock);
2001 mutex_unlock(&mem->lock);
2002 return ret;
2003 }
2004
2005 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
2006 struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
2007 {
2008 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
2009 struct amdkfd_process_info *process_info = avm->process_info;
2010 unsigned long bo_size = mem->bo->tbo.base.size;
2011 struct kfd_mem_attachment *entry;
2012 struct bo_vm_reservation_context ctx;
2013 int ret;
2014
2015 mutex_lock(&mem->lock);
2016
2017 ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
2018 if (unlikely(ret))
2019 goto out;
2020
2021 if (ctx.n_vms == 0) {
2022 ret = -EINVAL;
2023 goto unreserve_out;
2024 }
2025
2026 ret = vm_validate_pt_pd_bos(avm);
2027 if (unlikely(ret))
2028 goto unreserve_out;
2029
2030 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
2031 mem->va,
2032 mem->va + bo_size * (1 + mem->aql_queue),
2033 avm);
2034
2035 list_for_each_entry(entry, &mem->attachments, list) {
2036 if (entry->bo_va->base.vm != avm || !entry->is_mapped)
2037 continue;
2038
2039 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
2040 entry->va, entry->va + bo_size, entry);
2041
2042 unmap_bo_from_gpuvm(mem, entry, ctx.sync);
2043 entry->is_mapped = false;
2044
2045 mem->mapped_to_gpu_memory--;
2046 pr_debug("\t DEC mapping count %d\n",
2047 mem->mapped_to_gpu_memory);
2048 }
2049

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
	 */
2053 if (mem->mapped_to_gpu_memory == 0 &&
2054 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
2055 !mem->bo->tbo.pin_count)
2056 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
2057 process_info->eviction_fence);
2058
2059 unreserve_out:
2060 unreserve_bo_and_vms(&ctx, false, false);
2061 out:
2062 mutex_unlock(&mem->lock);
2063 return ret;
2064 }
2065
2066 int amdgpu_amdkfd_gpuvm_sync_memory(
2067 struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
2068 {
2069 struct amdgpu_sync sync;
2070 int ret;
2071
2072 amdgpu_sync_create(&sync);
2073
2074 mutex_lock(&mem->lock);
2075 amdgpu_sync_clone(&mem->sync, &sync);
2076 mutex_unlock(&mem->lock);
2077
2078 ret = amdgpu_sync_wait(&sync, intr);
2079 amdgpu_sync_free(&sync);
2080 return ret;
2081 }
2082

/**
 * amdgpu_amdkfd_map_gtt_bo_to_gart - Map a BO to GART and take a reference
 * @adev: Device the BO belongs to
 * @bo: Buffer object to be mapped
 *
 * Pins the BO into GTT, binds it to GART, removes the process eviction
 * fence and increments the BO reference count before returning.
 */
2091 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
2092 {
2093 int ret;
2094
2095 ret = amdgpu_bo_reserve(bo, true);
2096 if (ret) {
2097 pr_err("Failed to reserve bo. ret %d\n", ret);
2098 goto err_reserve_bo_failed;
2099 }
2100
2101 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
2102 if (ret) {
2103 pr_err("Failed to pin bo. ret %d\n", ret);
2104 goto err_pin_bo_failed;
2105 }
2106
2107 ret = amdgpu_ttm_alloc_gart(&bo->tbo);
2108 if (ret) {
2109 pr_err("Failed to bind bo to GART. ret %d\n", ret);
2110 goto err_map_bo_gart_failed;
2111 }
2112
2113 amdgpu_amdkfd_remove_eviction_fence(
2114 bo, bo->kfd_bo->process_info->eviction_fence);
2115
2116 amdgpu_bo_unreserve(bo);
2117
2118 bo = amdgpu_bo_ref(bo);
2119
2120 return 0;
2121
2122 err_map_bo_gart_failed:
2123 amdgpu_bo_unpin(bo);
2124 err_pin_bo_failed:
2125 amdgpu_bo_unreserve(bo);
2126 err_reserve_bo_failed:
2127
2128 return ret;
2129 }
2130

/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
 *
 * @mem: Buffer object to be mapped for CPU access
 * @kptr[out]: pointer in kernel CPU address space
 * @size[out]: size of the buffer
 *
 * Pins the BO, maps it into the kernel address space and removes the
 * eviction fence, since pinned BOs cannot be evicted.
 */
2144 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
2145 void **kptr, uint64_t *size)
2146 {
2147 int ret;
2148 struct amdgpu_bo *bo = mem->bo;
2149
2150 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
2151 pr_err("userptr can't be mapped to kernel\n");
2152 return -EINVAL;
2153 }
2154
2155 mutex_lock(&mem->process_info->lock);
2156
2157 ret = amdgpu_bo_reserve(bo, true);
2158 if (ret) {
2159 pr_err("Failed to reserve bo. ret %d\n", ret);
2160 goto bo_reserve_failed;
2161 }
2162
2163 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
2164 if (ret) {
2165 pr_err("Failed to pin bo. ret %d\n", ret);
2166 goto pin_failed;
2167 }
2168
2169 ret = amdgpu_bo_kmap(bo, kptr);
2170 if (ret) {
2171 pr_err("Failed to map bo to kernel. ret %d\n", ret);
2172 goto kmap_failed;
2173 }
2174
2175 amdgpu_amdkfd_remove_eviction_fence(
2176 bo, mem->process_info->eviction_fence);
2177
2178 if (size)
2179 *size = amdgpu_bo_size(bo);
2180
2181 amdgpu_bo_unreserve(bo);
2182
2183 mutex_unlock(&mem->process_info->lock);
2184 return 0;
2185
2186 kmap_failed:
2187 amdgpu_bo_unpin(bo);
2188 pin_failed:
2189 amdgpu_bo_unreserve(bo);
2190 bo_reserve_failed:
2191 mutex_unlock(&mem->process_info->lock);
2192
2193 return ret;
2194 }
2195

/** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO from kernel CPU access
 *
 * @mem: Buffer object to be unmapped
 *
 * Removes the kernel CPU mapping and unpins the BO. Must be paired with a
 * previous amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() call.
 */
2204 void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
2205 {
2206 struct amdgpu_bo *bo = mem->bo;
2207
2208 amdgpu_bo_reserve(bo, true);
2209 amdgpu_bo_kunmap(bo);
2210 amdgpu_bo_unpin(bo);
2211 amdgpu_bo_unreserve(bo);
2212 }
2213
2214 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
2215 struct kfd_vm_fault_info *mem)
2216 {
2217 if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
2218 *mem = *adev->gmc.vm_fault_info;
2219 mb();
2220 atomic_set(&adev->gmc.vm_fault_info_updated, 0);
2221 }
2222 return 0;
2223 }
2224
2225 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
2226 struct dma_buf *dma_buf,
2227 uint64_t va, void *drm_priv,
2228 struct kgd_mem **mem, uint64_t *size,
2229 uint64_t *mmap_offset)
2230 {
2231 struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
2232 struct drm_gem_object *obj;
2233 struct amdgpu_bo *bo;
2234 int ret;
2235
2236 if (dma_buf->ops != &amdgpu_dmabuf_ops)
2237
2238 return -EINVAL;
2239
2240 obj = dma_buf->priv;
2241 if (drm_to_adev(obj->dev) != adev)
2242
2243 return -EINVAL;
2244
2245 bo = gem_to_amdgpu_bo(obj);
2246 if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
2247 AMDGPU_GEM_DOMAIN_GTT)))
2248
2249 return -EINVAL;
2250
2251 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2252 if (!*mem)
2253 return -ENOMEM;
2254
2255 ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
2256 if (ret) {
		kfree(*mem);
2258 return ret;
2259 }
2260
2261 if (size)
2262 *size = amdgpu_bo_size(bo);
2263
2264 if (mmap_offset)
2265 *mmap_offset = amdgpu_bo_mmap_offset(bo);
2266
2267 INIT_LIST_HEAD(&(*mem)->attachments);
2268 mutex_init(&(*mem)->lock);
2269
2270 (*mem)->alloc_flags =
2271 ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
2272 KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
2273 | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
2274 | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
2275
2276 drm_gem_object_get(&bo->tbo.base);
2277 (*mem)->bo = bo;
2278 (*mem)->va = va;
2279 (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
2280 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
2281 (*mem)->mapped_to_gpu_memory = 0;
2282 (*mem)->process_info = avm->process_info;
2283 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
2284 amdgpu_sync_create(&(*mem)->sync);
2285 (*mem)->is_imported = true;
2286
2287 return 0;
2288 }
2289

/* Evict a userptr BO by stopping the queues if necessary
 *
 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 * cannot do any memory allocations, and cannot take any locks that
 * are held elsewhere while allocating memory.
 *
 * It doesn't do anything to the BO itself. The real work happens in
 * restore, which gets scheduled asynchronously.
 */
2301 int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
2302 struct mm_struct *mm)
2303 {
2304 struct amdkfd_process_info *process_info = mem->process_info;
2305 int evicted_bos;
2306 int r = 0;
2307
2308 /* Do not evict while MMU notifications are blocked for this process */
2309 if (READ_ONCE(process_info->block_mmu_notifications))
2310 return 0;
2311
2312 atomic_inc(&mem->invalid);
2313 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
2314 if (evicted_bos == 1) {
2315 /* First eviction, stop the queues */
2316 r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
2317 if (r)
2318 pr_err("Failed to quiesce KFD\n");
2319 schedule_delayed_work(&process_info->restore_userptr_work,
2320 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
2321 }
2322
2323 return r;
2324 }
2325
2326 /* Update invalid userptr BOs
2327  *
2328  * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
2329  * userptr_inval_list and updates user pages for all BOs that have
2330  * been invalidated since their last update.
2331  */
2332 static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
2333 struct mm_struct *mm)
2334 {
2335 struct kgd_mem *mem, *tmp_mem;
2336 struct amdgpu_bo *bo;
2337 struct ttm_operation_ctx ctx = { false, false };
2338 int invalid, ret;
2339
2340 /* Move all invalidated BOs to the userptr_inval_list and
2341  * release their user pages by migrating them to the CPU domain.
2342  */
2343 list_for_each_entry_safe(mem, tmp_mem,
2344 &process_info->userptr_valid_list,
2345 validate_list.head) {
2346 if (!atomic_read(&mem->invalid))
2347 continue;
2348
2349 bo = mem->bo;
2350
2351 if (amdgpu_bo_reserve(bo, true))
2352 return -EAGAIN;
2353 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
2354 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
2355 amdgpu_bo_unreserve(bo);
2356 if (ret) {
2357 pr_err("%s: Failed to invalidate userptr BO\n",
2358 __func__);
2359 return -EAGAIN;
2360 }
2361
2362 list_move_tail(&mem->validate_list.head,
2363 &process_info->userptr_inval_list);
2364 }
2365
2366 if (list_empty(&process_info->userptr_inval_list))
2367 return 0;
2368
2369 /* Go through userptr_inval_list and update any invalid user pages */
2370 list_for_each_entry(mem, &process_info->userptr_inval_list,
2371 validate_list.head) {
2372 invalid = atomic_read(&mem->invalid);
2373 if (!invalid)
2374 /* BO hasn't been invalidated since the last attempt;
2375  * no need to get its user pages again.
2376  */
2377 continue;
2378
2379 bo = mem->bo;
2380
2381 /* Get updated user pages */
2382 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
2383 if (ret) {
2384 pr_debug("Failed %d to get user pages\n", ret);
2385
2386 /* Return -EFAULT bad address error as success. It will
2387  * fail later with a VM fault if the GPU tries to access
2388  * it. Better than hanging indefinitely with stalled
2389  * user mode queues.
2390  *
2391  * Return other errors (e.g. -EBUSY, -ENOMEM) to retry the restore.
2392  */
2393 if (ret != -EFAULT)
2394 return ret;
2395 } else {
2396
2397 /* Done using the HMM range for this BO. The return value
2398  * of amdgpu_ttm_tt_get_user_pages_done() is not checked
2399  * here; concurrent invalidations show up via mem->invalid.
2400  */
2401 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
2402 }
2403
2404 /* Mark the BO as valid unless it was invalidated
2405  * again concurrently.
2406  */
2407 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
2408 return -EAGAIN;
2409 }
2410
2411 return 0;
2412 }
2413
2414 /* Validate invalid userptr BOs
2415  *
2416  * Validates the BOs on the userptr_inval_list, moves them back to the
2417  * userptr_valid_list, updates the GPUVM page tables with the new page
2418  * addresses and waits for the page table updates to complete.
2419  */
2420 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
2421 {
2422 struct amdgpu_bo_list_entry *pd_bo_list_entries;
2423 struct list_head resv_list, duplicates;
2424 struct ww_acquire_ctx ticket;
2425 struct amdgpu_sync sync;
2426
2427 struct amdgpu_vm *peer_vm;
2428 struct kgd_mem *mem, *tmp_mem;
2429 struct amdgpu_bo *bo;
2430 struct ttm_operation_ctx ctx = { false, false };
2431 int i, ret;
2432
2433 pd_bo_list_entries = kcalloc(process_info->n_vms,
2434 sizeof(struct amdgpu_bo_list_entry),
2435 GFP_KERNEL);
2436 if (!pd_bo_list_entries) {
2437 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
2438 ret = -ENOMEM;
2439 goto out_no_mem;
2440 }
2441
2442 INIT_LIST_HEAD(&resv_list);
2443 INIT_LIST_HEAD(&duplicates);
2444
2445 /* Get all the page directory BOs that need to be reserved */
2446 i = 0;
2447 list_for_each_entry(peer_vm, &process_info->vm_list_head,
2448 vm_list_node)
2449 amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
2450 &pd_bo_list_entries[i++]);
2451
2452 list_for_each_entry(mem, &process_info->userptr_inval_list,
2453 validate_list.head) {
2454 list_add_tail(&mem->resv_list.head, &resv_list);
2455 mem->resv_list.bo = mem->validate_list.bo;
2456 mem->resv_list.num_shared = mem->validate_list.num_shared;
2457 }
2458
2459 /* Reserve all BOs and page tables for validation */
2460 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
2461 WARN(!list_empty(&duplicates), "Duplicates should be empty");
2462 if (ret)
2463 goto out_free;
2464
2465 amdgpu_sync_create(&sync);
2466
2467 ret = process_validate_vms(process_info);
2468 if (ret)
2469 goto unreserve_out;
2470
2471 /* Validate BOs and update GPUVM page tables */
2472 list_for_each_entry_safe(mem, tmp_mem,
2473 &process_info->userptr_inval_list,
2474 validate_list.head) {
2475 struct kfd_mem_attachment *attachment;
2476
2477 bo = mem->bo;
2478
2479 /* Validate the BO only if we were able to get its user pages */
2480 if (bo->tbo.ttm->pages[0]) {
2481 amdgpu_bo_placement_from_domain(bo, mem->domain);
2482 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
2483 if (ret) {
2484 pr_err("%s: failed to validate BO\n", __func__);
2485 goto unreserve_out;
2486 }
2487 }
2488
2489 list_move_tail(&mem->validate_list.head,
2490 &process_info->userptr_valid_list);
2491
2492 /* Update the mapping. If the BO was not validated
2493  * (because we couldn't get user pages), this will
2494  * clear the page table entries, which will result in
2495  * VM faults if the GPU tries to access the invalid
2496  * memory.
2497  */
2498 list_for_each_entry(attachment, &mem->attachments, list) {
2499 if (!attachment->is_mapped)
2500 continue;
2501
2502 kfd_mem_dmaunmap_attachment(mem, attachment);
2503 ret = update_gpuvm_pte(mem, attachment, &sync);
2504 if (ret) {
2505 pr_err("%s: update PTE failed\n", __func__);
2506 /* make sure this gets validated again */
2507 atomic_inc(&mem->invalid);
2508 goto unreserve_out;
2509 }
2510 }
2511 }
2512
2513 /* Update page directories */
2514 ret = process_update_pds(process_info, &sync);
2515
2516 unreserve_out:
2517 ttm_eu_backoff_reservation(&ticket, &resv_list);
2518 amdgpu_sync_wait(&sync, false);
2519 amdgpu_sync_free(&sync);
2520 out_free:
2521 kfree(pd_bo_list_entries);
2522 out_no_mem:
2523
2524 return ret;
2525 }
2526
2527 /* Worker callback to restore evicted userptr BOs
2528  *
2529  * Tries to update and validate all userptr BOs. If this succeeds and no
2530  * concurrent evictions happened, the queues are resumed. Otherwise,
2531  * another restore attempt is scheduled.
2532  */
2533 static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
2534 {
2535 struct delayed_work *dwork = to_delayed_work(work);
2536 struct amdkfd_process_info *process_info =
2537 container_of(dwork, struct amdkfd_process_info,
2538 restore_userptr_work);
2539 struct task_struct *usertask;
2540 struct mm_struct *mm;
2541 int evicted_bos;
2542
2543 evicted_bos = atomic_read(&process_info->evicted_bos);
2544 if (!evicted_bos)
2545 return;
2546
2547 /* Reference the task and mm in case of concurrent process termination */
2548 usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
2549 if (!usertask)
2550 return;
2551 mm = get_task_mm(usertask);
2552 if (!mm) {
2553 put_task_struct(usertask);
2554 return;
2555 }
2556
2557 mutex_lock(&process_info->lock);
2558
2559 if (update_invalid_user_pages(process_info, mm))
2560 goto unlock_out;
2561
2562 /* userptr_inval_list can be empty if all evicted userptr
2563  * BOs have been freed; then there is nothing to validate.
2564  */
2565 if (!list_empty(&process_info->userptr_inval_list)) {
2566 if (atomic_read(&process_info->evicted_bos) != evicted_bos)
2567 goto unlock_out;
2568
2569 if (validate_invalid_user_pages(process_info))
2570 goto unlock_out;
2571 }
2572
2573 /* Resume the queues only if no new evictions happened while
2574  * restoring. Otherwise the eviction count is left unchanged,
2575  * the work is rescheduled below and the queues stay stopped.
2576  */
2577 if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
2578 evicted_bos)
2579 goto unlock_out;
2580 evicted_bos = 0;
2581 if (kgd2kfd_resume_mm(mm)) {
2582 pr_err("%s: Failed to resume KFD\n", __func__);
2583 /* No recovery is attempted here; evicted_bos is already
2584  * zero, so the restore work will not be rescheduled.
2585  */
2586 }
2587
2588 unlock_out:
2589 mutex_unlock(&process_info->lock);
2590
2591 /* If the restore did not complete, reschedule another attempt */
2592 if (evicted_bos) {
2593 schedule_delayed_work(&process_info->restore_userptr_work,
2594 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
2595
2596 kfd_smi_event_queue_restore_rescheduled(mm);
2597 }
2598 mmput(mm);
2599 put_task_struct(usertask);
2600 }
2601
2602 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
2603  *   KFD process identified by process_info
2604  *
2605  * @info: amdkfd_process_info of the KFD process
2606  * @ef: Output pointer for the new eviction fence
2607  *
2608  * After memory eviction, the restore thread calls this function. It should
2609  * be called while the process is still valid. BO restore involves:
2610  *
2611  * 1.  Gather the PD BOs of all VMs and the BOs on kfd_bo_list into the list
2612  *     of BOs that need to be reserved (ctx.list)
2613  * 2.  Reserve all the BOs
2614  * 3.  Validate PD and PT BOs
2615  * 4.  Validate all KFD BOs and update their GPUVM mappings
2616  * 5.  Release the old eviction fence and create a new one
2617  * 6.  Attach the new fence to all unpinned KFD BOs and to the PD/PT BOs
2618  * 7.  Unreserve all BOs
2619  */
2620 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
2621 {
2622 struct amdgpu_bo_list_entry *pd_bo_list;
2623 struct amdkfd_process_info *process_info = info;
2624 struct amdgpu_vm *peer_vm;
2625 struct kgd_mem *mem;
2626 struct bo_vm_reservation_context ctx;
2627 struct amdgpu_amdkfd_fence *new_fence;
2628 int ret = 0, i;
2629 struct list_head duplicate_save;
2630 struct amdgpu_sync sync_obj;
2631 unsigned long failed_size = 0;
2632 unsigned long total_size = 0;
2633
2634 INIT_LIST_HEAD(&duplicate_save);
2635 INIT_LIST_HEAD(&ctx.list);
2636 INIT_LIST_HEAD(&ctx.duplicates);
2637
2638 pd_bo_list = kcalloc(process_info->n_vms,
2639 sizeof(struct amdgpu_bo_list_entry),
2640 GFP_KERNEL);
2641 if (!pd_bo_list)
2642 return -ENOMEM;
2643
2644 i = 0;
2645 mutex_lock(&process_info->lock);
2646 list_for_each_entry(peer_vm, &process_info->vm_list_head,
2647 vm_list_node)
2648 amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
2649
2650 /* Reserve all BOs and page tables/directories. Add all BOs from
2651  * kfd_bo_list to ctx.list.
2652  */
2653 list_for_each_entry(mem, &process_info->kfd_bo_list,
2654 validate_list.head) {
2655
2656 list_add_tail(&mem->resv_list.head, &ctx.list);
2657 mem->resv_list.bo = mem->validate_list.bo;
2658 mem->resv_list.num_shared = mem->validate_list.num_shared;
2659 }
2660
2661 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
2662 false, &duplicate_save);
2663 if (ret) {
2664 pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
2665 goto ttm_reserve_fail;
2666 }
2667
2668 amdgpu_sync_create(&sync_obj);
2669
2670 /* Validate PDs and PTs */
2671 ret = process_validate_vms(process_info);
2672 if (ret)
2673 goto validate_map_fail;
2674
2675 ret = process_sync_pds_resv(process_info, &sync_obj);
2676 if (ret) {
2677 pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2678 goto validate_map_fail;
2679 }
2680
2681 /* Validate BOs and map them to GPUVM (update VM page tables) */
2682 list_for_each_entry(mem, &process_info->kfd_bo_list,
2683 validate_list.head) {
2684
2685 struct amdgpu_bo *bo = mem->bo;
2686 uint32_t domain = mem->domain;
2687 struct kfd_mem_attachment *attachment;
2688 struct dma_resv_iter cursor;
2689 struct dma_fence *fence;
2690
2691 total_size += amdgpu_bo_size(bo);
2692
2693 ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
2694 if (ret) {
2695 pr_debug("Memory eviction: Validate BOs failed\n");
2696 failed_size += amdgpu_bo_size(bo);
2697 ret = amdgpu_amdkfd_bo_validate(bo,
2698 AMDGPU_GEM_DOMAIN_GTT, false);
2699 if (ret) {
2700 pr_debug("Memory eviction: Try again\n");
2701 goto validate_map_fail;
2702 }
2703 }
2704 dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
2705 DMA_RESV_USAGE_KERNEL, fence) {
2706 ret = amdgpu_sync_fence(&sync_obj, fence);
2707 if (ret) {
2708 pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2709 goto validate_map_fail;
2710 }
2711 }
2712 list_for_each_entry(attachment, &mem->attachments, list) {
2713 if (!attachment->is_mapped)
2714 continue;
2715
2716 kfd_mem_dmaunmap_attachment(mem, attachment);
2717 ret = update_gpuvm_pte(mem, attachment, &sync_obj);
2718 if (ret) {
2719 pr_debug("Memory eviction: update PTE failed. Try again\n");
2720 goto validate_map_fail;
2721 }
2722 }
2723 }
2724
2725 if (failed_size)
2726 pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
2727
2728 /* Update page directories */
2729 ret = process_update_pds(process_info, &sync_obj);
2730 if (ret) {
2731 pr_debug("Memory eviction: update PDs failed. Try again\n");
2732 goto validate_map_fail;
2733 }
2734
2735 /* Wait for validate and PT updates to finish */
2736 amdgpu_sync_wait(&sync_obj, false);
2737
2738 /* Release the old eviction fence and create a new one, because a fence
2739  * only goes from unsignaled to signaled and cannot be reused. Use the
2740  * context and mm from the old fence.
2741  */
2742 new_fence = amdgpu_amdkfd_fence_create(
2743 process_info->eviction_fence->base.context,
2744 process_info->eviction_fence->mm,
2745 NULL);
2746 if (!new_fence) {
2747 pr_err("Failed to create eviction fence\n");
2748 ret = -ENOMEM;
2749 goto validate_map_fail;
2750 }
2751 dma_fence_put(&process_info->eviction_fence->base);
2752 process_info->eviction_fence = new_fence;
2753 *ef = dma_fence_get(&new_fence->base);
2754
2755 /* Attach the new eviction fence to all BOs except pinned ones */
2756 list_for_each_entry(mem, &process_info->kfd_bo_list,
2757 validate_list.head) {
2758 if (mem->bo->tbo.pin_count)
2759 continue;
2760
2761 amdgpu_bo_fence(mem->bo,
2762 &process_info->eviction_fence->base, true);
2763 }
2764 /* Attach the eviction fence to the PD / PT BOs */
2765 list_for_each_entry(peer_vm, &process_info->vm_list_head,
2766 vm_list_node) {
2767 struct amdgpu_bo *bo = peer_vm->root.bo;
2768
2769 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
2770 }
2771
2772 validate_map_fail:
2773 ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
2774 amdgpu_sync_free(&sync_obj);
2775 ttm_reserve_fail:
2776 mutex_unlock(&process_info->lock);
2777 kfree(pd_bo_list);
2778 return ret;
2779 }
2780
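/* amdgpu_amdkfd_add_gws_to_process() - Add the GWS BO to a KFD process
 *
 * Takes a reference on the device's GWS BO, wraps it in a new kgd_mem
 * returned through @mem, validates it in the GWS domain and attaches the
 * process eviction fence to it.
 */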
2781 int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2782 {
2783 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2784 struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2785 int ret;
2786
2787 if (!info || !gws)
2788 return -EINVAL;
2789
2790 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2791 if (!*mem)
2792 return -ENOMEM;
2793
2794 mutex_init(&(*mem)->lock);
2795 INIT_LIST_HEAD(&(*mem)->attachments);
2796 (*mem)->bo = amdgpu_bo_ref(gws_bo);
2797 (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2798 (*mem)->process_info = process_info;
2799 add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2800 amdgpu_sync_create(&(*mem)->sync);
2801
2802
2803 /* Validate the GWS BO the first time it is added to the process */
2804 mutex_lock(&(*mem)->process_info->lock);
2805 ret = amdgpu_bo_reserve(gws_bo, false);
2806 if (unlikely(ret)) {
2807 pr_err("Reserve gws bo failed %d\n", ret);
2808 goto bo_reservation_failure;
2809 }
2810
2811 ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2812 if (ret) {
2813 pr_err("GWS BO validate failed %d\n", ret);
2814 goto bo_validation_failure;
2815 }
2816
2817 /* Add the process eviction fence to the BO so that the
2818  * GWS BO and the process BOs can evict each other.
2819  */
2820 ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
2821 if (ret)
2822 goto reserve_shared_fail;
2823 amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2824 amdgpu_bo_unreserve(gws_bo);
2825 mutex_unlock(&(*mem)->process_info->lock);
2826
2827 return ret;
2828
2829 reserve_shared_fail:
2830 bo_validation_failure:
2831 amdgpu_bo_unreserve(gws_bo);
2832 bo_reservation_failure:
2833 mutex_unlock(&(*mem)->process_info->lock);
2834 amdgpu_sync_free(&(*mem)->sync);
2835 remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2836 amdgpu_bo_unref(&gws_bo);
2837 mutex_destroy(&(*mem)->lock);
2838 kfree(*mem);
2839 *mem = NULL;
2840 return ret;
2841 }
2842
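/* amdgpu_amdkfd_remove_gws_from_process() - Undo amdgpu_amdkfd_add_gws_to_process()
 *
 * Removes the kgd_mem wrapping the GWS BO from the process's BO list,
 * detaches the process eviction fence, drops the BO reference and frees
 * the kgd_mem.
 */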
2843 int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2844 {
2845 int ret;
2846 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2847 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2848 struct amdgpu_bo *gws_bo = kgd_mem->bo;
2849
2850 /* Remove the BO from the process's validate list so that the
2851  * restore worker won't touch it anymore.
2852  */
2853 remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2854
2855 ret = amdgpu_bo_reserve(gws_bo, false);
2856 if (unlikely(ret)) {
2857 pr_err("Reserve gws bo failed %d\n", ret);
2858
2859 return ret;
2860 }
2861 amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2862 process_info->eviction_fence);
2863 amdgpu_bo_unreserve(gws_bo);
2864 amdgpu_sync_free(&kgd_mem->sync);
2865 amdgpu_bo_unref(&gws_bo);
2866 mutex_destroy(&kgd_mem->lock);
2867 kfree(mem);
2868 return 0;
2869 }
2870
2871
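/* amdgpu_amdkfd_get_tile_config() - Report the GPU's tiling configuration
 *
 * Fills @config with pointers to the tile and macro-tile mode arrays and
 * with the bank and rank counts taken from the GFX configuration.
 */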
2872 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
2873 struct tile_config *config)
2874 {
2875 config->gb_addr_config = adev->gfx.config.gb_addr_config;
2876 config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2877 config->num_tile_configs =
2878 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2879 config->macro_tile_config_ptr =
2880 adev->gfx.config.macrotile_mode_array;
2881 config->num_macro_tile_configs =
2882 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2883
2884
2885 config->num_banks = adev->gfx.config.num_banks;
2886 config->num_ranks = adev->gfx.config.num_ranks;
2887
2888 return 0;
2889 }
2890
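/* amdgpu_amdkfd_bo_mapped_to_dev() - Check whether a BO is mapped on a device
 *
 * Returns true if any attachment of @mem is currently mapped on @adev.
 */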
2891 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
2892 {
2893 struct kfd_mem_attachment *entry;
2894
2895 list_for_each_entry(entry, &mem->attachments, list) {
2896 if (entry->is_mapped && entry->adev == adev)
2897 return true;
2898 }
2899 return false;
2900 }
2901
2902 #if defined(CONFIG_DEBUG_FS)
2903
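/* Dump the current KFD system and TTM memory accounting to debugfs */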
2904 int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
2905 {
2906
2907 spin_lock(&kfd_mem_limit.mem_limit_lock);
2908 seq_printf(m, "System mem used %lldM out of %lluM\n",
2909 (kfd_mem_limit.system_mem_used >> 20),
2910 (kfd_mem_limit.max_system_mem_limit >> 20));
2911 seq_printf(m, "TTM mem used %lldM out of %lluM\n",
2912 (kfd_mem_limit.ttm_mem_used >> 20),
2913 (kfd_mem_limit.max_ttm_mem_limit >> 20));
2914 spin_unlock(&kfd_mem_limit.mem_limit_lock);
2915
2916 return 0;
2917 }
2918
2919 #endif