// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "kfd_smi_events.h"

#ifdef dev_fmt
#undef dev_fmt
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt

static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned int num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t pte_flags;
	void *cpu_addr;
	int r;

	/* Map at GART offset 0; the caller serializes access to this window
	 * with adev->mman.gtt_window_lock.
	 */
	*gart_addr = adev->gmc.gart_start;

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = npages * 8;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
		pte_flags |= AMDGPU_PTE_WRITEABLE;
	pte_flags |= adev->gart.gart_pte_flags;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	/* Write the new GART PTEs into the IB payload, then let SDMA copy
	 * them into the GART table before the buffer copy runs.
	 */
	amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}
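
/**
 * svm_migrate_copy_memory_gart - copy between system memory and VRAM with SDMA
 * @adev: amdgpu device the SDMA ring belongs to
 * @sys: DMA addresses of the system pages
 * @vram: VRAM offsets of the device pages
 * @npages: number of pages to copy
 * @direction: FROM_RAM_TO_VRAM or FROM_VRAM_TO_RAM
 * @mfence: output, fence of the last submitted SDMA copy
 *
 * The system pages are mapped through a GART window and the VRAM side uses
 * the direct aperture mapping. The copy is split into chunks of at most
 * AMDGPU_GTT_MAX_TRANSFER_SIZE pages. Only the fence of the most recent copy
 * is kept in @mfence; earlier fences are dropped because submissions on the
 * same SDMA ring complete in order.
 *
 * Return: 0 on success, negative error code on failure.
 */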
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
			     uint64_t *vram, uint64_t npages,
			     enum MIGRATION_COPY_DIR direction,
			     struct dma_fence **mfence)
{
	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	uint64_t gart_s, gart_d;
	struct dma_fence *next;
	uint64_t size;
	int r;

	mutex_lock(&adev->mman.gtt_window_lock);

	while (npages) {
		size = min(GTT_MAX_PAGES, npages);

		if (direction == FROM_VRAM_TO_RAM) {
			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

		} else if (direction == FROM_RAM_TO_VRAM) {
			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
						 KFD_IOCTL_SVM_FLAG_GPU_RO);
			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
		}
		if (r) {
			dev_err(adev->dev, "fail %d create gart mapping\n", r);
			goto out_unlock;
		}

		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
				       NULL, &next, false, true, false);
		if (r) {
			dev_err(adev->dev, "fail %d to copy memory\n", r);
			goto out_unlock;
		}

		dma_fence_put(*mfence);
		*mfence = next;
		npages -= size;
		if (npages) {
			sys += size;
			vram += size;
		}
	}

out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}
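
/**
 * svm_migrate_copy_done - wait for the SDMA copy fence to signal
 * @adev: amdgpu device the copy was submitted on
 * @mfence: fence of the last SDMA copy, may be NULL if nothing was copied
 *
 * Return: 0 on success, or the error returned by dma_fence_wait().
 */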
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	return r;
}

unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
	return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	svm_range_bo_ref(prange->svm_bo);
	page->zone_device_data = prange->svm_bo;
	lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
	unlock_page(page);
	put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
	unsigned long addr;

	addr = page_to_pfn(page) << PAGE_SHIFT;
	return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
	if (page)
		lock_page(page);

	return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(addr >> PAGE_SHIFT);
	unlock_page(page);
	put_page(page);
}

static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
{
	unsigned long upages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
			upages++;
	}
	return upages;
}

static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch)
{
	uint64_t npages = migrate->npages;
	struct device *dev = adev->dev;
	struct amdgpu_res_cursor cursor;
	dma_addr_t *src;
	uint64_t *dst;
	uint64_t i, j;
	int r;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	src = scratch;
	dst = (uint64_t *)(scratch + npages);

	r = svm_range_vram_node_new(adev, prange, true);
	if (r) {
		dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
		goto out;
	}

	amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
			 npages << PAGE_SHIFT, &cursor);
	for (i = j = 0; i < npages; i++) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (spage && !is_zone_device_page(spage)) {
			/* DMA-map the system page and pick the next VRAM page
			 * from the cursor as its destination.
			 */
			dst[i] = cursor.start + (j << PAGE_SHIFT);
			migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
			svm_migrate_get_vram_page(prange, migrate->dst[i]);
			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				dev_err(adev->dev, "%s: fail %d dma_map_page\n",
					__func__, r);
				goto out_free_vram_pages;
			}
		} else {
			/* Page is not migrating: flush the pending contiguous
			 * batch, then skip this page in the VRAM cursor.
			 */
			if (j) {
				r = svm_migrate_copy_memory_gart(
						adev, src + i - j,
						dst + i - j, j,
						FROM_RAM_TO_VRAM,
						mfence);
				if (r)
					goto out_free_vram_pages;
				amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
				j = 0;
			} else {
				amdgpu_res_next(&cursor, PAGE_SIZE);
			}
			continue;
		}

		pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
				     src[i] >> PAGE_SHIFT, page_to_pfn(spage));

		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
			/* The current VRAM segment is exhausted: copy the
			 * batch including this page and advance the cursor
			 * past it.
			 */
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;
			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
			j = 0;
		} else {
			j++;
		}
	}

	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
	if (r) {
		pr_debug("failed %d to copy memory to vram\n", r);
		while (i--) {
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
		}
	}

#ifdef DEBUG_FORCE_MIXED_DOMAINS
	for (i = 0, j = 0; i < npages; i += 4, j++) {
		if (j & 1)
			continue;
		svm_migrate_put_vram_page(adev, dst[i]);
		migrate->dst[i] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 1]);
		migrate->dst[i + 1] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 2]);
		migrate->dst[i + 2] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 3]);
		migrate->dst[i + 3] = 0;
	}
#endif
out:
	return r;
}

static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end, uint32_t trigger)
{
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	unsigned long cpages = 0;
	dma_addr_t *scratch;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

	buf = kvcalloc(npages,
		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
		       GFP_KERNEL);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      0, adev->kfd.dev->id, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
			__func__, r, prange->start, prange->last);
		goto out_free;
	}

	cpages = migrate.cpages;
	if (!cpages) {
		pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
		goto out_free;
	}
	if (cpages != npages)
		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
			 cpages, npages);
	else
		pr_debug("0x%lx pages migrated\n", cpages);

	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
	migrate_vma_pages(&migrate);

	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
		 svm_migrate_successful_pages(&migrate), cpages, migrate.npages);

	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    0, adev->kfd.dev->id, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
	svm_range_free_dma_mappings(prange);

out_free:
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_adev(prange, adev);
		if (pdd)
			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);

		return cpages;
	}
	return r;
}
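
/**
 * svm_migrate_ram_to_vram - migrate an svm range from system memory to VRAM
 * @prange: the range to migrate
 * @best_loc: GPU id of the device to migrate to
 * @mm: process mm to look up the VMAs covering the range
 * @trigger: reason for the migration, reported through SMI events
 *
 * The range is prefaulted first, then migrated one VMA at a time. If at least
 * one page was migrated, prange->actual_loc is updated to @best_loc.
 *
 * Return: 0 on success, negative error code on failure.
 */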
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm, uint32_t trigger)
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	struct amdgpu_device *adev;
	unsigned long cpages = 0;
	long r = 0;

	if (prange->actual_loc == best_loc) {
		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
			 prange->svms, prange->start, prange->last, best_loc);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, best_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n", best_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
		 prange->start, prange->last, best_loc);

	svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
		if (r < 0) {
			pr_debug("failed %ld to migrate\n", r);
			break;
		} else {
			cpages += r;
		}
		addr = next;
	}

	if (cpages)
		prange->actual_loc = best_loc;

	return r < 0 ? r : 0;
}

static void svm_migrate_page_free(struct page *page)
{
	struct svm_range_bo *svm_bo = page->zone_device_data;

	if (svm_bo) {
		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
		/* Drop the svm_bo reference taken in svm_migrate_get_vram_page() */
		svm_range_bo_unref_async(svm_bo);
	}
}

static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
			struct migrate_vma *migrate, struct dma_fence **mfence,
			dma_addr_t *scratch, uint64_t npages)
{
	struct device *dev = adev->dev;
	uint64_t *src;
	dma_addr_t *dst;
	struct page *dpage;
	uint64_t i = 0, j;
	uint64_t addr;
	int r = 0;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	addr = prange->start << PAGE_SHIFT;

	src = (uint64_t *)(scratch + npages);
	dst = scratch;

	for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (!spage || !is_zone_device_page(spage)) {
			pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			/* Not a VRAM page: flush the batched copy, if any */
			if (j) {
				r = svm_migrate_copy_memory_gart(adev, dst + i - j,
								 src + i - j, j,
								 FROM_VRAM_TO_RAM,
								 mfence);
				if (r)
					goto out_oom;
				j = 0;
			}
			continue;
		}
		src[i] = svm_migrate_addr(adev, spage);
		if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
			/* VRAM source is no longer contiguous: flush the batch */
			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
							 src + i - j, j,
							 FROM_VRAM_TO_RAM,
							 mfence);
			if (r)
				goto out_oom;
			j = 0;
		}

		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
		if (!dpage) {
			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}

		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		r = dma_mapping_error(dev, dst[i]);
		if (r) {
			dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
			goto out_oom;
		}

		pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
		j++;
	}

	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
					 FROM_VRAM_TO_RAM, mfence);

out_oom:
	if (r) {
		pr_debug("failed %d copy to ram\n", r);
		while (i--) {
			svm_migrate_put_sys_page(dst[i]);
			migrate->dst[i] = 0;
		}
	}

	return r;
}
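
/**
 * svm_migrate_vma_to_ram - migrate one VMA chunk of a range back to system memory
 * @adev: amdgpu device that owns the VRAM pages
 * @prange: the range being migrated
 * @vma: the vm_area_struct covering [@start, @end)
 * @start: start address, page aligned
 * @end: end address, page aligned
 * @trigger: reason for the migration, reported through SMI events
 *
 * Return: negative error code on failure; on success, the number of pages
 * that could not be migrated (0 means everything was moved to system memory).
 */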
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
		       uint32_t trigger)
{
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	unsigned long upages = npages;
	unsigned long cpages = 0;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
	if (adev->gmc.xgmi.connected_to_cpu)
		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
	else
		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

	buf = kvcalloc(npages,
		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
		       GFP_KERNEL);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      adev->kfd.dev->id, 0, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
			__func__, r, prange->start, prange->last);
		goto out_free;
	}

	cpages = migrate.cpages;
	if (!cpages) {
		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
		upages = svm_migrate_unsuccessful_pages(&migrate);
		goto out_free;
	}
	if (cpages != npages)
		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
			 cpages, npages);
	else
		pr_debug("0x%lx pages migrated\n", cpages);

	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
				    scratch, npages);
	migrate_vma_pages(&migrate);

	upages = svm_migrate_unsuccessful_pages(&migrate);
	pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
		 upages, cpages, migrate.npages);

	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    adev->kfd.dev->id, 0, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_adev(prange, adev);
		if (pdd)
			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
	}
	return r ? r : upages;
}
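
/**
 * svm_migrate_vram_to_ram - migrate an svm range from VRAM back to system memory
 * @prange: the range to migrate, actual_loc identifies the source device
 * @mm: process mm to look up the VMAs covering the range
 * @trigger: reason for the migration, reported through SMI events
 *
 * The range is migrated one VMA at a time. When every page has been moved
 * back, the VRAM backing is freed and prange->actual_loc is reset to 0.
 *
 * Return: 0 on success, negative error code on failure.
 */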
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			    uint32_t trigger)
{
	struct amdgpu_device *adev;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	unsigned long upages = 0;
	long r = 0;

	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n",
			 prange->actual_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start) {
			pr_debug("failed to find vma for prange %p\n", prange);
			r = -EFAULT;
			break;
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
		if (r < 0) {
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
			break;
		} else {
			upages += r;
		}
		addr = next;
	}

	if (r >= 0 && !upages) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}

	return r < 0 ? r : 0;
}
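
/**
 * svm_migrate_vram_to_vram - migrate an svm range between two devices
 * @prange: the range to migrate, actual_loc is the current device
 * @best_loc: GPU id of the destination device
 * @mm: process mm to look up the VMAs covering the range
 * @trigger: reason for the migration, reported through SMI events
 *
 * Return: 0 on success, negative error code on failure.
 */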
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm, uint32_t trigger)
{
	int r, retries = 3;
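
	/* There is no direct VRAM-to-VRAM copy path here: the range is first
	 * migrated to system memory, retrying a few times if pages remain on
	 * the source device, and then migrated to the destination device.
	 */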
	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	do {
		r = svm_migrate_vram_to_ram(prange, mm, trigger);
		if (r)
			return r;
	} while (prange->actual_loc && --retries);

	if (prange->actual_loc)
		return -EDEADLK;

	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm, uint32_t trigger)
{
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
}
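
/**
 * svm_migrate_to_ram - CPU page fault handler for SVM VRAM pages
 * @vmf: fault information, including the faulting address and VMA
 *
 * Looks up the faulting range, optionally splits it at the migration
 * granularity, migrates it back to system memory and schedules the deferred
 * list work that updates the range notifier.
 *
 * Return: 0 on success, VM_FAULT_SIGBUS if the process or range cannot be
 * found or the migration fails.
 */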
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long addr = vmf->address;
	struct vm_area_struct *vma;
	enum svm_work_list_ops op;
	struct svm_range *parent;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	vma = vmf->vma;
	mm = vma->vm_mm;

	p = kfd_lookup_process_by_mm(vma->vm_mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	if (READ_ONCE(p->svms.faulting_task) == current) {
		pr_debug("skipping ram migration\n");
		kfd_unref_process(p);
		return 0;
	}
	addr >>= PAGE_SHIFT;
	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

	mutex_lock(&p->svms.lock);

	prange = svm_range_from_addr(&p->svms, addr, &parent);
	if (!prange) {
		pr_debug("cannot find svm range at 0x%lx\n", addr);
		r = -EFAULT;
		goto out;
	}

	mutex_lock(&parent->migrate_mutex);
	if (prange != parent)
		mutex_lock_nested(&prange->migrate_mutex, 1);

	if (!prange->actual_loc)
		goto out_unlock_prange;

	svm_range_lock(parent);
	if (prange != parent)
		mutex_lock_nested(&prange->lock, 1);
	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
	if (prange != parent)
		mutex_unlock(&prange->lock);
	svm_range_unlock(parent);
	if (r) {
		pr_debug("failed %d to split range by granularity\n", r);
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
	if (r)
		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
			 prange, prange->start, prange->last);

	if (p->xnack_enabled && parent == prange)
		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
	else
		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
	svm_range_add_list_work(&p->svms, parent, mm, op);
	schedule_deferred_list_work(&p->svms);

out_unlock_prange:
	if (prange != parent)
		mutex_unlock(&prange->migrate_mutex);
	mutex_unlock(&parent->migrate_mutex);
out:
	mutex_unlock(&p->svms.lock);
	kfd_unref_process(p);

	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

	return r ? VM_FAULT_SIGBUS : 0;
}

static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free = svm_migrate_page_free,
	.migrate_to_ram = svm_migrate_to_ram,
};
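
/* Each VRAM page used by SVM costs sizeof(struct page) of system memory */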
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

int svm_migrate_init(struct amdgpu_device *adev)
{
	struct kfd_dev *kfddev = adev->kfd.dev;
	struct dev_pagemap *pgmap;
	struct resource *res = NULL;
	unsigned long size;
	void *r;

	/* SVM page migration is only enabled for SOC15 based KFD devices */
	if (!KFD_IS_SOC15(kfddev))
		return -EINVAL;

	pgmap = &kfddev->pgmap;
	memset(pgmap, 0, sizeof(*pgmap));

	/* Round the VRAM size up to a 2MB boundary */
	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
	if (adev->gmc.xgmi.connected_to_cpu) {
		pgmap->range.start = adev->gmc.aper_base;
		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
		pgmap->type = MEMORY_DEVICE_COHERENT;
	} else {
		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
		if (IS_ERR(res))
			return -ENOMEM;
		pgmap->range.start = res->start;
		pgmap->range.end = res->end;
		pgmap->type = MEMORY_DEVICE_PRIVATE;
	}

	pgmap->nr_range = 1;
	pgmap->ops = &svm_migrate_pgmap_ops;
	pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
	pgmap->flags = 0;
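
	/* The pgmap and the requested memory region are device-managed; they
	 * are released automatically when the driver detaches from the device.
	 */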
	r = devm_memremap_pages(adev->dev, pgmap);
	if (IS_ERR(r)) {
		pr_err("failed to register HMM device memory\n");
		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
			devm_release_mem_region(adev->dev, res->start,
						res->end - res->start + 1);
		/* Disable SVM support capability */
		pgmap->type = 0;
		return PTR_ERR(r);
	}

	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

	svm_range_set_max_pages(adev);

	pr_info("HMM registered %ldMB device memory\n", size >> 20);

	return 0;
}