0001 // SPDX-License-Identifier: GPL-2.0 OR MIT
0002 /*
0003  * Copyright 2020-2021 Advanced Micro Devices, Inc.
0004  *
0005  * Permission is hereby granted, free of charge, to any person obtaining a
0006  * copy of this software and associated documentation files (the "Software"),
0007  * to deal in the Software without restriction, including without limitation
0008  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0009  * and/or sell copies of the Software, and to permit persons to whom the
0010  * Software is furnished to do so, subject to the following conditions:
0011  *
0012  * The above copyright notice and this permission notice shall be included in
0013  * all copies or substantial portions of the Software.
0014  *
0015  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0016  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0017  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0018  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0019  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0020  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0021  * OTHER DEALINGS IN THE SOFTWARE.
0022  */
0023 #include <linux/types.h>
0024 #include <linux/hmm.h>
0025 #include <linux/dma-direction.h>
0026 #include <linux/dma-mapping.h>
0027 #include <linux/migrate.h>
0028 #include "amdgpu_sync.h"
0029 #include "amdgpu_object.h"
0030 #include "amdgpu_vm.h"
0031 #include "amdgpu_mn.h"
0032 #include "amdgpu_res_cursor.h"
0033 #include "kfd_priv.h"
0034 #include "kfd_svm.h"
0035 #include "kfd_migrate.h"
0036 #include "kfd_smi_events.h"
0037 
0038 #ifdef dev_fmt
0039 #undef dev_fmt
0040 #endif
0041 #define dev_fmt(fmt) "kfd_migrate: " fmt
0042 
0043 static uint64_t
0044 svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
0045 {
0046     return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
0047 }
0048 
0049 static int
0050 svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
0051              dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
0052 {
0053     struct amdgpu_device *adev = ring->adev;
0054     struct amdgpu_job *job;
0055     unsigned int num_dw, num_bytes;
0056     struct dma_fence *fence;
0057     uint64_t src_addr, dst_addr;
0058     uint64_t pte_flags;
0059     void *cpu_addr;
0060     int r;
0061 
0062     /* use gart window 0 */
0063     *gart_addr = adev->gmc.gart_start;
0064 
0065     num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
0066     num_bytes = npages * 8;
0067 
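         /* The IB below holds num_dw dwords of copy commands followed by
          * num_bytes (npages * 8) bytes of GART PTEs; the copy command moves
          * those PTEs from the IB into the GART table.
          */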
0068     r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
0069                      AMDGPU_IB_POOL_DELAYED, &job);
0070     if (r)
0071         return r;
0072 
0073     src_addr = num_dw * 4;
0074     src_addr += job->ibs[0].gpu_addr;
0075 
0076     dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
0077     amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
0078                 dst_addr, num_bytes, false);
0079 
0080     amdgpu_ring_pad_ib(ring, &job->ibs[0]);
0081     WARN_ON(job->ibs[0].length_dw > num_dw);
0082 
0083     pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
0084     pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
0085     if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
0086         pte_flags |= AMDGPU_PTE_WRITEABLE;
0087     pte_flags |= adev->gart.gart_pte_flags;
0088 
0089     cpu_addr = &job->ibs[0].ptr[num_dw];
0090 
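         /* Generate the GART PTEs into the IB at cpu_addr; the copy packet
          * queued above writes them into the GART table when the job runs.
          */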
0091     amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
0092     r = amdgpu_job_submit(job, &adev->mman.entity,
0093                   AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
0094     if (r)
0095         goto error_free;
0096 
0097     dma_fence_put(fence);
0098 
0099     return r;
0100 
0101 error_free:
0102     amdgpu_job_free(job);
0103     return r;
0104 }
0105 
0106 /**
0107  * svm_migrate_copy_memory_gart - use sdma to copy data between ram and vram
0108  *
0109  * @adev: amdgpu device the sdma ring is running on
0110  * @sys: array of DMA addresses of the system memory pages
0111  * @vram: array of vram addresses of the device pages
0112  * @npages: number of pages to copy
0113  * @direction: enum MIGRATION_COPY_DIR
0114  * @mfence: output, sdma fence to signal after sdma is done
0115  *
0116  * The ram side uses continuous GART table entries mapped to the ram pages.
0117  * The vram side uses the direct mapping of the vram pages, which must be
0118  * npages continuous pages.
0119  * GART updates and the sdma copy share the same buffer-copy ring. The copy is
0120  * split into transfers of at most GTT_MAX_PAGES pages each; all sdma
0121  * operations are serialized, so waiting for the returned fence of the last
0122  * sdma operation is enough to know the whole copy is done.
0123  * Context: Process context, takes and releases gtt_window_lock
0124  *
0125  * Return:
0126  * 0 - OK, otherwise error code
0127  */
0128 
0129 static int
0130 svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
0131                  uint64_t *vram, uint64_t npages,
0132                  enum MIGRATION_COPY_DIR direction,
0133                  struct dma_fence **mfence)
0134 {
0135     const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
0136     struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
0137     uint64_t gart_s, gart_d;
0138     struct dma_fence *next;
0139     uint64_t size;
0140     int r = 0;
0141 
0142     mutex_lock(&adev->mman.gtt_window_lock);
0143 
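         /* Copy in chunks of at most GTT_MAX_PAGES pages: map the system pages
          * of each chunk through GART window 0, while the vram side is reached
          * through its direct mapping.
          */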
0144     while (npages) {
0145         size = min(GTT_MAX_PAGES, npages);
0146 
0147         if (direction == FROM_VRAM_TO_RAM) {
0148             gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
0149             r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
0150 
0151         } else if (direction == FROM_RAM_TO_VRAM) {
0152             r = svm_migrate_gart_map(ring, size, sys, &gart_s,
0153                          KFD_IOCTL_SVM_FLAG_GPU_RO);
0154             gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
0155         }
0156         if (r) {
0157             dev_err(adev->dev, "fail %d create gart mapping\n", r);
0158             goto out_unlock;
0159         }
0160 
0161         r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
0162                        NULL, &next, false, true, false);
0163         if (r) {
0164             dev_err(adev->dev, "fail %d to copy memory\n", r);
0165             goto out_unlock;
0166         }
0167 
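             /* Only the fence of the latest chunk is kept; the ring serializes
              * the copies, so this fence also covers all earlier chunks.
              */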
0168         dma_fence_put(*mfence);
0169         *mfence = next;
0170         npages -= size;
0171         if (npages) {
0172             sys += size;
0173             vram += size;
0174         }
0175     }
0176 
0177 out_unlock:
0178     mutex_unlock(&adev->mman.gtt_window_lock);
0179 
0180     return r;
0181 }
0182 
0183 /**
0184  * svm_migrate_copy_done - wait for the sdma memory copy to finish
0185  *
0186  * @adev: amdgpu device the sdma memory copy is executing on
0187  * @mfence: migrate fence
0188  *
0189  * Wait for the dma fence to be signaled. If the copy was split into multiple
0190  * sdma operations, this is the fence of the last sdma operation.
0191  *
0192  * Context: called after svm_migrate_copy_memory
0193  *
0194  * Return:
0195  * 0        - success
0196  * otherwise    - error code from dma fence signal
0197  */
0198 static int
0199 svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
0200 {
0201     int r = 0;
0202 
0203     if (mfence) {
0204         r = dma_fence_wait(mfence, false);
0205         dma_fence_put(mfence);
0206         pr_debug("sdma copy memory fence done\n");
0207     }
0208 
0209     return r;
0210 }
0211 
0212 unsigned long
0213 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
0214 {
0215     return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
0216 }
0217 
0218 static void
0219 svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
0220 {
0221     struct page *page;
0222 
0223     page = pfn_to_page(pfn);
0224     svm_range_bo_ref(prange->svm_bo);
0225     page->zone_device_data = prange->svm_bo;
0226     lock_page(page);
0227 }
0228 
0229 static void
0230 svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
0231 {
0232     struct page *page;
0233 
0234     page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
0235     unlock_page(page);
0236     put_page(page);
0237 }
0238 
0239 static unsigned long
0240 svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
0241 {
0242     unsigned long addr;
0243 
0244     addr = page_to_pfn(page) << PAGE_SHIFT;
0245     return (addr - adev->kfd.dev->pgmap.range.start);
0246 }
0247 
0248 static struct page *
0249 svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
0250 {
0251     struct page *page;
0252 
0253     page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
0254     if (page)
0255         lock_page(page);
0256 
0257     return page;
0258 }
0259 
0260 static void svm_migrate_put_sys_page(unsigned long addr)
0261 {
0262     struct page *page;
0263 
0264     page = pfn_to_page(addr >> PAGE_SHIFT);
0265     unlock_page(page);
0266     put_page(page);
0267 }
0268 
0269 static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
0270 {
0271     unsigned long cpages = 0;
0272     unsigned long i;
0273 
0274     for (i = 0; i < migrate->npages; i++) {
0275         if (migrate->src[i] & MIGRATE_PFN_VALID &&
0276             migrate->src[i] & MIGRATE_PFN_MIGRATE)
0277             cpages++;
0278     }
0279     return cpages;
0280 }
0281 
0282 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
0283 {
0284     unsigned long upages = 0;
0285     unsigned long i;
0286 
0287     for (i = 0; i < migrate->npages; i++) {
0288         if (migrate->src[i] & MIGRATE_PFN_VALID &&
0289             !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
0290             upages++;
0291     }
0292     return upages;
0293 }
0294 
0295 static int
0296 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
0297              struct migrate_vma *migrate, struct dma_fence **mfence,
0298              dma_addr_t *scratch)
0299 {
0300     uint64_t npages = migrate->npages;
0301     struct device *dev = adev->dev;
0302     struct amdgpu_res_cursor cursor;
0303     dma_addr_t *src;
0304     uint64_t *dst;
0305     uint64_t i, j;
0306     int r;
0307 
0308     pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
0309          prange->last);
0310 
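         /* scratch layout: npages DMA addresses of the system pages (src),
          * followed by npages vram offsets (dst).
          */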
0311     src = scratch;
0312     dst = (uint64_t *)(scratch + npages);
0313 
0314     r = svm_range_vram_node_new(adev, prange, true);
0315     if (r) {
0316         dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
0317         goto out;
0318     }
0319 
0320     amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
0321              npages << PAGE_SHIFT, &cursor);
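         /* Batch contiguous pages: j counts pages queued against the current
          * vram cursor segment; a GART copy is flushed when a page that does
          * not need copying is hit or the cursor segment is exhausted.
          */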
0322     for (i = j = 0; i < npages; i++) {
0323         struct page *spage;
0324 
0325         spage = migrate_pfn_to_page(migrate->src[i]);
0326         if (spage && !is_zone_device_page(spage)) {
0327             dst[i] = cursor.start + (j << PAGE_SHIFT);
0328             migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
0329             svm_migrate_get_vram_page(prange, migrate->dst[i]);
0330             migrate->dst[i] = migrate_pfn(migrate->dst[i]);
0331             src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
0332                           DMA_TO_DEVICE);
0333             r = dma_mapping_error(dev, src[i]);
0334             if (r) {
0335                 dev_err(adev->dev, "%s: fail %d dma_map_page\n",
0336                     __func__, r);
0337                 goto out_free_vram_pages;
0338             }
0339         } else {
0340             if (j) {
0341                 r = svm_migrate_copy_memory_gart(
0342                         adev, src + i - j,
0343                         dst + i - j, j,
0344                         FROM_RAM_TO_VRAM,
0345                         mfence);
0346                 if (r)
0347                     goto out_free_vram_pages;
0348                 amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
0349                 j = 0;
0350             } else {
0351                 amdgpu_res_next(&cursor, PAGE_SIZE);
0352             }
0353             continue;
0354         }
0355 
0356         pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
0357                      src[i] >> PAGE_SHIFT, page_to_pfn(spage));
0358 
0359         if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
0360             r = svm_migrate_copy_memory_gart(adev, src + i - j,
0361                              dst + i - j, j + 1,
0362                              FROM_RAM_TO_VRAM,
0363                              mfence);
0364             if (r)
0365                 goto out_free_vram_pages;
0366             amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
0367             j = 0;
0368         } else {
0369             j++;
0370         }
0371     }
0372 
0373     r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
0374                      FROM_RAM_TO_VRAM, mfence);
0375 
0376 out_free_vram_pages:
0377     if (r) {
0378         pr_debug("failed %d to copy memory to vram\n", r);
0379         while (i--) {
0380             svm_migrate_put_vram_page(adev, dst[i]);
0381             migrate->dst[i] = 0;
0382         }
0383     }
0384 
0385 #ifdef DEBUG_FORCE_MIXED_DOMAINS
0386     for (i = 0, j = 0; i < npages; i += 4, j++) {
0387         if (j & 1)
0388             continue;
0389         svm_migrate_put_vram_page(adev, dst[i]);
0390         migrate->dst[i] = 0;
0391         svm_migrate_put_vram_page(adev, dst[i + 1]);
0392         migrate->dst[i + 1] = 0;
0393         svm_migrate_put_vram_page(adev, dst[i + 2]);
0394         migrate->dst[i + 2] = 0;
0395         svm_migrate_put_vram_page(adev, dst[i + 3]);
0396         migrate->dst[i + 3] = 0;
0397     }
0398 #endif
0399 out:
0400     return r;
0401 }
0402 
0403 static long
0404 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
0405             struct vm_area_struct *vma, uint64_t start,
0406             uint64_t end, uint32_t trigger)
0407 {
0408     struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
0409     uint64_t npages = (end - start) >> PAGE_SHIFT;
0410     struct kfd_process_device *pdd;
0411     struct dma_fence *mfence = NULL;
0412     struct migrate_vma migrate;
0413     unsigned long cpages = 0;
0414     dma_addr_t *scratch;
0415     void *buf;
0416     int r = -ENOMEM;
0417 
0418     memset(&migrate, 0, sizeof(migrate));
0419     migrate.vma = vma;
0420     migrate.start = start;
0421     migrate.end = end;
0422     migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
0423     migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
0424 
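         /* Single allocation backing migrate.src, migrate.dst and the scratch
          * area used by svm_migrate_copy_to_vram (per-page DMA addresses plus
          * vram offsets).
          */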
0425     buf = kvcalloc(npages,
0426                2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
0427                GFP_KERNEL);
0428     if (!buf)
0429         goto out;
0430 
0431     migrate.src = buf;
0432     migrate.dst = migrate.src + npages;
0433     scratch = (dma_addr_t *)(migrate.dst + npages);
0434 
0435     kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
0436                       start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0437                       0, adev->kfd.dev->id, prange->prefetch_loc,
0438                       prange->preferred_loc, trigger);
0439 
0440     r = migrate_vma_setup(&migrate);
0441     if (r) {
0442         dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
0443             __func__, r, prange->start, prange->last);
0444         goto out_free;
0445     }
0446 
0447     cpages = migrate.cpages;
0448     if (!cpages) {
0449         pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
0450              prange->start, prange->last);
0451         goto out_free;
0452     }
0453     if (cpages != npages)
0454         pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
0455              cpages, npages);
0456     else
0457         pr_debug("0x%lx pages migrated\n", cpages);
0458 
0459     r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
0460     migrate_vma_pages(&migrate);
0461 
0462     pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
0463         svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
0464 
0465     svm_migrate_copy_done(adev, mfence);
0466     migrate_vma_finalize(&migrate);
0467 
0468     kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
0469                     start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0470                     0, adev->kfd.dev->id, trigger);
0471 
0472     svm_range_dma_unmap(adev->dev, scratch, 0, npages);
0473     svm_range_free_dma_mappings(prange);
0474 
0475 out_free:
0476     kvfree(buf);
0477 out:
0478     if (!r && cpages) {
0479         pdd = svm_range_get_pdd_by_adev(prange, adev);
0480         if (pdd)
0481             WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
0482 
0483         return cpages;
0484     }
0485     return r;
0486 }
0487 
0488 /**
0489  * svm_migrate_ram_to_vram - migrate svm range from system to device
0490  * @prange: range structure
0491  * @best_loc: the device to migrate to
0492  * @mm: the process mm structure
0493  * @trigger: reason for the migration
0494  *
0495  * Context: Process context, caller holds mmap read lock, svms lock, prange lock
0496  *
0497  * Return:
0498  * 0 - OK, otherwise error code
0499  */
0500 static int
0501 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
0502             struct mm_struct *mm, uint32_t trigger)
0503 {
0504     unsigned long addr, start, end;
0505     struct vm_area_struct *vma;
0506     struct amdgpu_device *adev;
0507     unsigned long cpages = 0;
0508     long r = 0;
0509 
0510     if (prange->actual_loc == best_loc) {
0511         pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
0512              prange->svms, prange->start, prange->last, best_loc);
0513         return 0;
0514     }
0515 
0516     adev = svm_range_get_adev_by_id(prange, best_loc);
0517     if (!adev) {
0518         pr_debug("failed to get device by id 0x%x\n", best_loc);
0519         return -ENODEV;
0520     }
0521 
0522     pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
0523          prange->start, prange->last, best_loc);
0524 
0525     /* FIXME: workaround for page locking bug with invalid pages */
0526     svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
0527 
0528     start = prange->start << PAGE_SHIFT;
0529     end = (prange->last + 1) << PAGE_SHIFT;
0530 
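         /* The prange may span several VMAs; migrate it one VMA at a time. */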
0531     for (addr = start; addr < end;) {
0532         unsigned long next;
0533 
0534         vma = find_vma(mm, addr);
0535         if (!vma || addr < vma->vm_start)
0536             break;
0537 
0538         next = min(vma->vm_end, end);
0539         r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
0540         if (r < 0) {
0541             pr_debug("failed %ld to migrate\n", r);
0542             break;
0543         } else {
0544             cpages += r;
0545         }
0546         addr = next;
0547     }
0548 
0549     if (cpages)
0550         prange->actual_loc = best_loc;
0551 
0552     return r < 0 ? r : 0;
0553 }
0554 
0555 static void svm_migrate_page_free(struct page *page)
0556 {
0557     struct svm_range_bo *svm_bo = page->zone_device_data;
0558 
0559     if (svm_bo) {
0560         pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
0561         svm_range_bo_unref_async(svm_bo);
0562     }
0563 }
0564 
0565 static int
0566 svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
0567             struct migrate_vma *migrate, struct dma_fence **mfence,
0568             dma_addr_t *scratch, uint64_t npages)
0569 {
0570     struct device *dev = adev->dev;
0571     uint64_t *src;
0572     dma_addr_t *dst;
0573     struct page *dpage;
0574     uint64_t i = 0, j;
0575     uint64_t addr;
0576     int r = 0;
0577 
0578     pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
0579          prange->last);
0580 
0581     addr = prange->start << PAGE_SHIFT;
0582 
0583     src = (uint64_t *)(scratch + npages);
0584     dst = scratch;
0585 
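         /* Batch physically contiguous vram source pages: j counts the pages in
          * the current run; a GART copy is flushed when the run breaks or a page
          * is already in system memory.
          */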
0586     for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
0587         struct page *spage;
0588 
0589         spage = migrate_pfn_to_page(migrate->src[i]);
0590         if (!spage || !is_zone_device_page(spage)) {
0591             pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
0592                  prange->svms, prange->start, prange->last);
0593             if (j) {
0594                 r = svm_migrate_copy_memory_gart(adev, dst + i - j,
0595                                  src + i - j, j,
0596                                  FROM_VRAM_TO_RAM,
0597                                  mfence);
0598                 if (r)
0599                     goto out_oom;
0600                 j = 0;
0601             }
0602             continue;
0603         }
0604         src[i] = svm_migrate_addr(adev, spage);
0605         if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
0606             r = svm_migrate_copy_memory_gart(adev, dst + i - j,
0607                              src + i - j, j,
0608                              FROM_VRAM_TO_RAM,
0609                              mfence);
0610             if (r)
0611                 goto out_oom;
0612             j = 0;
0613         }
0614 
0615         dpage = svm_migrate_get_sys_page(migrate->vma, addr);
0616         if (!dpage) {
0617             pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
0618                  prange->svms, prange->start, prange->last);
0619             r = -ENOMEM;
0620             goto out_oom;
0621         }
0622 
0623         dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
0624         r = dma_mapping_error(dev, dst[i]);
0625         if (r) {
0626             dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
0627             goto out_oom;
0628         }
0629 
0630         pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
0631                      dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
0632 
0633         migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
0634         j++;
0635     }
0636 
0637     r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
0638                      FROM_VRAM_TO_RAM, mfence);
0639 
0640 out_oom:
0641     if (r) {
0642         pr_debug("failed %d copy to ram\n", r);
0643         while (i--) {
0644             svm_migrate_put_sys_page(dst[i]);
0645             migrate->dst[i] = 0;
0646         }
0647     }
0648 
0649     return r;
0650 }
0651 
0652 /**
0653  * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
0654  *
0655  * @adev: amdgpu device to migrate from
0656  * @prange: svm range structure
0657  * @vma: vm_area_struct that range [start, end] belongs to
0658  * @start: range start virtual address
0659  * @end: range end virtual address
0660  *
0661  * Context: Process context, caller holds mmap read lock, prange->migrate_mutex
0662  *
0663  * Return:
0664  *   0 - success with all pages migrated
0665  *   negative values - indicate error
0666  *   positive values - partial migration, number of pages not migrated
0667  */
0668 static long
0669 svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
0670                struct vm_area_struct *vma, uint64_t start, uint64_t end,
0671                uint32_t trigger)
0672 {
0673     struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
0674     uint64_t npages = (end - start) >> PAGE_SHIFT;
0675     unsigned long upages = npages;
0676     unsigned long cpages = 0;
0677     struct kfd_process_device *pdd;
0678     struct dma_fence *mfence = NULL;
0679     struct migrate_vma migrate;
0680     dma_addr_t *scratch;
0681     void *buf;
0682     int r = -ENOMEM;
0683 
0684     memset(&migrate, 0, sizeof(migrate));
0685     migrate.vma = vma;
0686     migrate.start = start;
0687     migrate.end = end;
0688     migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
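         /* VRAM that is xgmi-connected to the CPU is registered as
          * device-coherent memory (see svm_migrate_init), otherwise as
          * device-private, so select the matching migrate source type.
          */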
0689     if (adev->gmc.xgmi.connected_to_cpu)
0690         migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
0691     else
0692         migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
0693 
0694     buf = kvcalloc(npages,
0695                2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
0696                GFP_KERNEL);
0697     if (!buf)
0698         goto out;
0699 
0700     migrate.src = buf;
0701     migrate.dst = migrate.src + npages;
0702     scratch = (dma_addr_t *)(migrate.dst + npages);
0703 
0704     kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
0705                       start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0706                       adev->kfd.dev->id, 0, prange->prefetch_loc,
0707                       prange->preferred_loc, trigger);
0708 
0709     r = migrate_vma_setup(&migrate);
0710     if (r) {
0711         dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
0712             __func__, r, prange->start, prange->last);
0713         goto out_free;
0714     }
0715 
0716     cpages = migrate.cpages;
0717     if (!cpages) {
0718         pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
0719              prange->start, prange->last);
0720         upages = svm_migrate_unsuccessful_pages(&migrate);
0721         goto out_free;
0722     }
0723     if (cpages != npages)
0724         pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
0725              cpages, npages);
0726     else
0727         pr_debug("0x%lx pages migrated\n", cpages);
0728 
0729     r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
0730                     scratch, npages);
0731     migrate_vma_pages(&migrate);
0732 
0733     upages = svm_migrate_unsuccessful_pages(&migrate);
0734     pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
0735          upages, cpages, migrate.npages);
0736 
0737     svm_migrate_copy_done(adev, mfence);
0738     migrate_vma_finalize(&migrate);
0739 
0740     kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
0741                     start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0742                     adev->kfd.dev->id, 0, trigger);
0743 
0744     svm_range_dma_unmap(adev->dev, scratch, 0, npages);
0745 
0746 out_free:
0747     kvfree(buf);
0748 out:
0749     if (!r && cpages) {
0750         pdd = svm_range_get_pdd_by_adev(prange, adev);
0751         if (pdd)
0752             WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
0753     }
0754     return r ? r : upages;
0755 }
0756 
0757 /**
0758  * svm_migrate_vram_to_ram - migrate svm range from device to system
0759  * @prange: range structure
0760  * @mm: process mm, use current->mm if NULL
0761  * @trigger: reason for the migration
0762  *
0763  * Context: Process context, caller holds mmap read lock, prange->migrate_mutex
0764  *
0765  * Return:
0766  * 0 - OK, otherwise error code
0767  */
0768 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
0769                 uint32_t trigger)
0770 {
0771     struct amdgpu_device *adev;
0772     struct vm_area_struct *vma;
0773     unsigned long addr;
0774     unsigned long start;
0775     unsigned long end;
0776     unsigned long upages = 0;
0777     long r = 0;
0778 
0779     if (!prange->actual_loc) {
0780         pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
0781              prange->start, prange->last);
0782         return 0;
0783     }
0784 
0785     adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
0786     if (!adev) {
0787         pr_debug("failed to get device by id 0x%x\n",
0788              prange->actual_loc);
0789         return -ENODEV;
0790     }
0791 
0792     pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
0793          prange->svms, prange, prange->start, prange->last,
0794          prange->actual_loc);
0795 
0796     start = prange->start << PAGE_SHIFT;
0797     end = (prange->last + 1) << PAGE_SHIFT;
0798 
0799     for (addr = start; addr < end;) {
0800         unsigned long next;
0801 
0802         vma = find_vma(mm, addr);
0803         if (!vma || addr < vma->vm_start) {
0804             pr_debug("failed to find vma for prange %p\n", prange);
0805             r = -EFAULT;
0806             break;
0807         }
0808 
0809         next = min(vma->vm_end, end);
0810         r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
0811         if (r < 0) {
0812             pr_debug("failed %ld to migrate prange %p\n", r, prange);
0813             break;
0814         } else {
0815             upages += r;
0816         }
0817         addr = next;
0818     }
0819 
0820     if (r >= 0 && !upages) {
0821         svm_range_vram_node_free(prange);
0822         prange->actual_loc = 0;
0823     }
0824 
0825     return r < 0 ? r : 0;
0826 }
0827 
0828 /**
0829  * svm_migrate_vram_to_vram - migrate svm range from device to device
0830  * @prange: range structure
0831  * @best_loc: the device to migrate to
0832  * @mm: process mm, use current->mm if NULL
0833  * @trigger: reason for the migration
0834  *
0835  * Context: Process context, caller holds mmap read lock, svms lock, prange lock
0836  *
0837  * Return:
0838  * 0 - OK, otherwise error code
0839  */
0840 static int
0841 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
0842              struct mm_struct *mm, uint32_t trigger)
0843 {
0844     int r, retries = 3;
0845 
0846     /*
0847      * TODO: when both devices have a PCIe large BAR or are on the same xgmi
0848      * hive, skip using system memory as the migration bridge
0849      */
0850 
0851     pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
0852 
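         /* Device-to-device migration currently stages through system memory:
          * move the range to ram first (retrying while pages remain in vram),
          * then migrate it to the destination device.
          */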
0853     do {
0854         r = svm_migrate_vram_to_ram(prange, mm, trigger);
0855         if (r)
0856             return r;
0857     } while (prange->actual_loc && --retries);
0858 
0859     if (prange->actual_loc)
0860         return -EDEADLK;
0861 
0862     return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
0863 }
0864 
0865 int
0866 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
0867             struct mm_struct *mm, uint32_t trigger)
0868 {
0869     if (!prange->actual_loc)
0870         return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
0871     else
0872         return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
0873 
0874 }
0875 
0876 /**
0877  * svm_migrate_to_ram - CPU page fault handler
0878  * @vmf: CPU vm fault vma, address
0879  *
0880  * Context: vm fault handler, caller holds the mmap read lock
0881  *
0882  * Return:
0883  * 0 - OK
0884  * VM_FAULT_SIGBUS - notify the application with a SIGBUS page fault
0885  */
0886 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
0887 {
0888     unsigned long addr = vmf->address;
0889     struct vm_area_struct *vma;
0890     enum svm_work_list_ops op;
0891     struct svm_range *parent;
0892     struct svm_range *prange;
0893     struct kfd_process *p;
0894     struct mm_struct *mm;
0895     int r = 0;
0896 
0897     vma = vmf->vma;
0898     mm = vma->vm_mm;
0899 
0900     p = kfd_lookup_process_by_mm(vma->vm_mm);
0901     if (!p) {
0902         pr_debug("failed find process at fault address 0x%lx\n", addr);
0903         return VM_FAULT_SIGBUS;
0904     }
0905     if (READ_ONCE(p->svms.faulting_task) == current) {
0906         pr_debug("skipping ram migration\n");
0907         kfd_unref_process(p);
0908         return 0;
0909     }
0910     addr >>= PAGE_SHIFT;
0911     pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
0912 
0913     mutex_lock(&p->svms.lock);
0914 
0915     prange = svm_range_from_addr(&p->svms, addr, &parent);
0916     if (!prange) {
0917         pr_debug("cannot find svm range at 0x%lx\n", addr);
0918         r = -EFAULT;
0919         goto out;
0920     }
0921 
0922     mutex_lock(&parent->migrate_mutex);
0923     if (prange != parent)
0924         mutex_lock_nested(&prange->migrate_mutex, 1);
0925 
0926     if (!prange->actual_loc)
0927         goto out_unlock_prange;
0928 
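         /* Split the range at migration granularity so that only the chunk
          * around the faulting address is migrated back to system memory.
          */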
0929     svm_range_lock(parent);
0930     if (prange != parent)
0931         mutex_lock_nested(&prange->lock, 1);
0932     r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
0933     if (prange != parent)
0934         mutex_unlock(&prange->lock);
0935     svm_range_unlock(parent);
0936     if (r) {
0937         pr_debug("failed %d to split range by granularity\n", r);
0938         goto out_unlock_prange;
0939     }
0940 
0941     r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
0942     if (r)
0943         pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
0944              prange, prange->start, prange->last);
0945 
0946     /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
0947     if (p->xnack_enabled && parent == prange)
0948         op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
0949     else
0950         op = SVM_OP_UPDATE_RANGE_NOTIFIER;
0951     svm_range_add_list_work(&p->svms, parent, mm, op);
0952     schedule_deferred_list_work(&p->svms);
0953 
0954 out_unlock_prange:
0955     if (prange != parent)
0956         mutex_unlock(&prange->migrate_mutex);
0957     mutex_unlock(&parent->migrate_mutex);
0958 out:
0959     mutex_unlock(&p->svms.lock);
0960     kfd_unref_process(p);
0961 
0962     pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
0963 
0964     return r ? VM_FAULT_SIGBUS : 0;
0965 }
0966 
0967 static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
0968     .page_free      = svm_migrate_page_free,
0969     .migrate_to_ram     = svm_migrate_to_ram,
0970 };
0971 
0972 /* Each VRAM page uses sizeof(struct page) of system memory */
0973 #define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
0974 
0975 int svm_migrate_init(struct amdgpu_device *adev)
0976 {
0977     struct kfd_dev *kfddev = adev->kfd.dev;
0978     struct dev_pagemap *pgmap;
0979     struct resource *res = NULL;
0980     unsigned long size;
0981     void *r;
0982 
0983     /* Page migration works on Vega10 or newer */
0984     if (!KFD_IS_SOC15(kfddev))
0985         return -EINVAL;
0986 
0987     pgmap = &kfddev->pgmap;
0988     memset(pgmap, 0, sizeof(*pgmap));
0989 
0990     /* TODO: register all vram to HMM for now.
0991      * should remove reserved size
0992      */
0993     size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
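         /* VRAM that is xgmi-connected to the CPU is CPU-addressable through
          * its aperture and is registered as device-coherent memory; otherwise
          * a free physical address range is reserved and the VRAM is exposed
          * as device-private memory.
          */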
0994     if (adev->gmc.xgmi.connected_to_cpu) {
0995         pgmap->range.start = adev->gmc.aper_base;
0996         pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
0997         pgmap->type = MEMORY_DEVICE_COHERENT;
0998     } else {
0999         res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
1000         if (IS_ERR(res))
1001             return -ENOMEM;
1002         pgmap->range.start = res->start;
1003         pgmap->range.end = res->end;
1004         pgmap->type = MEMORY_DEVICE_PRIVATE;
1005     }
1006 
1007     pgmap->nr_range = 1;
1008     pgmap->ops = &svm_migrate_pgmap_ops;
1009     pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
1010     pgmap->flags = 0;
1011     /* Device manager releases device-specific resources, memory region and
1012      * pgmap when driver disconnects from device.
1013      */
1014     r = devm_memremap_pages(adev->dev, pgmap);
1015     if (IS_ERR(r)) {
1016         pr_err("failed to register HMM device memory\n");
1017         if (pgmap->type == MEMORY_DEVICE_PRIVATE)
1018             devm_release_mem_region(adev->dev, res->start,
1019                         res->end - res->start + 1);
1020         /* Disable SVM support capability */
1021         pgmap->type = 0;
1022         return PTR_ERR(r);
1023     }
1024 
1025     pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
1026          SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
1027 
1028     amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
1029 
1030     svm_range_set_max_pages(adev);
1031 
1032     pr_info("HMM registered %ldMB device memory\n", size >> 20);
1033 
1034     return 0;
1035 }