drm/nouveau/nouveau_svm.c

0001 /*
0002  * Copyright 2018 Red Hat Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  */
0022 #include "nouveau_svm.h"
0023 #include "nouveau_drv.h"
0024 #include "nouveau_chan.h"
0025 #include "nouveau_dmem.h"
0026
0027 #include <nvif/notify.h>
0028 #include <nvif/object.h>
0029 #include <nvif/vmm.h>
0030
0031 #include <nvif/class.h>
0032 #include <nvif/clb069.h>
0033 #include <nvif/ifc00d.h>
0034
0035 #include <linux/sched/mm.h>
0036 #include <linux/sort.h>
0037 #include <linux/hmm.h>
0038 #include <linux/memremap.h>
0039 #include <linux/rmap.h>
0040
0041 struct nouveau_svm {
0042     struct nouveau_drm *drm;
0043     struct mutex mutex;
0044     struct list_head inst;
0045
0046     struct nouveau_svm_fault_buffer {
0047         int id;
0048         struct nvif_object object;
0049         u32 entries;
0050         u32 getaddr;
0051         u32 putaddr;
0052         u32 get;
0053         u32 put;
0054         struct nvif_notify notify;
0055
0056         struct nouveau_svm_fault {
0057             u64 inst;
0058             u64 addr;
0059             u64 time;
0060             u32 engine;
0061             u8  gpc;
0062             u8  hub;
0063             u8  access;
0064             u8  client;
0065             u8  fault;
0066             struct nouveau_svmm *svmm;
0067         } **fault;
0068         int fault_nr;
0069     } buffer[1];
0070 };
0071
/* Values of nouveau_svm_fault.access. */
#define FAULT_ACCESS_READ     0
#define FAULT_ACCESS_WRITE    1
#define FAULT_ACCESS_ATOMIC   2
#define FAULT_ACCESS_PREFETCH 3

/* Logging helpers taking a struct nouveau_svm pointer as (s). */
#define SVM_DBG(s, f, a...) NV_DEBUG((s)->drm, "svm: " f "\n", ##a)
#define SVM_ERR(s, f, a...) NV_WARN((s)->drm, "svm: " f "\n", ##a)
0079
0080 struct nouveau_pfnmap_args {
0081     struct nvif_ioctl_v0 i;
0082     struct nvif_ioctl_mthd_v0 m;
0083     struct nvif_vmm_pfnmap_v0 p;
0084 };
0085
0086 struct nouveau_ivmm {
0087     struct nouveau_svmm *svmm;
0088     u64 inst;
0089     struct list_head head;
0090 };
0091
0092 static struct nouveau_ivmm *
0093 nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
0094 {
0095     struct nouveau_ivmm *ivmm;
0096     list_for_each_entry(ivmm, &svm->inst, head) {
0097         if (ivmm->inst == inst)
0098             return ivmm;
0099     }
0100     return NULL;
0101 }
0102
/* Logging helpers taking a struct nouveau_svmm pointer as (s).
 * Both dereference (s)->vmm, so (s)->vmm must be non-NULL at the call site.
 */
#define SVMM_DBG(s, f, a...) \
    NV_DEBUG((s)->vmm->cli->drm, "svm-%p: " f "\n", (s), ##a)
#define SVMM_ERR(s, f, a...) \
    NV_WARN((s)->vmm->cli->drm, "svm-%p: " f "\n", (s), ##a)
0107
0108 int
0109 nouveau_svmm_bind(struct drm_device *dev, void *data,
0110           struct drm_file *file_priv)
0111 {
0112     struct nouveau_cli *cli = nouveau_cli(file_priv);
0113     struct drm_nouveau_svm_bind *args = data;
0114     unsigned target, cmd, priority;
0115     unsigned long addr, end;
0116     struct mm_struct *mm;
0117
0118     args->va_start &= PAGE_MASK;
0119     args->va_end = ALIGN(args->va_end, PAGE_SIZE);
0120
0121     /* Sanity check arguments */
0122     if (args->reserved0 || args->reserved1)
0123         return -EINVAL;
0124     if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK))
0125         return -EINVAL;
0126     if (args->va_start >= args->va_end)
0127         return -EINVAL;
0128
0129     cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT;
0130     cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK;
0131     switch (cmd) {
0132     case NOUVEAU_SVM_BIND_COMMAND__MIGRATE:
0133         break;
0134     default:
0135         return -EINVAL;
0136     }
0137
0138     priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT;
0139     priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK;
0140
0141     /* FIXME support CPU target ie all target value < GPU_VRAM */
0142     target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT;
0143     target &= NOUVEAU_SVM_BIND_TARGET_MASK;
0144     switch (target) {
0145     case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM:
0146         break;
0147     default:
0148         return -EINVAL;
0149     }
0150
0151     /*
0152      * FIXME: For now refuse non 0 stride, we need to change the migrate
0153      * kernel function to handle stride to avoid to create a mess within
0154      * each device driver.
0155      */
0156     if (args->stride)
0157         return -EINVAL;
0158
0159     /*
0160      * Ok we are ask to do something sane, for now we only support migrate
0161      * commands but we will add things like memory policy (what to do on
0162      * page fault) and maybe some other commands.
0163      */
0164
0165     mm = get_task_mm(current);
0166     if (!mm) {
0167         return -EINVAL;
0168     }
0169     mmap_read_lock(mm);
0170
0171     if (!cli->svm.svmm) {
0172         mmap_read_unlock(mm);
0173         mmput(mm);
0174         return -EINVAL;
0175     }
0176
0177     for (addr = args->va_start, end = args->va_end; addr < end;) {
0178         struct vm_area_struct *vma;
0179         unsigned long next;
0180
0181         vma = find_vma_intersection(mm, addr, end);
0182         if (!vma)
0183             break;
0184
0185         addr = max(addr, vma->vm_start);
0186         next = min(vma->vm_end, end);
0187         /* This is a best effort so we ignore errors */
0188         nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
0189                      next);
0190         addr = next;
0191     }
0192
0193     /*
0194      * FIXME Return the number of page we have migrated, again we need to
0195      * update the migrate API to return that information so that we can
0196      * report it to user space.
0197      */
0198     args->result = 0;
0199
0200     mmap_read_unlock(mm);
0201     mmput(mm);
0202
0203     return 0;
0204 }
0205
0206 /* Unlink channel instance from SVMM. */
0207 void
0208 nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
0209 {
0210     struct nouveau_ivmm *ivmm;
0211     if (svmm) {
0212         mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
0213         ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
0214         if (ivmm) {
0215             list_del(&ivmm->head);
0216             kfree(ivmm);
0217         }
0218         mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
0219     }
0220 }
0221
0222 /* Link channel instance to SVMM. */
0223 int
0224 nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
0225 {
0226     struct nouveau_ivmm *ivmm;
0227     if (svmm) {
0228         if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
0229             return -ENOMEM;
0230         ivmm->svmm = svmm;
0231         ivmm->inst = inst;
0232
0233         mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
0234         list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
0235         mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
0236     }
0237     return 0;
0238 }
0239
0240 /* Invalidate SVMM address-range on GPU. */
0241 void
0242 nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
0243 {
0244     if (limit > start) {
0245         nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
0246                  &(struct nvif_vmm_pfnclr_v0) {
0247                     .addr = start,
0248                     .size = limit - start,
0249                  }, sizeof(struct nvif_vmm_pfnclr_v0));
0250     }
0251 }
0252
0253 static int
0254 nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
0255                     const struct mmu_notifier_range *update)
0256 {
0257     struct nouveau_svmm *svmm =
0258         container_of(mn, struct nouveau_svmm, notifier);
0259     unsigned long start = update->start;
0260     unsigned long limit = update->end;
0261
0262     if (!mmu_notifier_range_blockable(update))
0263         return -EAGAIN;
0264
0265     SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
0266
0267     mutex_lock(&svmm->mutex);
0268     if (unlikely(!svmm->vmm))
0269         goto out;
0270
0271     /*
0272      * Ignore invalidation callbacks for device private pages since
0273      * the invalidation is handled as part of the migration process.
0274      */
0275     if (update->event == MMU_NOTIFY_MIGRATE &&
0276         update->owner == svmm->vmm->cli->drm->dev)
0277         goto out;
0278
0279     if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
0280         if (start < svmm->unmanaged.start) {
0281             nouveau_svmm_invalidate(svmm, start,
0282                         svmm->unmanaged.limit);
0283         }
0284         start = svmm->unmanaged.limit;
0285     }
0286
0287     nouveau_svmm_invalidate(svmm, start, limit);
0288
0289 out:
0290     mutex_unlock(&svmm->mutex);
0291     return 0;
0292 }
0293
0294 static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
0295 {
0296     kfree(container_of(mn, struct nouveau_svmm, notifier));
0297 }
0298
0299 static const struct mmu_notifier_ops nouveau_mn_ops = {
0300     .invalidate_range_start = nouveau_svmm_invalidate_range_start,
0301     .free_notifier = nouveau_svmm_free_notifier,
0302 };
0303
0304 void
0305 nouveau_svmm_fini(struct nouveau_svmm **psvmm)
0306 {
0307     struct nouveau_svmm *svmm = *psvmm;
0308     if (svmm) {
0309         mutex_lock(&svmm->mutex);
0310         svmm->vmm = NULL;
0311         mutex_unlock(&svmm->mutex);
0312         mmu_notifier_put(&svmm->notifier);
0313         *psvmm = NULL;
0314     }
0315 }
0316
0317 int
0318 nouveau_svmm_init(struct drm_device *dev, void *data,
0319           struct drm_file *file_priv)
0320 {
0321     struct nouveau_cli *cli = nouveau_cli(file_priv);
0322     struct nouveau_svmm *svmm;
0323     struct drm_nouveau_svm_init *args = data;
0324     int ret;
0325
0326     /* We need to fail if svm is disabled */
0327     if (!cli->drm->svm)
0328         return -ENOSYS;
0329
0330     /* Allocate tracking for SVM-enabled VMM. */
0331     if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
0332         return -ENOMEM;
0333     svmm->vmm = &cli->svm;
0334     svmm->unmanaged.start = args->unmanaged_addr;
0335     svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size;
0336     mutex_init(&svmm->mutex);
0337
0338     /* Check that SVM isn't already enabled for the client. */
0339     mutex_lock(&cli->mutex);
0340     if (cli->svm.cli) {
0341         ret = -EBUSY;
0342         goto out_free;
0343     }
0344
0345     /* Allocate a new GPU VMM that can support SVM (managed by the
0346      * client, with replayable faults enabled).
0347      *
0348      * All future channel/memory allocations will make use of this
0349      * VMM instead of the standard one.
0350      */
0351     ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
0352                 cli->vmm.vmm.object.oclass, true,
0353                 args->unmanaged_addr, args->unmanaged_size,
0354                 &(struct gp100_vmm_v0) {
0355                 .fault_replay = true,
0356                 }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
0357     if (ret)
0358         goto out_free;
0359
0360     mmap_write_lock(current->mm);
0361     svmm->notifier.ops = &nouveau_mn_ops;
0362     ret = __mmu_notifier_register(&svmm->notifier, current->mm);
0363     if (ret)
0364         goto out_mm_unlock;
0365     /* Note, ownership of svmm transfers to mmu_notifier */
0366
0367     cli->svm.svmm = svmm;
0368     cli->svm.cli = cli;
0369     mmap_write_unlock(current->mm);
0370     mutex_unlock(&cli->mutex);
0371     return 0;
0372
0373 out_mm_unlock:
0374     mmap_write_unlock(current->mm);
0375 out_free:
0376     mutex_unlock(&cli->mutex);
0377     kfree(svmm);
0378     return ret;
0379 }
0380
0381 /* Issue fault replay for GPU to retry accesses that faulted previously. */
0382 static void
0383 nouveau_svm_fault_replay(struct nouveau_svm *svm)
0384 {
0385     SVM_DBG(svm, "replay");
0386     WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
0387                  GP100_VMM_VN_FAULT_REPLAY,
0388                  &(struct gp100_vmm_fault_replay_vn) {},
0389                  sizeof(struct gp100_vmm_fault_replay_vn)));
0390 }
0391
0392 /* Cancel a replayable fault that could not be handled.
0393  *
0394  * Cancelling the fault will trigger recovery to reset the engine
0395  * and kill the offending channel (ie. GPU SIGSEGV).
0396  */
0397 static void
0398 nouveau_svm_fault_cancel(struct nouveau_svm *svm,
0399              u64 inst, u8 hub, u8 gpc, u8 client)
0400 {
0401     SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
0402     WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
0403                  GP100_VMM_VN_FAULT_CANCEL,
0404                  &(struct gp100_vmm_fault_cancel_v0) {
0405                     .hub = hub,
0406                     .gpc = gpc,
0407                     .client = client,
0408                     .inst = inst,
0409                  }, sizeof(struct gp100_vmm_fault_cancel_v0)));
0410 }
0411
0412 static void
0413 nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
0414                    struct nouveau_svm_fault *fault)
0415 {
0416     nouveau_svm_fault_cancel(svm, fault->inst,
0417                       fault->hub,
0418                       fault->gpc,
0419                       fault->client);
0420 }
0421
0422 static int
0423 nouveau_svm_fault_priority(u8 fault)
0424 {
0425     switch (fault) {
0426     case FAULT_ACCESS_PREFETCH:
0427         return 0;
0428     case FAULT_ACCESS_READ:
0429         return 1;
0430     case FAULT_ACCESS_WRITE:
0431         return 2;
0432     case FAULT_ACCESS_ATOMIC:
0433         return 3;
0434     default:
0435         WARN_ON_ONCE(1);
0436         return -1;
0437     }
0438 }
0439
0440 static int
0441 nouveau_svm_fault_cmp(const void *a, const void *b)
0442 {
0443     const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a;
0444     const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b;
0445     int ret;
0446     if ((ret = (s64)fa->inst - fb->inst))
0447         return ret;
0448     if ((ret = (s64)fa->addr - fb->addr))
0449         return ret;
0450     return nouveau_svm_fault_priority(fa->access) -
0451         nouveau_svm_fault_priority(fb->access);
0452 }
0453
0454 static void
0455 nouveau_svm_fault_cache(struct nouveau_svm *svm,
0456             struct nouveau_svm_fault_buffer *buffer, u32 offset)
0457 {
0458     struct nvif_object *memory = &buffer->object;
0459     const u32 instlo = nvif_rd32(memory, offset + 0x00);
0460     const u32 insthi = nvif_rd32(memory, offset + 0x04);
0461     const u32 addrlo = nvif_rd32(memory, offset + 0x08);
0462     const u32 addrhi = nvif_rd32(memory, offset + 0x0c);
0463     const u32 timelo = nvif_rd32(memory, offset + 0x10);
0464     const u32 timehi = nvif_rd32(memory, offset + 0x14);
0465     const u32 engine = nvif_rd32(memory, offset + 0x18);
0466     const u32   info = nvif_rd32(memory, offset + 0x1c);
0467     const u64   inst = (u64)insthi << 32 | instlo;
0468     const u8     gpc = (info & 0x1f000000) >> 24;
0469     const u8     hub = (info & 0x00100000) >> 20;
0470     const u8  client = (info & 0x00007f00) >> 8;
0471     struct nouveau_svm_fault *fault;
0472
0473     //XXX: i think we're supposed to spin waiting */
0474     if (WARN_ON(!(info & 0x80000000)))
0475         return;
0476
0477     nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000);
0478
0479     if (!buffer->fault[buffer->fault_nr]) {
0480         fault = kmalloc(sizeof(*fault), GFP_KERNEL);
0481         if (WARN_ON(!fault)) {
0482             nouveau_svm_fault_cancel(svm, inst, hub, gpc, client);
0483             return;
0484         }
0485         buffer->fault[buffer->fault_nr] = fault;
0486     }
0487
0488     fault = buffer->fault[buffer->fault_nr++];
0489     fault->inst   = inst;
0490     fault->addr   = (u64)addrhi << 32 | addrlo;
0491     fault->time   = (u64)timehi << 32 | timelo;
0492     fault->engine = engine;
0493     fault->gpc    = gpc;
0494     fault->hub    = hub;
0495     fault->access = (info & 0x000f0000) >> 16;
0496     fault->client = client;
0497     fault->fault  = (info & 0x0000001f);
0498
0499     SVM_DBG(svm, "fault %016llx %016llx %02x",
0500         fault->inst, fault->addr, fault->access);
0501 }
0502
0503 struct svm_notifier {
0504     struct mmu_interval_notifier notifier;
0505     struct nouveau_svmm *svmm;
0506 };
0507
0508 static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
0509                      const struct mmu_notifier_range *range,
0510                      unsigned long cur_seq)
0511 {
0512     struct svm_notifier *sn =
0513         container_of(mni, struct svm_notifier, notifier);
0514
0515     if (range->event == MMU_NOTIFY_EXCLUSIVE &&
0516         range->owner == sn->svmm->vmm->cli->drm->dev)
0517         return true;
0518
0519     /*
0520      * serializes the update to mni->invalidate_seq done by caller and
0521      * prevents invalidation of the PTE from progressing while HW is being
0522      * programmed. This is very hacky and only works because the normal
0523      * notifier that does invalidation is always called after the range
0524      * notifier.
0525      */
0526     if (mmu_notifier_range_blockable(range))
0527         mutex_lock(&sn->svmm->mutex);
0528     else if (!mutex_trylock(&sn->svmm->mutex))
0529         return false;
0530     mmu_interval_set_seq(mni, cur_seq);
0531     mutex_unlock(&sn->svmm->mutex);
0532     return true;
0533 }
0534
0535 static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
0536     .invalidate = nouveau_svm_range_invalidate,
0537 };
0538
0539 static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
0540                     struct hmm_range *range,
0541                     struct nouveau_pfnmap_args *args)
0542 {
0543     struct page *page;
0544
0545     /*
0546      * The address prepared here is passed through nvif_object_ioctl()
0547      * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
0548      *
0549      * This is all just encoding the internal hmm representation into a
0550      * different nouveau internal representation.
0551      */
0552     if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
0553         args->p.phys[0] = 0;
0554         return;
0555     }
0556
0557     page = hmm_pfn_to_page(range->hmm_pfns[0]);
0558     /*
0559      * Only map compound pages to the GPU if the CPU is also mapping the
0560      * page as a compound page. Otherwise, the PTE protections might not be
0561      * consistent (e.g., CPU only maps part of a compound page).
0562      * Note that the underlying page might still be larger than the
0563      * CPU mapping (e.g., a PUD sized compound page partially mapped with
0564      * a PMD sized page table entry).
0565      */
0566     if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
0567         unsigned long addr = args->p.addr;
0568
0569         args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
0570                 PAGE_SHIFT;
0571         args->p.size = 1UL << args->p.page;
0572         args->p.addr &= ~(args->p.size - 1);
0573         page -= (addr - args->p.addr) >> PAGE_SHIFT;
0574     }
0575     if (is_device_private_page(page))
0576         args->p.phys[0] = nouveau_dmem_page_addr(page) |
0577                 NVIF_VMM_PFNMAP_V0_V |
0578                 NVIF_VMM_PFNMAP_V0_VRAM;
0579     else
0580         args->p.phys[0] = page_to_phys(page) |
0581                 NVIF_VMM_PFNMAP_V0_V |
0582                 NVIF_VMM_PFNMAP_V0_HOST;
0583     if (range->hmm_pfns[0] & HMM_PFN_WRITE)
0584         args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
0585 }
0586
0587 static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
0588                    struct nouveau_drm *drm,
0589                    struct nouveau_pfnmap_args *args, u32 size,
0590                    struct svm_notifier *notifier)
0591 {
0592     unsigned long timeout =
0593         jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
0594     struct mm_struct *mm = svmm->notifier.mm;
0595     struct page *page;
0596     unsigned long start = args->p.addr;
0597     unsigned long notifier_seq;
0598     int ret = 0;
0599
0600     ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
0601                     args->p.addr, args->p.size,
0602                     &nouveau_svm_mni_ops);
0603     if (ret)
0604         return ret;
0605
0606     while (true) {
0607         if (time_after(jiffies, timeout)) {
0608             ret = -EBUSY;
0609             goto out;
0610         }
0611
0612         notifier_seq = mmu_interval_read_begin(&notifier->notifier);
0613         mmap_read_lock(mm);
0614         ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
0615                         &page, drm->dev);
0616         mmap_read_unlock(mm);
0617         if (ret <= 0 || !page) {
0618             ret = -EINVAL;
0619             goto out;
0620         }
0621
0622         mutex_lock(&svmm->mutex);
0623         if (!mmu_interval_read_retry(&notifier->notifier,
0624                          notifier_seq))
0625             break;
0626         mutex_unlock(&svmm->mutex);
0627     }
0628
0629     /* Map the page on the GPU. */
0630     args->p.page = 12;
0631     args->p.size = PAGE_SIZE;
0632     args->p.addr = start;
0633     args->p.phys[0] = page_to_phys(page) |
0634         NVIF_VMM_PFNMAP_V0_V |
0635         NVIF_VMM_PFNMAP_V0_W |
0636         NVIF_VMM_PFNMAP_V0_A |
0637         NVIF_VMM_PFNMAP_V0_HOST;
0638
0639     ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
0640     mutex_unlock(&svmm->mutex);
0641
0642     unlock_page(page);
0643     put_page(page);
0644
0645 out:
0646     mmu_interval_notifier_remove(&notifier->notifier);
0647     return ret;
0648 }
0649
0650 static int nouveau_range_fault(struct nouveau_svmm *svmm,
0651                    struct nouveau_drm *drm,
0652                    struct nouveau_pfnmap_args *args, u32 size,
0653                    unsigned long hmm_flags,
0654                    struct svm_notifier *notifier)
0655 {
0656     unsigned long timeout =
0657         jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
0658     /* Have HMM fault pages within the fault window to the GPU. */
0659     unsigned long hmm_pfns[1];
0660     struct hmm_range range = {
0661         .notifier = &notifier->notifier,
0662         .default_flags = hmm_flags,
0663         .hmm_pfns = hmm_pfns,
0664         .dev_private_owner = drm->dev,
0665     };
0666     struct mm_struct *mm = svmm->notifier.mm;
0667     int ret;
0668
0669     ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
0670                     args->p.addr, args->p.size,
0671                     &nouveau_svm_mni_ops);
0672     if (ret)
0673         return ret;
0674
0675     range.start = notifier->notifier.interval_tree.start;
0676     range.end = notifier->notifier.interval_tree.last + 1;
0677
0678     while (true) {
0679         if (time_after(jiffies, timeout)) {
0680             ret = -EBUSY;
0681             goto out;
0682         }
0683
0684         range.notifier_seq = mmu_interval_read_begin(range.notifier);
0685         mmap_read_lock(mm);
0686         ret = hmm_range_fault(&range);
0687         mmap_read_unlock(mm);
0688         if (ret) {
0689             if (ret == -EBUSY)
0690                 continue;
0691             goto out;
0692         }
0693
0694         mutex_lock(&svmm->mutex);
0695         if (mmu_interval_read_retry(range.notifier,
0696                         range.notifier_seq)) {
0697             mutex_unlock(&svmm->mutex);
0698             continue;
0699         }
0700         break;
0701     }
0702
0703     nouveau_hmm_convert_pfn(drm, &range, args);
0704
0705     ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
0706     mutex_unlock(&svmm->mutex);
0707
0708 out:
0709     mmu_interval_notifier_remove(&notifier->notifier);
0710
0711     return ret;
0712 }
0713
0714 static int
0715 nouveau_svm_fault(struct nvif_notify *notify)
0716 {
0717     struct nouveau_svm_fault_buffer *buffer =
0718         container_of(notify, typeof(*buffer), notify);
0719     struct nouveau_svm *svm =
0720         container_of(buffer, typeof(*svm), buffer[buffer->id]);
0721     struct nvif_object *device = &svm->drm->client.device.object;
0722     struct nouveau_svmm *svmm;
0723     struct {
0724         struct nouveau_pfnmap_args i;
0725         u64 phys[1];
0726     } args;
0727     unsigned long hmm_flags;
0728     u64 inst, start, limit;
0729     int fi, fn;
0730     int replay = 0, atomic = 0, ret;
0731
0732     /* Parse available fault buffer entries into a cache, and update
0733      * the GET pointer so HW can reuse the entries.
0734      */
0735     SVM_DBG(svm, "fault handler");
0736     if (buffer->get == buffer->put) {
0737         buffer->put = nvif_rd32(device, buffer->putaddr);
0738         buffer->get = nvif_rd32(device, buffer->getaddr);
0739         if (buffer->get == buffer->put)
0740             return NVIF_NOTIFY_KEEP;
0741     }
0742     buffer->fault_nr = 0;
0743
0744     SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
0745     while (buffer->get != buffer->put) {
0746         nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
0747         if (++buffer->get == buffer->entries)
0748             buffer->get = 0;
0749     }
0750     nvif_wr32(device, buffer->getaddr, buffer->get);
0751     SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);
0752
0753     /* Sort parsed faults by instance pointer to prevent unnecessary
0754      * instance to SVMM translations, followed by address and access
0755      * type to reduce the amount of work when handling the faults.
0756      */
0757     sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
0758          nouveau_svm_fault_cmp, NULL);
0759
0760     /* Lookup SVMM structure for each unique instance pointer. */
0761     mutex_lock(&svm->mutex);
0762     for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) {
0763         if (!svmm || buffer->fault[fi]->inst != inst) {
0764             struct nouveau_ivmm *ivmm =
0765                 nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
0766             svmm = ivmm ? ivmm->svmm : NULL;
0767             inst = buffer->fault[fi]->inst;
0768             SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
0769         }
0770         buffer->fault[fi]->svmm = svmm;
0771     }
0772     mutex_unlock(&svm->mutex);
0773
0774     /* Process list of faults. */
0775     args.i.i.version = 0;
0776     args.i.i.type = NVIF_IOCTL_V0_MTHD;
0777     args.i.m.version = 0;
0778     args.i.m.method = NVIF_VMM_V0_PFNMAP;
0779     args.i.p.version = 0;
0780
0781     for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
0782         struct svm_notifier notifier;
0783         struct mm_struct *mm;
0784
0785         /* Cancel any faults from non-SVM channels. */
0786         if (!(svmm = buffer->fault[fi]->svmm)) {
0787             nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
0788             continue;
0789         }
0790         SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
0791
0792         /* We try and group handling of faults within a small
0793          * window into a single update.
0794          */
0795         start = buffer->fault[fi]->addr;
0796         limit = start + PAGE_SIZE;
0797         if (start < svmm->unmanaged.limit)
0798             limit = min_t(u64, limit, svmm->unmanaged.start);
0799
0800         /*
0801          * Prepare the GPU-side update of all pages within the
0802          * fault window, determining required pages and access
0803          * permissions based on pending faults.
0804          */
0805         args.i.p.addr = start;
0806         args.i.p.page = PAGE_SHIFT;
0807         args.i.p.size = PAGE_SIZE;
0808         /*
0809          * Determine required permissions based on GPU fault
0810          * access flags.
0811          */
0812         switch (buffer->fault[fi]->access) {
0813         case 0: /* READ. */
0814             hmm_flags = HMM_PFN_REQ_FAULT;
0815             break;
0816         case 2: /* ATOMIC. */
0817             atomic = true;
0818             break;
0819         case 3: /* PREFETCH. */
0820             hmm_flags = 0;
0821             break;
0822         default:
0823             hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
0824             break;
0825         }
0826
0827         mm = svmm->notifier.mm;
0828         if (!mmget_not_zero(mm)) {
0829             nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
0830             continue;
0831         }
0832
0833         notifier.svmm = svmm;
0834         if (atomic)
0835             ret = nouveau_atomic_range_fault(svmm, svm->drm,
0836                              &args.i, sizeof(args),
0837                              &notifier);
0838         else
0839             ret = nouveau_range_fault(svmm, svm->drm, &args.i,
0840                           sizeof(args), hmm_flags,
0841                           &notifier);
0842         mmput(mm);
0843
0844         limit = args.i.p.addr + args.i.p.size;
0845         for (fn = fi; ++fn < buffer->fault_nr; ) {
0846             /* It's okay to skip over duplicate addresses from the
0847              * same SVMM as faults are ordered by access type such
0848              * that only the first one needs to be handled.
0849              *
0850              * ie. WRITE faults appear first, thus any handling of
0851              * pending READ faults will already be satisfied.
0852              * But if a large page is mapped, make sure subsequent
0853              * fault addresses have sufficient access permission.
0854              */
0855             if (buffer->fault[fn]->svmm != svmm ||
0856                 buffer->fault[fn]->addr >= limit ||
0857                 (buffer->fault[fi]->access == FAULT_ACCESS_READ &&
0858                  !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
0859                 (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
0860                  buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
0861                  !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) ||
0862                 (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
0863                  buffer->fault[fi]->access != FAULT_ACCESS_WRITE &&
0864                  buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
0865                  !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A)))
0866                 break;
0867         }
0868
0869         /* If handling failed completely, cancel all faults. */
0870         if (ret) {
0871             while (fi < fn) {
0872                 struct nouveau_svm_fault *fault =
0873                     buffer->fault[fi++];
0874
0875                 nouveau_svm_fault_cancel_fault(svm, fault);
0876             }
0877         } else
0878             replay++;
0879     }
0880
0881     /* Issue fault replay to the GPU. */
0882     if (replay)
0883         nouveau_svm_fault_replay(svm);
0884     return NVIF_NOTIFY_KEEP;
0885 }
0886
0887 static struct nouveau_pfnmap_args *
0888 nouveau_pfns_to_args(void *pfns)
0889 {
0890     return container_of(pfns, struct nouveau_pfnmap_args, p.phys);
0891 }
0892
0893 u64 *
0894 nouveau_pfns_alloc(unsigned long npages)
0895 {
0896     struct nouveau_pfnmap_args *args;
0897
0898     args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL);
0899     if (!args)
0900         return NULL;
0901
0902     args->i.type = NVIF_IOCTL_V0_MTHD;
0903     args->m.method = NVIF_VMM_V0_PFNMAP;
0904     args->p.page = PAGE_SHIFT;
0905
0906     return args->p.phys;
0907 }
0908
0909 void
0910 nouveau_pfns_free(u64 *pfns)
0911 {
0912     struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
0913
0914     kfree(args);
0915 }
0916
0917 void
0918 nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
0919          unsigned long addr, u64 *pfns, unsigned long npages)
0920 {
0921     struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
0922     int ret;
0923
0924     args->p.addr = addr;
0925     args->p.size = npages << PAGE_SHIFT;
0926
0927     mutex_lock(&svmm->mutex);
0928
0929     ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args,
0930                 struct_size(args, p.phys, npages), NULL);
0931
0932     mutex_unlock(&svmm->mutex);
0933 }
0934
0935 static void
0936 nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
0937 {
0938     struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
0939     nvif_notify_put(&buffer->notify);
0940 }
0941
0942 static int
0943 nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
0944 {
0945     struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
0946     struct nvif_object *device = &svm->drm->client.device.object;
0947     buffer->get = nvif_rd32(device, buffer->getaddr);
0948     buffer->put = nvif_rd32(device, buffer->putaddr);
0949     SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
0950     return nvif_notify_get(&buffer->notify);
0951 }
0952
0953 static void
0954 nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
0955 {
0956     struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
0957     int i;
0958
0959     if (buffer->fault) {
0960         for (i = 0; buffer->fault[i] && i < buffer->entries; i++)
0961             kfree(buffer->fault[i]);
0962         kvfree(buffer->fault);
0963     }
0964
0965     nouveau_svm_fault_buffer_fini(svm, id);
0966
0967     nvif_notify_dtor(&buffer->notify);
0968     nvif_object_dtor(&buffer->object);
0969 }
0970
0971 static int
0972 nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
0973 {
0974     struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
0975     struct nouveau_drm *drm = svm->drm;
0976     struct nvif_object *device = &drm->client.device.object;
0977     struct nvif_clb069_v0 args = {};
0978     int ret;
0979
0980     buffer->id = id;
0981
0982     ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
0983                    sizeof(args), &buffer->object);
0984     if (ret < 0) {
0985         SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
0986         return ret;
0987     }
0988
0989     nvif_object_map(&buffer->object, NULL, 0);
0990     buffer->entries = args.entries;
0991     buffer->getaddr = args.get;
0992     buffer->putaddr = args.put;
0993
0994     ret = nvif_notify_ctor(&buffer->object, "svmFault", nouveau_svm_fault,
0995                    true, NVB069_V0_NTFY_FAULT, NULL, 0, 0,
0996                    &buffer->notify);
0997     if (ret)
0998         return ret;
0999
1000     buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
1001     if (!buffer->fault)
1002         return -ENOMEM;
1003
1004     return nouveau_svm_fault_buffer_init(svm, id);
1005 }
1006
1007 void
1008 nouveau_svm_resume(struct nouveau_drm *drm)
1009 {
1010     struct nouveau_svm *svm = drm->svm;
1011     if (svm)
1012         nouveau_svm_fault_buffer_init(svm, 0);
1013 }
1014
1015 void
1016 nouveau_svm_suspend(struct nouveau_drm *drm)
1017 {
1018     struct nouveau_svm *svm = drm->svm;
1019     if (svm)
1020         nouveau_svm_fault_buffer_fini(svm, 0);
1021 }
1022
1023 void
1024 nouveau_svm_fini(struct nouveau_drm *drm)
1025 {
1026     struct nouveau_svm *svm = drm->svm;
1027     if (svm) {
1028         nouveau_svm_fault_buffer_dtor(svm, 0);
1029         kfree(drm->svm);
1030         drm->svm = NULL;
1031     }
1032 }
1033
1034 void
1035 nouveau_svm_init(struct nouveau_drm *drm)
1036 {
1037     static const struct nvif_mclass buffers[] = {
1038         {   VOLTA_FAULT_BUFFER_A, 0 },
1039         { MAXWELL_FAULT_BUFFER_A, 0 },
1040         {}
1041     };
1042     struct nouveau_svm *svm;
1043     int ret;
1044
1045     /* Disable on Volta and newer until channel recovery is fixed,
1046      * otherwise clients will have a trivial way to trash the GPU
1047      * for everyone.
1048      */
1049     if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL)
1050         return;
1051
1052     if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL)))
1053         return;
1054
1055     drm->svm->drm = drm;
1056     mutex_init(&drm->svm->mutex);
1057     INIT_LIST_HEAD(&drm->svm->inst);
1058
1059     ret = nvif_mclass(&drm->client.device.object, buffers);
1060     if (ret < 0) {
1061         SVM_DBG(svm, "No supported fault buffer class");
1062         nouveau_svm_fini(drm);
1063         return;
1064     }
1065
1066     ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
1067     if (ret) {
1068         nouveau_svm_fini(drm);
1069         return;
1070     }
1071
1072     SVM_DBG(svm, "Initialised");
1073 }