0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
0004  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
0005  * Copyright 2019 Marvell. All rights reserved.
0006  */
0007 #include <linux/xarray.h>
0008 #include "uverbs.h"
0009 #include "core_priv.h"
0010 
0011 /**
0012  * rdma_umap_priv_init() - Initialize the private data of a vma
0013  *
0014  * @priv: The already allocated private data
0015  * @vma: The vm area struct that needs private data
0016  * @entry: entry into the mmap_xa that needs to be linked with
0017  *       this vma
0018  *
0019  * Each time we map IO memory into user space this keeps track of the
0020  * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
0021  * to point to the zero page and allow the hot unplug to proceed.
0022  *
0023  * This is necessary for cases like PCI physical hot unplug as the actual BAR
0024  * memory may vanish after this and access to it from userspace could MCE.
0025  *
0026  * RDMA drivers supporting disassociation must have their user space designed
0027  * to cope in some way with their IO pages going to the zero page.
0028  *
0029  */
0030 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
0031              struct vm_area_struct *vma,
0032              struct rdma_user_mmap_entry *entry)
0033 {
0034     struct ib_uverbs_file *ufile = vma->vm_file->private_data;
0035 
0036     priv->vma = vma;
0037     if (entry) {
0038         kref_get(&entry->ref);
0039         priv->entry = entry;
0040     }
0041     vma->vm_private_data = priv;
0042     /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
0043 
0044     mutex_lock(&ufile->umap_lock);
0045     list_add(&priv->list, &ufile->umaps);
0046     mutex_unlock(&ufile->umap_lock);
0047 }
0048 EXPORT_SYMBOL(rdma_umap_priv_init);
0049 
0050 /**
0051  * rdma_user_mmap_io() - Map IO memory into a process
0052  *
0053  * @ucontext: associated user context
0054  * @vma: the vma related to the current mmap call
0055  * @pfn: pfn to map
0056  * @size: size to map
0057  * @prot: pgprot to use in remap call
0058  * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
0059  *         if mmap_entry is not used by the driver
0060  *
0061  * This is to be called by drivers as part of their mmap() functions if they
0062  * wish to send something like PCI-E BAR memory to userspace.
0063  *
0064  * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
0065  * success.
0066  */
0067 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
0068               unsigned long pfn, unsigned long size, pgprot_t prot,
0069               struct rdma_user_mmap_entry *entry)
0070 {
0071     struct ib_uverbs_file *ufile = ucontext->ufile;
0072     struct rdma_umap_priv *priv;
0073 
0074     if (!(vma->vm_flags & VM_SHARED))
0075         return -EINVAL;
0076 
0077     if (vma->vm_end - vma->vm_start != size)
0078         return -EINVAL;
0079 
0080     /* Driver is using this wrong, must be called by ib_uverbs_mmap */
0081     if (WARN_ON(!vma->vm_file ||
0082             vma->vm_file->private_data != ufile))
0083         return -EINVAL;
0084     lockdep_assert_held(&ufile->device->disassociate_srcu);
0085 
0086     priv = kzalloc(sizeof(*priv), GFP_KERNEL);
0087     if (!priv)
0088         return -ENOMEM;
0089 
0090     vma->vm_page_prot = prot;
0091     if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
0092         kfree(priv);
0093         return -EAGAIN;
0094     }
0095 
0096     rdma_umap_priv_init(priv, vma, entry);
0097     return 0;
0098 }
0099 EXPORT_SYMBOL(rdma_user_mmap_io);
0100 
0101 /**
0102  * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
0103  *
0104  * @ucontext: associated user context
0105  * @pgoff: The mmap offset >> PAGE_SHIFT
0106  *
0107  * This function is called when a user tries to mmap with an offset (returned
0108  * by rdma_user_mmap_get_offset()) it initially received from the driver. The
0109  * rdma_user_mmap_entry was created by the function
0110  * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
0111  * entry so that it won't be deleted from the xarray in the meantime.
0112  *
 * Return a reference to the entry if it exists or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
0115  */
0116 struct rdma_user_mmap_entry *
0117 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
0118                    unsigned long pgoff)
0119 {
0120     struct rdma_user_mmap_entry *entry;
0121 
0122     if (pgoff > U32_MAX)
0123         return NULL;
0124 
0125     xa_lock(&ucontext->mmap_xa);
0126 
0127     entry = xa_load(&ucontext->mmap_xa, pgoff);
0128 
0129     /*
0130      * If refcount is zero, entry is already being deleted, driver_removed
0131      * indicates that the no further mmaps are possible and we waiting for
0132      * the active VMAs to be closed.
0133      */
0134     if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
0135         !kref_get_unless_zero(&entry->ref))
0136         goto err;
0137 
0138     xa_unlock(&ucontext->mmap_xa);
0139 
0140     ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
0141           pgoff, entry->npages);
0142 
0143     return entry;
0144 
0145 err:
0146     xa_unlock(&ucontext->mmap_xa);
0147     return NULL;
0148 }
0149 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
0150 
0151 /**
0152  * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
0153  *
0154  * @ucontext: associated user context
0155  * @vma: the vma being mmap'd into
0156  *
0157  * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
0158  * checks that the VMA is correct.
0159  */
0160 struct rdma_user_mmap_entry *
0161 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
0162              struct vm_area_struct *vma)
0163 {
0164     struct rdma_user_mmap_entry *entry;
0165 
0166     if (!(vma->vm_flags & VM_SHARED))
0167         return NULL;
0168     entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
0169     if (!entry)
0170         return NULL;
0171     if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
0172         rdma_user_mmap_entry_put(entry);
0173         return NULL;
0174     }
0175     return entry;
0176 }
0177 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
0178 
0179 static void rdma_user_mmap_entry_free(struct kref *kref)
0180 {
0181     struct rdma_user_mmap_entry *entry =
0182         container_of(kref, struct rdma_user_mmap_entry, ref);
0183     struct ib_ucontext *ucontext = entry->ucontext;
0184     unsigned long i;
0185 
0186     /*
0187      * Erase all entries occupied by this single entry, this is deferred
0188      * until all VMA are closed so that the mmap offsets remain unique.
0189      */
0190     xa_lock(&ucontext->mmap_xa);
0191     for (i = 0; i < entry->npages; i++)
0192         __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
0193     xa_unlock(&ucontext->mmap_xa);
0194 
0195     ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
0196           entry->start_pgoff, entry->npages);
0197 
0198     if (ucontext->device->ops.mmap_free)
0199         ucontext->device->ops.mmap_free(entry);
0200 }
0201 
0202 /**
0203  * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
0204  *
0205  * @entry: an entry in the mmap_xa
0206  *
0207  * This function is called when the mapping is closed if it was
0208  * an io mapping or when the driver is done with the entry for
0209  * some other reason.
0210  * Should be called after rdma_user_mmap_entry_get was called
0211  * and entry is no longer needed. This function will erase the
0212  * entry and free it if its refcnt reaches zero.
0213  */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	/*
	 * On the final put rdma_user_mmap_entry_free() runs: it erases the
	 * entry's xarray slots and calls ops->mmap_free() if provided.
	 */
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);
0219 
0220 /**
0221  * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *                 mark it as unmappable
0223  *
0224  * @entry: the entry to insert into the mmap_xa
0225  *
0226  * Drivers can call this to prevent userspace from creating more mappings for
0227  * entry, however existing mmaps continue to exist and ops->mmap_free() will
0228  * not be called until all user mmaps are destroyed.
0229  */
0230 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
0231 {
0232     if (!entry)
0233         return;
0234 
0235     xa_lock(&entry->ucontext->mmap_xa);
0236     entry->driver_removed = true;
0237     xa_unlock(&entry->ucontext->mmap_xa);
0238     kref_put(&entry->ref, rdma_user_mmap_entry_free);
0239 }
0240 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
0241 
0242 /**
0243  * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
0244  *                   in a given range.
0245  *
0246  * @ucontext: associated user context.
0247  * @entry: the entry to insert into the mmap_xa
0248  * @length: length of the address that will be mmapped
0249  * @min_pgoff: minimum pgoff to be returned
0250  * @max_pgoff: maximum pgoff to be returned
0251  *
0252  * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets is
0254  * handled in the core and helper functions are provided to insert entries
0255  * into the database and extract entries when the user calls mmap with the
0256  * given offset. The function allocates a unique page offset in a given range
0257  * that should be provided to user, the user will use the offset to retrieve
0258  * information such as address to be mapped and how.
0259  *
0260  * Return: 0 on success and -ENOMEM on failure
0261  */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;

	if (!entry)
		return -EINVAL;

	/*
	 * The entry starts with a refcount of 1; rdma_user_mmap_entry_remove()
	 * drops that reference when the driver is done with the entry.
	 */
	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;
	while (true) {
		/* First find an empty index */
		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
		/* XAS_RESTART here means no free index exists up to max_pgoff */
		if (xas.xa_node == XAS_RESTART)
			goto err_unlock;

		xa_first = xas.xa_index;

		/* Is there enough room to have the range? u32 wrap means no */
		if (check_add_overflow(xa_first, npages, &xa_last))
			goto err_unlock;

		/*
		 * Now look for the next present entry. If an entry doesn't
		 * exist, we found an empty range and can proceed.
		 */
		xas_next_entry(&xas, xa_last - 1);
		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
			break;
	}

	/* Claim every page of the range [xa_first, xa_last) for this entry */
	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
		  entry->start_pgoff, npages);

	return 0;

err_undo:
	/* i is one past the last successful insert; erase [xa_first, i) */
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
0342 
0343 /**
0344  * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
0345  *
0346  * @ucontext: associated user context.
0347  * @entry: the entry to insert into the mmap_xa
0348  * @length: length of the address that will be mmapped
0349  *
0350  * This function should be called by drivers that use the rdma_user_mmap
0351  * interface for handling user mmapped addresses. The database is handled in
0352  * the core and helper functions are provided to insert entries into the
0353  * database and extract entries when the user calls mmap with the given offset.
0354  * The function allocates a unique page offset that should be provided to user,
0355  * the user will use the offset to retrieve information such as address to
0356  * be mapped and how.
0357  *
0358  * Return: 0 on success and -ENOMEM on failure
0359  */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	/* Delegate with the widest possible pgoff range: [0, U32_MAX] */
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);