#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

/* Tear down the pages backing @umem: if @dirty is set the sgtable is
 * DMA-unmapped first, and writable pages are marked dirty as they are
 * unpinned; finally the append table itself is freed.
 */
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
	bool make_dirty = umem->writable && dirty;
	struct scatterlist *sg;
	unsigned int i;

	if (dirty)
		ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
					   DMA_BIDIRECTIONAL, 0);

	for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i)
		unpin_user_page_range_dirty_lock(sg_page(sg),
			DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);

	sg_free_append_table(&umem->sgt_append);
}

/**
 * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
 * @umem: umem struct
 * @pgsz_bitmap: bitmap of HW supported page sizes
 * @virt: IOVA
 *
 * This helper is intended for HW that supports multiple page
 * sizes but can do only a single page size in an MR.
 *
 * Returns 0 if the umem requires page sizes not supported by
 * the driver to be mapped. Drivers always supporting PAGE_SIZE
 * or smaller will never see a 0 result.
 */
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
				     unsigned long pgsz_bitmap,
				     unsigned long virt)
{
	struct scatterlist *sg;
	unsigned long va, pgoff;
	dma_addr_t mask;
	int i;

	if (umem->is_odp) {
		unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);

		/* ODP must always be self consistent. */
		if (!(pgsz_bitmap & page_size))
			return 0;
		return page_size;
	}

	/* Only page sizes of PAGE_SIZE or larger can be returned, so mask
	 * out all smaller sizes from the bitmap.
	 */
	pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);

	umem->iova = va = virt;
	/* The best result is the smallest page size that results in the
	 * minimum number of required pages. Compute the largest page size
	 * that could work based on VA address bits that don't change.
	 */
	mask = pgsz_bitmap &
	       GENMASK(BITS_PER_LONG - 1,
		       bits_per((umem->length - 1 + virt) ^ virt));

	/* offset into first SGL */
	pgoff = umem->address & ~PAGE_MASK;

	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
		/* Walk the SGL and reduce the max page size if VA/PA bits
		 * differ for any address.
		 */
		mask |= (sg_dma_address(sg) + pgoff) ^ va;
		va += sg_dma_len(sg) - pgoff;
		/* Except for the last entry, the ending iova alignment sets
		 * the maximum possible page size as the low bits of the iova
		 * must be zero when starting the next chunk.
		 */
		if (i != (umem->sgt_append.sgt.nents - 1))
			mask |= va;
		pgoff = 0;
	}

	/* The mask accumulates 1's in each position where the VA and physical
	 * address differ, thus the length of trailing 0 is the largest page
	 * size that can pass the VA through to the physical.
	 */
	if (mask)
		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
	return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
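
/*
 * Usage sketch (illustrative only, not part of this file): a driver whose
 * HW supports 4K, 2M and 1G MR page sizes could pick the best one as below.
 * The "mr" structure and its fields are hypothetical names for this sketch.
 *
 *	unsigned long pgsz;
 *
 *	pgsz = ib_umem_find_best_pgsz(mr->umem, SZ_4K | SZ_2M | SZ_1G,
 *				      mr->iova);
 *	if (!pgsz)
 *		return -EINVAL;	(no supported page size fits this umem)
 */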

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * @device: IB device to connect UMEM
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 */
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
			    size_t size, int access)
{
	struct ib_umem *umem;
	struct page **page_list;
	unsigned long lock_limit;
	unsigned long new_pinned;
	unsigned long cur_base;
	unsigned long dma_attr = 0;
	struct mm_struct *mm;
	unsigned long npages;
	int pinned, ret;
	unsigned int gup_flags = FOLL_WRITE;

	/*
	 * If the combination of the addr and size requested for this memory
	 * region causes an integer overflow, return error.
	 */
	if (((addr + size) < addr) ||
	    PAGE_ALIGN(addr + size) < (addr + size))
		return ERR_PTR(-EINVAL);

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (access & IB_ACCESS_ON_DEMAND)
		return ERR_PTR(-EOPNOTSUPP);

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);
	umem->ibdev = device;
	umem->length = size;
	umem->address = addr;
	/*
	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
	 * correctly.
	 */
	umem->iova = addr;
	umem->writable = ib_access_writable(access);
	umem->owning_mm = mm = current->mm;
	mmgrab(mm);

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list) {
		ret = -ENOMEM;
		goto umem_kfree;
	}

	npages = ib_umem_num_pages(umem);
	if (npages == 0 || npages > UINT_MAX) {
		ret = -EINVAL;
		goto out;
	}

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(npages, &mm->pinned_vm);
		ret = -ENOMEM;
		goto out;
	}

	cur_base = addr & PAGE_MASK;

	if (!umem->writable)
		gup_flags |= FOLL_FORCE;

	while (npages) {
		cond_resched();
		pinned = pin_user_pages_fast(cur_base,
					     min_t(unsigned long, npages,
						   PAGE_SIZE /
						   sizeof(struct page *)),
					     gup_flags | FOLL_LONGTERM, page_list);
		if (pinned < 0) {
			ret = pinned;
			goto umem_release;
		}

		cur_base += pinned * PAGE_SIZE;
		npages -= pinned;
		ret = sg_alloc_append_table_from_pages(
			&umem->sgt_append, page_list, pinned, 0,
			pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
			npages, GFP_KERNEL);
		if (ret) {
			unpin_user_pages_dirty_lock(page_list, pinned, 0);
			goto umem_release;
		}
	}

	if (access & IB_ACCESS_RELAXED_ORDERING)
		dma_attr |= DMA_ATTR_WEAK_ORDERING;

	ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
				       DMA_BIDIRECTIONAL, dma_attr);
	if (ret)
		goto umem_release;
	goto out;

umem_release:
	__ib_umem_release(device, umem, 0);
	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
	free_page((unsigned long) page_list);
umem_kfree:
	if (ret) {
		mmdrop(umem->owning_mm);
		kfree(umem);
	}
	return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
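
/*
 * Usage sketch (illustrative only, not part of this file): pinning a user
 * buffer while registering an MR, and dropping it again when done. "pd",
 * "start", "length" and "access_flags" are hypothetical local names.
 *
 *	struct ib_umem *umem;
 *
 *	umem = ib_umem_get(pd->device, start, length, access_flags);
 *	if (IS_ERR(umem))
 *		return PTR_ERR(umem);
 *	...
 *	ib_umem_release(umem);
 */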

/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
	if (!umem)
		return;
	if (umem->is_dmabuf)
		return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
	if (umem->is_odp)
		return ib_umem_odp_release(to_ib_umem_odp(umem));

	__ib_umem_release(umem->ibdev, umem, 1);

	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
	mmdrop(umem->owning_mm);
	kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);

/*
 * Copy from the given ib_umem's pages to the given buffer.
 *
 * umem - the umem to copy from
 * dst - destination buffer
 * offset - offset to start copying from
 * length - buffer length
 *
 * Returns 0 on success, or an error code.
 */
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
		      size_t length)
{
	size_t end = offset + length;
	int ret;

	if (offset > umem->length || length > umem->length - offset) {
		pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
		       __func__, offset, umem->length, end);
		return -EINVAL;
	}

	ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
				 umem->sgt_append.sgt.orig_nents, dst, length,
				 offset + ib_umem_offset(umem));

	if (ret < 0)
		return ret;
	else if (ret != length)
		return -EINVAL;
	else
		return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
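
/*
 * Usage sketch (illustrative only, not part of this file): copying a small
 * header that userspace placed at the start of the pinned region into a
 * kernel buffer. "struct my_hdr" is a hypothetical type for this sketch.
 *
 *	struct my_hdr hdr;
 *	int err;
 *
 *	err = ib_umem_copy_from(&hdr, umem, 0, sizeof(hdr));
 *	if (err)
 *		return err;
 */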