#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

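/*
 * Translate IB access flags into the mlx4 MPT permission bits.  Local read
 * access is always granted.
 */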
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
	       (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
	       MLX4_PERM_LOCAL_READ;
}

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
	switch (type) {
	case IB_MW_TYPE_1: return MLX4_MW_TYPE_1;
	case IB_MW_TYPE_2: return MLX4_MW_TYPE_2;
	default: return -1;
	}
}

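/*
 * Allocate a DMA memory region spanning the whole address space
 * (start 0, length ~0ull), i.e. an MR without a backing umem.
 */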
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

enum {
	MLX4_MAX_MTT_SHIFT = 31
};

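/*
 * Write one block of contiguous DMA addresses into the MTT, expanding the
 * block into mtt_size-sized entries.  Entries are buffered in 'pages' and
 * flushed to the device one page worth at a time via mlx4_write_mtt().
 */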
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size, u64 mtt_shift, u64 len,
					u64 cur_start_addr, u64 *pages,
					int *start_index, int *npages)
{
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;
	u64 mtt_entries;
	int err = 0;
	int k;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
			len, mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Both the start and the end of the block were rounded to a multiple
	 * of mtt_size above, so the block is covered by exactly mtt_entries
	 * entries starting at the rounded-down start address.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);

	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * The 'pages' buffer holds one page worth of entries; flush
		 * it to the device whenever it fills up.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}

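/* Return log2 of the lowest set bit of ptr, i.e. its natural alignment. */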
static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}

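/*
 * Reduce block_shift if either the start of the next block or the end of
 * the current block is not aligned to the current block size.
 */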
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	/*
	 * If the start of the next block is not aligned to the current block
	 * size, shrink the block size to the alignment of that address.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		block_shift = alignment_of(next_block_start);

	/*
	 * Likewise, the end of the current block must be aligned to the
	 * block size; otherwise shrink the block size accordingly.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		block_shift = alignment_of(current_block_end);

	return block_shift;
}

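/*
 * Walk the umem scatterlist, merge DMA-contiguous entries into blocks and
 * write the resulting MTT entries for the whole region.
 */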
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;
	struct scatterlist *sg;
	int i;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
		if (cur_start_addr + len == sg_dma_address(sg)) {
			/* Still in the same contiguous block. */
			len += sg_dma_len(sg);
			continue;
		}

		/*
		 * A new block starts here: write out the block accumulated
		 * so far before switching to the new one.
		 */
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr,
						   pages, &start_index,
						   &npages);
		if (err)
			goto out;

		cur_start_addr = sg_dma_address(sg);
		len = sg_dma_len(sg);
	}

	/* Handle the last block. */
	if (len > 0) {
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr, pages,
						   &start_index, &npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

/*
 * Calculate the optimal MTT (page) size for the umem, based on how long the
 * runs of DMA-contiguous pages are, and return the corresponding shift.
 * *num_of_mtts is set to the number of MTT entries needed at that size
 * (defaulting to one entry per PAGE_SIZE block if no larger size is usable).
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
				       int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 min_shift = PAGE_SHIFT;
	u64 last_block_aligned_end = 0;
	u64 current_block_start = 0;
	u64 first_block_start = 0;
	u64 current_block_len = 0;
	u64 last_block_end = 0;
	struct scatterlist *sg;
	u64 current_block_end;
	u64 misalignment_bits;
	u64 next_block_start;
	u64 total_len = 0;
	int i;

	*num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);

	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
		/*
		 * Initialization: remember the start of the first block
		 * (a block is a run of DMA-contiguous pages).
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			current_block_start = sg_dma_address(sg);
			first_block_start = current_block_start;
			/*
			 * The block size may not exceed the alignment of the
			 * offset between the (page-aligned) start VA and the
			 * first DMA address; otherwise virtual and physical
			 * page numbers would diverge within a block.
			 */
			misalignment_bits =
				(start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
				current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Check whether this scatter entry continues the current
		 * block.
		 */
		next_block_start = sg_dma_address(sg);
		current_block_end = current_block_start + current_block_len;

		/* There is a gap between the current block and this entry. */
		if (current_block_end != next_block_start) {
			block_shift = mlx4_ib_umem_calc_block_mtt
					(next_block_start,
					 current_block_end,
					 block_shift);

			/*
			 * Once the shift has dropped to the minimum (one
			 * page) there is nothing left to optimize.
			 */
			if (block_shift <= min_shift)
				goto end;

			/* Account for the block that just ended. */
			total_len += current_block_len;

			/* Start a new block. */
			current_block_start = next_block_start;
			current_block_len = sg_dma_len(sg);
			continue;
		}

		/*
		 * The entry is contiguous with the current block; just grow
		 * the block (a DMA-merged entry may be larger than a page).
		 */
		current_block_len += sg_dma_len(sg);
	}

	/* Account for the last block. */
	total_len += current_block_len;

	/* Round the first and last blocks out to the chosen block size. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	if (total_len & ((1ULL << block_shift) - 1ULL))
		pr_warn("misaligned total length detected (%llu, %llu)!",
			total_len, block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * Never return a shift below the minimum; warn, since this
		 * is not expected to happen.
		 */
		pr_warn("umem_calc_optimal_mtt_size - unexpected shift %llu\n",
			block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}

static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
					u64 length, int access_flags)
{
	/*
	 * Register the memory as writable whenever the underlying pages are
	 * writable, so that a later rereg can upgrade the access rights
	 * without pinning the pages again.
	 */
	if (!ib_access_writable(access_flags)) {
		unsigned long untagged_start = untagged_addr(start);
		struct vm_area_struct *vma;

		mmap_read_lock(current->mm);
		/*
		 * For the read-only case a single VMA must cover the whole
		 * range; otherwise fall back to requesting local write
		 * access.
		 */
		vma = find_vma(current->mm, untagged_start);
		if (vma && vma->vm_end >= untagged_start + length &&
		    vma->vm_start <= untagged_start) {
			if (vma->vm_flags & VM_WRITE)
				access_flags |= IB_ACCESS_LOCAL_WRITE;
		} else {
			access_flags |= IB_ACCESS_LOCAL_WRITE;
		}

		mmap_read_unlock(current->mm);
	}

	return ib_umem_get(device, start, length, access_flags);
}

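/*
 * Register a user memory region: pin the pages, pick an optimal MTT page
 * size for their layout, allocate the MR and program its translation table.
 */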
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.length = length;
	mr->ibmr.page_size = 1U << shift;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

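/*
 * Re-register an existing user MR.  Depending on 'flags' this may move the
 * MR to a new PD, change its access rights and/or re-pin a new virtual
 * address range.  Returns NULL on success or an ERR_PTR on failure.
 */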
struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
				    u64 length, u64 virt_addr,
				    int mr_access_flags, struct ib_pd *pd,
				    struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(mr->device);
	struct mlx4_ib_mr *mmr = to_mmr(mr);
	struct mlx4_mpt_entry *mpt_entry;
	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
	int err;

	/*
	 * Fetch the MPT entry so the requested changes can be applied to it
	 * before it is written back to the HCA.
	 */
	err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
	if (err)
		return ERR_PTR(err);

	if (flags & IB_MR_REREG_PD) {
		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
					   to_mpd(pd)->pdn);

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		if (ib_access_writable(mr_access_flags) &&
		    !mmr->umem->writable) {
			err = -EPERM;
			goto release_mpt_entry;
		}

		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
					       convert_access(mr_access_flags));

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_TRANS) {
		int shift;
		int n;

		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
		ib_umem_release(mmr->umem);
		mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
					     mr_access_flags);
		if (IS_ERR(mmr->umem)) {
			err = PTR_ERR(mmr->umem);
			/* Keep mlx4_ib_dereg_mr from releasing a stale pointer. */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
		shift = PAGE_SHIFT;

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,
					      *pmpt_entry);
		if (err) {
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
		mmr->mmr.iova = virt_addr;
		mmr->mmr.size = length;

		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
		if (err) {
			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
	}

	/*
	 * If writing the MPT back to the HCA fails, report the error; the
	 * remaining resources are freed later by dereg_mr.
	 */
	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
	if (!err && flags & IB_MR_REREG_ACCESS)
		mmr->mmr.access = mr_access_flags;

release_mpt_entry:
	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
	if (err)
		return ERR_PTR(err);
	return NULL;
}

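/*
 * Allocate and DMA-map the page list used by fast-register work requests
 * for this MR.
 */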
static int
mlx4_alloc_priv_pages(struct ib_device *device,
		      struct mlx4_ib_mr *mr,
		      int max_pages)
{
	int ret;

	/*
	 * Round the page-list size up to MLX4_MR_PAGES_ALIGN so the DMA
	 * mapping meets the device's alignment requirements; max_pages is
	 * bounded by MLX4_MAX_FAST_REG_PAGES, so the list never exceeds
	 * one page.
	 */
	mr->page_map_size = roundup(max_pages * sizeof(u64),
				    MLX4_MR_PAGES_ALIGN);

	/* Allocate a whole page so the list cannot cross a page boundary. */
	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
	if (!mr->pages)
		return -ENOMEM;

	mr->page_map = dma_map_single(device->dev.parent, mr->pages,
				      mr->page_map_size, DMA_TO_DEVICE);

	if (dma_mapping_error(device->dev.parent, mr->page_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;

err:
	free_page((unsigned long)mr->pages);
	return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
	if (mr->pages) {
		struct ib_device *device = mr->ibmr.device;

		dma_unmap_single(device->dev.parent, mr->page_map,
				 mr->page_map_size, DMA_TO_DEVICE);
		free_page((unsigned long)mr->pages);
		mr->pages = NULL;
	}
}

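/* Tear down an MR: release the private page list, the HCA MR and any umem. */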
int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int ret;

	mlx4_free_priv_pages(mr);

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret)
		return ret;
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

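/* Allocate and enable a memory window of the requested type. */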
int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
	struct mlx4_ib_mw *mw = to_mmw(ibmw);
	int err;

	err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
			    to_mlx4_type(ibmw->type), &mw->mmw);
	if (err)
		return err;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	ibmw->rkey = mw->mmw.key;
	return 0;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);
	return err;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	return 0;
}

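/*
 * Allocate an MR for fast registration (IB_MR_TYPE_MEM_REG only), limited
 * to MLX4_MAX_FAST_REG_PAGES pages.
 */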
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_num_sg, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
	if (err)
		goto err_free_mr;

	mr->max_pages = max_num_sg;
	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_free_pl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free_pl:
	mr->ibmr.device = pd->device;
	mlx4_free_priv_pages(mr);
err_free_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

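/* ib_sg_to_pages() callback: record one page address in the MR page list. */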
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

	return 0;
}

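/*
 * Map a scatterlist into the MR page list, syncing the DMA-mapped list
 * around the update so the device sees a consistent view.
 */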
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int rc;

	mr->npages = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
				   mr->page_map_size, DMA_TO_DEVICE);

	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
				      mr->page_map_size, DMA_TO_DEVICE);

	return rc;
}