#include "rxe.h"
#include "rxe_loc.h"

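/* Return a random 8-bit key that differs from last_key; used as the
 * low (variant) byte of lkeys and rkeys.
 */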
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

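/* Check that the range [iova, iova + length) lies entirely inside the
 * registered region of the MR. DMA MRs always pass.
 */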
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

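/* Initialize the state common to all MR types: build the lkey from the
 * pool index plus a random 8-bit variant, hand out an rkey only when
 * remote access was requested, and leave the MR invalid until
 * registration completes.
 */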
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* mirror the keys into both the ib_mr and the rxe private copies */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

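/* Allocate the two-level map table used to track up to num_buf physical
 * buffers, RXE_BUF_PER_MAP buffer entries per map page.
 */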
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

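/* Set up a DMA MR; iova values are used directly as addresses, so no
 * page map is built.
 */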
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

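/* Register a user memory region: pin the pages with ib_umem_get() and
 * record the kernel virtual address and size of each page in the map
 * table.
 */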
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

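/* Set up a fast-register MR (IB_MR_TYPE_MEM_REG): allocate room for up to
 * max_pages buffers and leave the MR in the free state until a REG_MR
 * work request fills it in.
 */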
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for fast-register MRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

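/* Translate an iova inside the MR into a map index (*m_out), a buffer
 * index (*n_out) and a byte offset (*offset_out). Uses shifts and masks
 * when every buffer is page sized, otherwise walks the buffer list.
 */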
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

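/* Return the kernel virtual address corresponding to iova, or NULL if the
 * MR is not in the valid state, the range check fails, or the requested
 * range crosses a buffer boundary.
 */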
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

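/* Copy length bytes between the buffer at addr and the MR starting at
 * iova, in the direction given by dir. DMA MRs are handled with a single
 * memcpy; other MR types walk the map table one buffer at a time.
 */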
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

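/* Copy data in or out of the scatter/gather list described by the DMA
 * state, advancing the current SGE and offset as bytes are consumed.
 */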
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

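/* Skip length bytes of the DMA state's scatter/gather list without
 * copying, updating sge_offset and resid.
 */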
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

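/* Look up an MR by lkey or rkey (depending on type) and take a reference.
 * Returns NULL unless the key matches, the PD matches, the requested
 * access is allowed and the MR is in the valid state.
 */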
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

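/* Invalidate the MR identified by key: verify the key, that no memory
 * windows are bound to it and that it is a fast-register MR, then move it
 * back to the free state.
 */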
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

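/* Complete a REG_MR work request: validate the MR state, PD and key
 * index, then apply the new access flags, key and iova and mark the MR
 * valid.
 */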
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* the MR must be in the free state before it can be registered */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* the QP and the MR must belong to the same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* only the low 8 key bits may change; the index portion of the key
	 * must still refer to this MR
	 */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

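/* Deregister an MR. Fails if memory windows are still bound to it. */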
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* an MR cannot be deregistered while memory windows are still
	 * bound to it
	 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

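/* Pool cleanup callback: drop the PD reference, release the umem and free
 * the map table.
 */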
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}