// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
    u8 key;

    do {
        get_random_bytes(&key, 1);
    } while (key == last_key);

    return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
    switch (mr->type) {
    case IB_MR_TYPE_DMA:
        return 0;

    case IB_MR_TYPE_USER:
    case IB_MR_TYPE_MEM_REG:
        if (iova < mr->iova || length > mr->length ||
            iova > mr->iova + mr->length - length)
            return -EFAULT;
        return 0;

    default:
        pr_warn("%s: mr type (%d) not supported\n",
            __func__, mr->type);
        return -EFAULT;
    }
}

#define IB_ACCESS_REMOTE    (IB_ACCESS_REMOTE_READ      \
                | IB_ACCESS_REMOTE_WRITE    \
                | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
    u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
    u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

    /* Set ibmr->lkey/rkey and also copy into the private lkey/rkey.
     * For user MRs these will always be the same; for cases where the
     * caller 'owns' the key portion they may differ until the REG_MR
     * WQE is executed.
     */
    mr->lkey = mr->ibmr.lkey = lkey;
    mr->rkey = mr->ibmr.rkey = rkey;

    mr->state = RXE_MR_STATE_INVALID;
    mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
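
/* Illustrative sketch (hypothetical values, not part of the driver): how a
 * key built above decomposes. Assuming elem.index == 5 and the random key
 * byte comes back as 0x3c:
 *
 *     lkey        = (5 << 8) | 0x3c = 0x053c
 *     lkey >> 8   = 5      pool index, used later by lookup_mr()
 *     lkey & 0xff = 0x3c   key byte, the only part a REG_MR WQE may change
 */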

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
    int i;
    int num_map;
    struct rxe_map **map = mr->map;

    num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

    mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
    if (!mr->map)
        goto err1;

    for (i = 0; i < num_map; i++) {
        mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
        if (!mr->map[i])
            goto err2;
    }

    BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

    mr->map_shift = ilog2(RXE_BUF_PER_MAP);
    mr->map_mask = RXE_BUF_PER_MAP - 1;

    mr->num_buf = num_buf;
    mr->num_map = num_map;
    mr->max_buf = num_map * RXE_BUF_PER_MAP;

    return 0;

err2:
    for (i--; i >= 0; i--)
        kfree(mr->map[i]);

    kfree(mr->map);
err1:
    return -ENOMEM;
}
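
/* Illustrative sketch (hypothetical numbers, not part of the driver): the MR
 * buffer descriptors form a two-level table. If RXE_BUF_PER_MAP were 512 and
 * num_buf were 1000:
 *
 *     num_map = (1000 + 512 - 1) / 512 = 2    second-level chunks allocated
 *     max_buf = 2 * 512 = 1024                capacity, always >= num_buf
 *
 * map_shift/map_mask then split a flat buffer index n into
 * map[n >> map_shift]->buf[n & map_mask].
 */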

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
    rxe_mr_init(access, mr);

    mr->ibmr.pd = &pd->ibpd;
    mr->access = access;
    mr->state = RXE_MR_STATE_VALID;
    mr->type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
             int access, struct rxe_mr *mr)
{
    struct rxe_map      **map;
    struct rxe_phys_buf *buf = NULL;
    struct ib_umem      *umem;
    struct sg_page_iter sg_iter;
    int         num_buf;
    void            *vaddr;
    int err;
    int i;

    umem = ib_umem_get(pd->ibpd.device, start, length, access);
    if (IS_ERR(umem)) {
        pr_warn("%s: Unable to pin memory region err = %d\n",
            __func__, (int)PTR_ERR(umem));
        err = PTR_ERR(umem);
        goto err_out;
    }

    num_buf = ib_umem_num_pages(umem);

    rxe_mr_init(access, mr);

    err = rxe_mr_alloc(mr, num_buf);
    if (err) {
        pr_warn("%s: Unable to allocate memory for map\n",
            __func__);
        goto err_release_umem;
    }

    mr->page_shift = PAGE_SHIFT;
    mr->page_mask = PAGE_SIZE - 1;

    num_buf = 0;
    map = mr->map;
    if (length > 0) {
        buf = map[0]->buf;

        for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
            if (num_buf >= RXE_BUF_PER_MAP) {
                map++;
                buf = map[0]->buf;
                num_buf = 0;
            }

            vaddr = page_address(sg_page_iter_page(&sg_iter));
            if (!vaddr) {
                pr_warn("%s: Unable to get virtual address\n",
                    __func__);
                err = -ENOMEM;
                goto err_cleanup_map;
            }

            buf->addr = (uintptr_t)vaddr;
            buf->size = PAGE_SIZE;
            num_buf++;
            buf++;
        }
    }

    mr->ibmr.pd = &pd->ibpd;
    mr->umem = umem;
    mr->access = access;
    mr->length = length;
    mr->iova = iova;
    mr->va = start;
    mr->offset = ib_umem_offset(umem);
    mr->state = RXE_MR_STATE_VALID;
    mr->type = IB_MR_TYPE_USER;

    return 0;

err_cleanup_map:
    for (i = 0; i < mr->num_map; i++)
        kfree(mr->map[i]);
    kfree(mr->map);
    /* clear the pointer so rxe_mr_cleanup() cannot free the map again */
    mr->map = NULL;
err_release_umem:
    ib_umem_release(umem);
err_out:
    return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
    int err;

    /* always allow remote access for FMRs */
    rxe_mr_init(IB_ACCESS_REMOTE, mr);

    err = rxe_mr_alloc(mr, max_pages);
    if (err)
        goto err1;

    mr->ibmr.pd = &pd->ibpd;
    mr->max_buf = max_pages;
    mr->state = RXE_MR_STATE_FREE;
    mr->type = IB_MR_TYPE_MEM_REG;

    return 0;

err1:
    return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
            size_t *offset_out)
{
    size_t offset = iova - mr->iova + mr->offset;
    int         map_index;
    int         buf_index;
    u64         length;

    if (likely(mr->page_shift)) {
        *offset_out = offset & mr->page_mask;
        offset >>= mr->page_shift;
        *n_out = offset & mr->map_mask;
        *m_out = offset >> mr->map_shift;
    } else {
        map_index = 0;
        buf_index = 0;

        length = mr->map[map_index]->buf[buf_index].size;

        while (offset >= length) {
            offset -= length;
            buf_index++;

            if (buf_index == RXE_BUF_PER_MAP) {
                map_index++;
                buf_index = 0;
            }
            length = mr->map[map_index]->buf[buf_index].size;
        }

        *m_out = map_index;
        *n_out = buf_index;
        *offset_out = offset;
    }
}
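
/* Worked example (hypothetical values, not part of the driver) for the
 * page-aligned fast path above. Assuming PAGE_SHIFT == 12, RXE_BUF_PER_MAP
 * were 512 (map_shift == 9, map_mask == 0x1ff), mr->offset == 0 and
 * iova - mr->iova == 0x00301234:
 *
 *     offset_out = 0x00301234 & 0xfff = 0x234   byte offset within the page
 *     page index = 0x00301234 >> 12   = 0x301
 *     n_out      = 0x301 & 0x1ff      = 0x101   buf index within one map
 *     m_out      = 0x301 >> 9         = 1       which second-level map
 */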

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
    size_t offset;
    int m, n;
    void *addr;

    if (mr->state != RXE_MR_STATE_VALID) {
        pr_warn("mr not in valid state\n");
        addr = NULL;
        goto out;
    }

    if (!mr->map) {
        addr = (void *)(uintptr_t)iova;
        goto out;
    }

    if (mr_check_range(mr, iova, length)) {
        pr_warn("range violation\n");
        addr = NULL;
        goto out;
    }

    lookup_iova(mr, iova, &m, &n, &offset);

    if (offset + length > mr->map[m]->buf[n].size) {
        pr_warn("crosses page boundary\n");
        addr = NULL;
        goto out;
    }

    addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
    return addr;
}

/* copy data from a range (addr, addr+length-1) to or from
 * an mr object starting at iova
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
        enum rxe_mr_copy_dir dir)
{
    int         err;
    int         bytes;
    u8          *va;
    struct rxe_map      **map;
    struct rxe_phys_buf *buf;
    int         m;
    int         i;
    size_t          offset;

    if (length == 0)
        return 0;

    if (mr->type == IB_MR_TYPE_DMA) {
        u8 *src, *dest;

        src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
        dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

        memcpy(dest, src, length);

        return 0;
    }

    WARN_ON_ONCE(!mr->map);

    err = mr_check_range(mr, iova, length);
    if (err) {
        err = -EFAULT;
        goto err1;
    }

    lookup_iova(mr, iova, &m, &i, &offset);

    map = mr->map + m;
    buf = map[0]->buf + i;

    while (length > 0) {
        u8 *src, *dest;

        va = (u8 *)(uintptr_t)buf->addr + offset;
        src = (dir == RXE_TO_MR_OBJ) ? addr : va;
        dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

        bytes = buf->size - offset;

        if (bytes > length)
            bytes = length;

        memcpy(dest, src, bytes);

        length  -= bytes;
        addr    += bytes;

        offset = 0;
        buf++;
        i++;

        if (i == RXE_BUF_PER_MAP) {
            i = 0;
            map++;
            buf = map[0]->buf;
        }
    }

    return 0;

err1:
    return err;
}
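
/* Illustrative sketch (hypothetical sizes, not part of the driver): a copy of
 * 6000 bytes that starts 3000 bytes into a 4096-byte buf proceeds in three
 * chunks:
 *
 *     chunk 1: bytes = 4096 - 3000 = 1096   rest of the current buf
 *     chunk 2: bytes = 4096                 next buf, offset reset to 0
 *     chunk 3: bytes = 6000 - 1096 - 4096 = 808
 *
 * RXE_TO_MR_OBJ copies from the caller's buffer (addr) into the MR pages;
 * the other direction copies out of the MR pages into the caller's buffer.
 */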

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
          void *addr, int length, enum rxe_mr_copy_dir dir)
{
    int         bytes;
    struct rxe_sge      *sge    = &dma->sge[dma->cur_sge];
    int         offset  = dma->sge_offset;
    int         resid   = dma->resid;
    struct rxe_mr       *mr = NULL;
    u64         iova;
    int         err;

    if (length == 0)
        return 0;

    if (length > resid) {
        err = -EINVAL;
        goto err2;
    }

    if (sge->length && (offset < sge->length)) {
        mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
        if (!mr) {
            err = -EINVAL;
            goto err1;
        }
    }

    while (length > 0) {
        bytes = length;

        if (offset >= sge->length) {
            if (mr) {
                rxe_put(mr);
                mr = NULL;
            }
            sge++;
            dma->cur_sge++;
            offset = 0;

            if (dma->cur_sge >= dma->num_sge) {
                err = -ENOSPC;
                goto err2;
            }

            if (sge->length) {
                mr = lookup_mr(pd, access, sge->lkey,
                           RXE_LOOKUP_LOCAL);
                if (!mr) {
                    err = -EINVAL;
                    goto err1;
                }
            } else {
                continue;
            }
        }

        if (bytes > sge->length - offset)
            bytes = sge->length - offset;

        if (bytes > 0) {
            iova = sge->addr + offset;

            err = rxe_mr_copy(mr, iova, addr, bytes, dir);
            if (err)
                goto err2;

            offset  += bytes;
            resid   -= bytes;
            length  -= bytes;
            addr    += bytes;
        }
    }

    dma->sge_offset = offset;
    dma->resid = resid;

    if (mr)
        rxe_put(mr);

    return 0;

err2:
    if (mr)
        rxe_put(mr);
err1:
    return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
    struct rxe_sge      *sge    = &dma->sge[dma->cur_sge];
    int         offset  = dma->sge_offset;
    int         resid   = dma->resid;

    while (length) {
        unsigned int bytes;

        if (offset >= sge->length) {
            sge++;
            dma->cur_sge++;
            offset = 0;
            if (dma->cur_sge >= dma->num_sge)
                return -ENOSPC;
        }

        bytes = length;

        if (bytes > sge->length - offset)
            bytes = sge->length - offset;

        offset  += bytes;
        resid   -= bytes;
        length  -= bytes;
    }

    dma->sge_offset = offset;
    dma->resid = resid;

    return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
             enum rxe_mr_lookup_type type)
{
    struct rxe_mr *mr;
    struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
    int index = key >> 8;

    mr = rxe_pool_get_index(&rxe->mr_pool, index);
    if (!mr)
        return NULL;

    if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
             (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
             mr_pd(mr) != pd || (access && !(access & mr->access)) ||
             mr->state != RXE_MR_STATE_VALID)) {
        rxe_put(mr);
        mr = NULL;
    }

    return mr;
}
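
/* Usage sketch (hedged, modelled on the call in copy_data() above): a local
 * lookup checks the full lkey, so a stale key byte left over from an old
 * registration fails even though the pool index still resolves to this MR.
 * The returned reference must be dropped with rxe_put() when done:
 *
 *     mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
 *     if (!mr)
 *             return -EINVAL;
 *     ...
 *     rxe_put(mr);
 */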

int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
    struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
    struct rxe_mr *mr;
    int ret;

    mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
    if (!mr) {
        pr_err("%s: No MR for key %#x\n", __func__, key);
        ret = -EINVAL;
        goto err;
    }

    if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
        pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
            __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
        ret = -EINVAL;
        goto err_drop_ref;
    }

    if (atomic_read(&mr->num_mw) > 0) {
        pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
            __func__);
        ret = -EINVAL;
        goto err_drop_ref;
    }

    if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
        pr_warn("%s: unexpected mr type (%d)\n", __func__, mr->type);
        ret = -EINVAL;
        goto err_drop_ref;
    }

    mr->state = RXE_MR_STATE_FREE;
    ret = 0;

err_drop_ref:
    rxe_put(mr);
err:
    return ret;
}

/* A user can (re)register a fast MR by executing a REG_MR WQE.
 * The user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the user's responsibility to keep the ib mr
 * keys in sync with the rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
    struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
    u32 key = wqe->wr.wr.reg.key;
    u32 access = wqe->wr.wr.reg.access;

    /* user can only register MR in free state */
    if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
        pr_warn("%s: mr->lkey = 0x%x not free\n",
            __func__, mr->lkey);
        return -EINVAL;
    }

    /* user can only register mr with qp in same protection domain */
    if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
        pr_warn("%s: qp->pd and mr->pd don't match\n",
            __func__);
        return -EINVAL;
    }

    /* user is only allowed to change key portion of l/rkey */
    if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
        pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
            __func__, key, mr->lkey);
        return -EINVAL;
    }

    mr->access = access;
    mr->lkey = key;
    mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
    mr->iova = wqe->wr.wr.reg.mr->iova;
    mr->state = RXE_MR_STATE_VALID;

    return 0;
}
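
/* Worked example (hypothetical values, not part of the driver) for the index
 * check above. If mr->lkey == 0x053c, only the low key byte may change:
 *
 *     key = 0x05a1:  (0x05a1 & ~0xff) == (0x053c & ~0xff) == 0x0500  ->  ok
 *     key = 0x063c:  0x0600 != 0x0500                                ->  -EINVAL
 */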

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
    struct rxe_mr *mr = to_rmr(ibmr);

    /* See IBA 10.6.7.2.6 */
    if (atomic_read(&mr->num_mw) > 0)
        return -EINVAL;

    rxe_cleanup(mr);

    return 0;
}

void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
    struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
    int i;

    rxe_put(mr_pd(mr));
    ib_umem_release(mr->umem);

    if (mr->map) {
        for (i = 0; i < mr->num_map; i++)
            kfree(mr->map[i]);

        kfree(mr->map);
    }
}