// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

enum {
    RDMA_RW_SINGLE_WR,
    RDMA_RW_MULTI_WR,
    RDMA_RW_MR,
    RDMA_RW_SIG_MR,
};

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");

/*
 * Report whether memory registration should be used. Memory registration must
 * be used for iWARP devices because of iWARP-specific limitations. Memory
 * registration is also enabled if registering memory might yield better
 * performance than using multiple SGE entries; see rdma_rw_io_needs_mr().
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
{
    if (rdma_protocol_iwarp(dev, port_num))
        return true;
    if (dev->attrs.max_sgl_rd)
        return true;
    if (unlikely(rdma_rw_force_mr))
        return true;
    return false;
}

/*
 * Check if the device will use memory registration for this RW operation.
 * For RDMA READs we must use MRs on iWARP and can optionally use them as an
 * optimization otherwise.  Additionally we have a debug option to force usage
 * of MRs to help test this code path.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
        enum dma_data_direction dir, int dma_nents)
{
    if (dir == DMA_FROM_DEVICE) {
        if (rdma_protocol_iwarp(dev, port_num))
            return true;
        if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
            return true;
    }
    if (unlikely(rdma_rw_force_mr))
        return true;
    return false;
}

static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
                       bool pi_support)
{
    u32 max_pages;

    if (pi_support)
        max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
    else
        max_pages = dev->attrs.max_fast_reg_page_list_len;

    /* arbitrary limit to avoid allocating gigantic resources */
    return min_t(u32, max_pages, 256);
}

static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
{
    int count = 0;

    if (reg->mr->need_inval) {
        reg->inv_wr.opcode = IB_WR_LOCAL_INV;
        reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
        reg->inv_wr.next = &reg->reg_wr.wr;
        count++;
    } else {
        reg->inv_wr.next = NULL;
    }

    return count;
}
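
/*
 * Illustrative note (not part of the original file): when the MR obtained
 * from the pool still has need_inval set from a previous use,
 * rdma_rw_inv_key() links a LOCAL_INV WR in front of the registration, so
 * the per-registration chain becomes:
 *
 *	IB_WR_LOCAL_INV -> IB_WR_REG_MR -> RDMA READ/WRITE WR
 *
 * Otherwise inv_wr.next is cleared, only the REG_MR WR precedes the data
 * transfer WR, and the function contributes 0 instead of 1 to the WR count.
 */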

/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
        struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
        u32 sg_cnt, u32 offset)
{
    u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                            qp->integrity_en);
    u32 nents = min(sg_cnt, pages_per_mr);
    int count = 0, ret;

    reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
    if (!reg->mr)
        return -EAGAIN;

    count += rdma_rw_inv_key(reg);

    ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
    if (ret < 0 || ret < nents) {
        ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
        return -EINVAL;
    }

    reg->reg_wr.wr.opcode = IB_WR_REG_MR;
    reg->reg_wr.mr = reg->mr;
    reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
    if (rdma_protocol_iwarp(qp->device, port_num))
        reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
    count++;

    reg->sge.addr = reg->mr->iova;
    reg->sge.length = reg->mr->length;
    return count;
}

static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
        u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
    struct rdma_rw_reg_ctx *prev = NULL;
    u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                            qp->integrity_en);
    int i, j, ret = 0, count = 0;

    ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
    ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
    if (!ctx->reg) {
        ret = -ENOMEM;
        goto out;
    }

    for (i = 0; i < ctx->nr_ops; i++) {
        struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
        u32 nents = min(sg_cnt, pages_per_mr);

        ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
                offset);
        if (ret < 0)
            goto out_free;
        count += ret;

        if (prev) {
            if (reg->mr->need_inval)
                prev->wr.wr.next = &reg->inv_wr;
            else
                prev->wr.wr.next = &reg->reg_wr.wr;
        }

        reg->reg_wr.wr.next = &reg->wr.wr;

        reg->wr.wr.sg_list = &reg->sge;
        reg->wr.wr.num_sge = 1;
        reg->wr.remote_addr = remote_addr;
        reg->wr.rkey = rkey;
        if (dir == DMA_TO_DEVICE) {
            reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
        } else if (!rdma_cap_read_inv(qp->device, port_num)) {
            reg->wr.wr.opcode = IB_WR_RDMA_READ;
        } else {
            reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
            reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
        }
        count++;

        remote_addr += reg->sge.length;
        sg_cnt -= nents;
        for (j = 0; j < nents; j++)
            sg = sg_next(sg);
        prev = reg;
        offset = 0;
    }

    if (prev)
        prev->wr.wr.next = NULL;

    ctx->type = RDMA_RW_MR;
    return count;

out_free:
    while (--i >= 0)
        ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
    kfree(ctx->reg);
out:
    return ret;
}
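
/*
 * Illustrative note (not part of the original file): for example, with
 * sg_cnt = 600 mapped entries and pages_per_mr = 256, rdma_rw_init_mr_wrs()
 * allocates nr_ops = 3 registration contexts and links them into one chain
 * (WRITE case shown):
 *
 *	[LOCAL_INV] -> REG_MR -> RDMA_WRITE ->
 *	[LOCAL_INV] -> REG_MR -> RDMA_WRITE ->
 *	[LOCAL_INV] -> REG_MR -> RDMA_WRITE
 *
 * The bracketed LOCAL_INV WRs only appear when the pooled MR still carries
 * need_inval from a previous use.
 */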

static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        struct scatterlist *sg, u32 sg_cnt, u32 offset,
        u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
    u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
              qp->max_read_sge;
    struct ib_sge *sge;
    u32 total_len = 0, i, j;

    ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

    ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
    if (!ctx->map.sges)
        goto out;

    ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
    if (!ctx->map.wrs)
        goto out_free_sges;

    for (i = 0; i < ctx->nr_ops; i++) {
        struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
        u32 nr_sge = min(sg_cnt, max_sge);

        if (dir == DMA_TO_DEVICE)
            rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
        else
            rdma_wr->wr.opcode = IB_WR_RDMA_READ;
        rdma_wr->remote_addr = remote_addr + total_len;
        rdma_wr->rkey = rkey;
        rdma_wr->wr.num_sge = nr_sge;
        rdma_wr->wr.sg_list = sge;

        for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
            sge->addr = sg_dma_address(sg) + offset;
            sge->length = sg_dma_len(sg) - offset;
            sge->lkey = qp->pd->local_dma_lkey;

            total_len += sge->length;
            sge++;
            sg_cnt--;
            offset = 0;
        }

        rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
            &ctx->map.wrs[i + 1].wr : NULL;
    }

    ctx->type = RDMA_RW_MULTI_WR;
    return ctx->nr_ops;

out_free_sges:
    kfree(ctx->map.sges);
out:
    return -ENOMEM;
}
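
/*
 * Illustrative note (not part of the original file): with sg_cnt = 5 mapped
 * entries and max_sge = 2 this builds DIV_ROUND_UP(5, 2) = 3 chained RDMA
 * WRs that use the local DMA lkey directly, carrying 2 + 2 + 1 SGEs; each
 * WR's remote_addr is advanced by the byte length already covered by the
 * previous WRs.
 */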

static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
        enum dma_data_direction dir)
{
    struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

    ctx->nr_ops = 1;

    ctx->single.sge.lkey = qp->pd->local_dma_lkey;
    ctx->single.sge.addr = sg_dma_address(sg) + offset;
    ctx->single.sge.length = sg_dma_len(sg) - offset;

    memset(rdma_wr, 0, sizeof(*rdma_wr));
    if (dir == DMA_TO_DEVICE)
        rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
    else
        rdma_wr->wr.opcode = IB_WR_RDMA_READ;
    rdma_wr->wr.sg_list = &ctx->single.sge;
    rdma_wr->wr.num_sge = 1;
    rdma_wr->remote_addr = remote_addr;
    rdma_wr->rkey = rkey;

    ctx->type = RDMA_RW_SINGLE_WR;
    return 1;
}

/**
 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 * @ctx:    context to initialize
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:     scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @sg_offset:  current byte offset into @sg
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey:   remote key to operate on
 * @dir:    %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
        struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
        u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
    struct ib_device *dev = qp->pd->device;
    struct sg_table sgt = {
        .sgl = sg,
        .orig_nents = sg_cnt,
    };
    int ret;

    ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
    if (ret)
        return ret;
    sg_cnt = sgt.nents;

    /*
     * Skip to the S/G entry that sg_offset falls into:
     */
    for (;;) {
        u32 len = sg_dma_len(sg);

        if (sg_offset < len)
            break;

        sg = sg_next(sg);
        sg_offset -= len;
        sg_cnt--;
    }

    ret = -EIO;
    if (WARN_ON_ONCE(sg_cnt == 0))
        goto out_unmap_sg;

    if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
        ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
                sg_offset, remote_addr, rkey, dir);
    } else if (sg_cnt > 1) {
        ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
                remote_addr, rkey, dir);
    } else {
        ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
                remote_addr, rkey, dir);
    }

    if (ret < 0)
        goto out_unmap_sg;
    return ret;

out_unmap_sg:
    ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
    return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
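
/*
 * Illustrative usage sketch (not part of the original file): a ULP typically
 * drives the init -> post -> completion -> destroy lifecycle roughly as
 * below. "sgl", "sg_cnt", "remote_addr", "rkey" and "done_cqe" are assumed
 * caller-owned placeholders, not identifiers from this file.
 *
 *	struct rdma_rw_ctx ctx;
 *	int nr_wqe, ret;
 *
 *	nr_wqe = rdma_rw_ctx_init(&ctx, qp, port_num, sgl, sg_cnt, 0,
 *				  remote_addr, rkey, DMA_FROM_DEVICE);
 *	if (nr_wqe < 0)
 *		return nr_wqe;
 *
 *	ret = rdma_rw_ctx_post(&ctx, qp, port_num, &done_cqe, NULL);
 *
 * and, once the completion for done_cqe has been processed:
 *
 *	rdma_rw_ctx_destroy(&ctx, qp, port_num, sgl, sg_cnt, DMA_FROM_DEVICE);
 */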

/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx:    context to initialize
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:     scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @prot_sg:    scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs:  signature offloading algorithms
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey:   remote key to operate on
 * @dir:    %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        u32 port_num, struct scatterlist *sg, u32 sg_cnt,
        struct scatterlist *prot_sg, u32 prot_sg_cnt,
        struct ib_sig_attrs *sig_attrs,
        u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
    struct ib_device *dev = qp->pd->device;
    u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                            qp->integrity_en);
    struct sg_table sgt = {
        .sgl = sg,
        .orig_nents = sg_cnt,
    };
    struct sg_table prot_sgt = {
        .sgl = prot_sg,
        .orig_nents = prot_sg_cnt,
    };
    struct ib_rdma_wr *rdma_wr;
    int count = 0, ret;

    if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
        pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
               sg_cnt, prot_sg_cnt, pages_per_mr);
        return -EINVAL;
    }

    ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
    if (ret)
        return ret;

    if (prot_sg_cnt) {
        ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
        if (ret)
            goto out_unmap_sg;
    }

    ctx->type = RDMA_RW_SIG_MR;
    ctx->nr_ops = 1;
    ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
    if (!ctx->reg) {
        ret = -ENOMEM;
        goto out_unmap_prot_sg;
    }

    ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
    if (!ctx->reg->mr) {
        ret = -EAGAIN;
        goto out_free_ctx;
    }

    count += rdma_rw_inv_key(ctx->reg);

    memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));

    ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
                  prot_sgt.nents, NULL, SZ_4K);
    if (unlikely(ret)) {
        pr_err("failed to map PI sg (%u)\n",
               sgt.nents + prot_sgt.nents);
        goto out_destroy_sig_mr;
    }

    ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
    ctx->reg->reg_wr.wr.wr_cqe = NULL;
    ctx->reg->reg_wr.wr.num_sge = 0;
    ctx->reg->reg_wr.wr.send_flags = 0;
    ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
    if (rdma_protocol_iwarp(qp->device, port_num))
        ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
    ctx->reg->reg_wr.mr = ctx->reg->mr;
    ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
    count++;

    ctx->reg->sge.addr = ctx->reg->mr->iova;
    ctx->reg->sge.length = ctx->reg->mr->length;
    if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
        ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;

    rdma_wr = &ctx->reg->wr;
    rdma_wr->wr.sg_list = &ctx->reg->sge;
    rdma_wr->wr.num_sge = 1;
    rdma_wr->remote_addr = remote_addr;
    rdma_wr->rkey = rkey;
    if (dir == DMA_TO_DEVICE)
        rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
    else
        rdma_wr->wr.opcode = IB_WR_RDMA_READ;
    ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
    count++;

    return count;

out_destroy_sig_mr:
    ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
    kfree(ctx->reg);
out_unmap_prot_sg:
    if (prot_sgt.nents)
        ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
    ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
    return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);

/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs.  If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
    reg->mr->need_inval = need_inval;
    ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
    reg->reg_wr.key = reg->mr->lkey;
    reg->sge.lkey = reg->mr->lkey;
}

/**
 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 * @ctx:    context to operate on
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @cqe:    completion queue entry for the last WR
 * @chain_wr:   WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed.  If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
    struct ib_send_wr *first_wr, *last_wr;
    int i;

    switch (ctx->type) {
    case RDMA_RW_SIG_MR:
    case RDMA_RW_MR:
        for (i = 0; i < ctx->nr_ops; i++) {
            rdma_rw_update_lkey(&ctx->reg[i],
                ctx->reg[i].wr.wr.opcode !=
                    IB_WR_RDMA_READ_WITH_INV);
        }

        if (ctx->reg[0].inv_wr.next)
            first_wr = &ctx->reg[0].inv_wr;
        else
            first_wr = &ctx->reg[0].reg_wr.wr;
        last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
        break;
    case RDMA_RW_MULTI_WR:
        first_wr = &ctx->map.wrs[0].wr;
        last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
        break;
    case RDMA_RW_SINGLE_WR:
        first_wr = &ctx->single.wr.wr;
        last_wr = &ctx->single.wr.wr;
        break;
    default:
        BUG();
    }

    if (chain_wr) {
        last_wr->next = chain_wr;
    } else {
        last_wr->wr_cqe = cqe;
        last_wr->send_flags |= IB_SEND_SIGNALED;
    }

    return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);
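
/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * wants to post a reply in the same chain can pass it as @chain_wr instead
 * of a cqe; "send_wr" below is an assumed caller-owned IB_WR_SEND work
 * request, not something defined in this file.
 *
 *	first_wr = rdma_rw_ctx_wrs(&ctx, qp, port_num, NULL, &send_wr);
 *	ret = ib_post_send(qp, first_wr, NULL);
 */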

/**
 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
 * @ctx:    context to operate on
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @cqe:    completion queue entry for the last WR
 * @chain_wr:   WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed.  If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted.  If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
        struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
    struct ib_send_wr *first_wr;

    first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
    return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);

/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx:    context to release
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:     scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @dir:    %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
             u32 port_num, struct scatterlist *sg, u32 sg_cnt,
             enum dma_data_direction dir)
{
    int i;

    switch (ctx->type) {
    case RDMA_RW_MR:
        for (i = 0; i < ctx->nr_ops; i++)
            ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
        kfree(ctx->reg);
        break;
    case RDMA_RW_MULTI_WR:
        kfree(ctx->map.wrs);
        kfree(ctx->map.sges);
        break;
    case RDMA_RW_SINGLE_WR:
        break;
    default:
        BUG();
        break;
    }

    ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);

/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *  rdma_rw_ctx_signature_init
 * @ctx:    context to release
 * @qp:     queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:     scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @prot_sg:    scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir:    %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
        u32 port_num, struct scatterlist *sg, u32 sg_cnt,
        struct scatterlist *prot_sg, u32 prot_sg_cnt,
        enum dma_data_direction dir)
{
    if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
        return;

    ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
    kfree(ctx->reg);

    if (prot_sg_cnt)
        ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
    ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);

/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device: device handling the connection
 * @port_num:   port num to which the connection is bound
 * @maxpages:   maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move @maxpages
 * payload pages. The returned value is used during transport creation to
 * compute max_rdma_ctxs and the size of the transport's Send and
 * Send Completion Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
                   unsigned int maxpages)
{
    unsigned int mr_pages;

    if (rdma_rw_can_use_mr(device, port_num))
        mr_pages = rdma_rw_fr_page_list_len(device, false);
    else
        mr_pages = device->attrs.max_sge_rd;
    return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);
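
/*
 * Illustrative note (not part of the original file): assuming the MR path is
 * used and the device supports at least 256 pages per fast registration
 * (the cap in rdma_rw_fr_page_list_len()), maxpages = 256 needs a single MR
 * per rdma_rw_ctx, while maxpages = 512 needs DIV_ROUND_UP(512, 256) = 2.
 */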

void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
    u32 factor;

    WARN_ON_ONCE(attr->port_num == 0);

    /*
     * Each context needs at least one RDMA READ or WRITE WR.
     *
     * For some hardware we might need more, eventually we should ask the
     * HCA driver for a multiplier here.
     */
    factor = 1;

    /*
     * If the device needs MRs to perform RDMA READ or WRITE operations,
     * each context also needs two additional WRs for the registration and
     * the invalidation.
     */
    if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
        rdma_rw_can_use_mr(dev, attr->port_num))
        factor += 2;    /* inv + reg */

    attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

    /*
     * The device may not support as many send WRs as we would like, so
     * clamp to the device limit and live with what we get.
     */
    attr->cap.max_send_wr =
        min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}
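
/*
 * Illustrative note (not part of the original file): on a device that needs
 * MRs, factor = 3 (data transfer + reg + inv), so max_rdma_ctxs = 128 grows
 * max_send_wr by 3 * 128 = 384 entries before the clamp against
 * dev->attrs.max_qp_wr.
 */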

int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
    struct ib_device *dev = qp->pd->device;
    u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
    int ret = 0;

    if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
        nr_sig_mrs = attr->cap.max_rdma_ctxs;
        nr_mrs = attr->cap.max_rdma_ctxs;
        max_num_sg = rdma_rw_fr_page_list_len(dev, true);
    } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
        nr_mrs = attr->cap.max_rdma_ctxs;
        max_num_sg = rdma_rw_fr_page_list_len(dev, false);
    }

    if (nr_mrs) {
        ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
                IB_MR_TYPE_MEM_REG,
                max_num_sg, 0);
        if (ret) {
            pr_err("%s: failed to allocate %u MRs\n",
                __func__, nr_mrs);
            return ret;
        }
    }

    if (nr_sig_mrs) {
        ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
                IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
        if (ret) {
            pr_err("%s: failed to allocate %u SIG MRs\n",
                __func__, nr_sig_mrs);
            goto out_free_rdma_mrs;
        }
    }

    return 0;

out_free_rdma_mrs:
    ib_mr_pool_destroy(qp, &qp->rdma_mrs);
    return ret;
}

void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
    ib_mr_pool_destroy(qp, &qp->sig_mrs);
    ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}