// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */

#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

static __be64 get_umr_enable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_KEY |
		 MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

static __be64 get_umr_disable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_translation_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_LEN |
		 MLX5_MKEY_MASK_PAGE_SIZE |
		 MLX5_MKEY_MASK_START_ADDR;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
	u64 result;

	result = MLX5_MKEY_MASK_LR |
		 MLX5_MKEY_MASK_LW |
		 MLX5_MKEY_MASK_RR |
		 MLX5_MKEY_MASK_RW;

	if (MLX5_CAP_GEN(dev->mdev, atomic))
		result |= MLX5_MKEY_MASK_A;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_pd_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_PD;

	return cpu_to_be64(result);
}

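/*
 * Check that every mkey field selected by @mask may be modified through UMR
 * on this device; individual modifications can be disabled by device
 * capabilities.
 */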
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_A &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		return -EPERM;

	return 0;
}

enum {
	MAX_UMR_WR = 128,
};

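/* Walk the UMR QP from RESET through INIT and RTR to RTS. */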
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};
	int ret;

	attr.qp_state = IB_QPS_INIT;
	attr.port_num = 1;
	ret = ib_modify_qp(qp, &attr,
			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		return ret;
	}

	return 0;
}

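/*
 * Create the PD, CQ and REG_UMR QP used to post all UMR work requests, bring
 * the QP to RTS and initialize the flow-control semaphore and state.
 */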
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_pd *pd;
	struct ib_cq *cq;
	struct ib_qp *qp;
	int ret;

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		return PTR_ERR(pd);
	}

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		ret = PTR_ERR(cq);
		goto destroy_pd;
	}

	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr.cap.max_send_wr = MAX_UMR_WR;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
	init_attr.port_num = 1;
	qp = ib_create_qp(pd, &init_attr);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		ret = PTR_ERR(qp);
		goto destroy_cq;
	}

	ret = mlx5r_umr_qp_rst2rts(dev, qp);
	if (ret)
		goto destroy_qp;

	dev->umrc.qp = qp;
	dev->umrc.cq = cq;
	dev->umrc.pd = pd;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);
	mutex_init(&dev->umrc.lock);
	dev->umrc.state = MLX5_UMR_STATE_ACTIVE;

	return 0;

destroy_qp:
	ib_destroy_qp(qp);
destroy_cq:
	ib_free_cq(cq);
destroy_pd:
	ib_dealloc_pd(pd);
	return ret;
}

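/* Destroy the UMR QP, CQ and PD created by mlx5r_umr_resource_init(). */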
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
	if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
		return;
	ib_destroy_qp(dev->umrc.qp);
	ib_free_cq(dev->umrc.cq);
	ib_dealloc_pd(dev->umrc.pd);
}

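/*
 * After a fatal completion, reset the UMR QP and bring it back to RTS so
 * that flushed work requests can be resubmitted.
 */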
static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_qp_attr attr;
	int err;

	attr.qp_state = IB_QPS_RESET;
	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
	if (err) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		goto err;
	}

	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
	if (err)
		goto err;

	umrc->state = MLX5_UMR_STATE_ACTIVE;
	return 0;

err:
	umrc->state = MLX5_UMR_STATE_ERR;
	return err;
}

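/*
 * Build a UMR WQE on the UMR QP's send queue and ring the doorbell. The data
 * segment is included only when @with_data is true.
 */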
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	unsigned int wqe_size =
		with_data ? sizeof(struct mlx5r_umr_wqe) :
			    sizeof(struct mlx5r_umr_wqe) -
				    sizeof(struct mlx5_wqe_data_seg);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}

static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
	context->cqe.done = mlx5r_umr_done;
	init_completion(&context->done);
}

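/*
 * Post a UMR WQE and sleep until its completion arrives. Flushed WQEs are
 * resubmitted; any other failure triggers QP recovery and returns an error.
 */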
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
				    struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5r_umr_context umr_context;
	int err;

	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
	if (WARN_ON(err))
		return err;

	mlx5r_umr_init_context(&umr_context);

	down(&umrc->sem);
	while (true) {
		mutex_lock(&umrc->lock);
		if (umrc->state == MLX5_UMR_STATE_ERR) {
			mutex_unlock(&umrc->lock);
			err = -EFAULT;
			break;
		}

		if (umrc->state == MLX5_UMR_STATE_RECOVER) {
			mutex_unlock(&umrc->lock);
			usleep_range(3000, 5000);
			continue;
		}

		err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
					  with_data);
		mutex_unlock(&umrc->lock);
		if (err) {
			mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
				     err);
			break;
		}

		wait_for_completion(&umr_context.done);

		if (umr_context.status == IB_WC_SUCCESS)
			break;

		if (umr_context.status == IB_WC_WR_FLUSH_ERR)
			continue;

		WARN_ON_ONCE(1);
		mlx5_ib_warn(dev,
			"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
			umr_context.status);
		mutex_lock(&umrc->lock);
		err = mlx5r_umr_recover(dev);
		mutex_unlock(&umrc->lock);
		if (err)
			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
				     err);
		err = -EFAULT;
		break;
	}
	up(&umrc->sem);
	return err;
}

/**
 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
 * @mr: The MR to fence
 *
 * Upon return the NIC will not be doing any DMA to the pages under the MR,
 * and any DMA in progress will be completed. Failure of this function
 * indicates the HW has failed catastrophically.
 */
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};

	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}

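/*
 * Translate IB access flags into the mkey context access bits; relaxed
 * ordering is requested for both reads and writes when
 * IB_ACCESS_RELAXED_ORDERING is set.
 */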
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
				       struct mlx5_mkey_seg *seg,
				       unsigned int access_flags)
{
	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, seg, lr, 1);
	MLX5_SET(mkc, seg, relaxed_ordering_write,
		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
	MLX5_SET(mkc, seg, relaxed_ordering_read,
		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
}

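/* Use UMR to move an existing MR to a new PD and/or new access flags. */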
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
			      int access_flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};
	int err;

	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
	if (err)
		return err;

	mr->access_flags = access_flags;
	return 0;
}

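/*
 * Size bounds for the temporary XLT buffer: MLX5_MAX_UMR_CHUNK is the largest
 * buffer a single UMR will use (about 1M, see mlx5r_umr_alloc_xlt()),
 * MLX5_SPARE_UMR_CHUNK is a smaller size tried when that allocation fails.
 */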
#define MLX5_MAX_UMR_CHUNK                                                     \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 */
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
	 * allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = size / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}

static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	if (xlt == xlt_emergency_page) {
		mutex_unlock(&xlt_emergency_page_mutex);
		return;
	}

	free_pages((unsigned long)xlt, get_order(length));
}

static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
				     struct ib_sge *sg)
{
	struct device *ddev = &dev->mdev->pdev->dev;

	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
	mlx5r_umr_free_xlt(xlt, sg->length);
}

/*
 * Create an XLT buffer ready for submission.
 */
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
				  size_t nents, size_t ent_size,
				  unsigned int flags)
{
	struct device *ddev = &dev->mdev->pdev->dev;
	dma_addr_t dma;
	void *xlt;

	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
				  flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
								   GFP_KERNEL);
	sg->length = nents * ent_size;
	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		mlx5r_umr_free_xlt(xlt, sg->length);
		return NULL;
	}
	sg->addr = dma;
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	return xlt;
}

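/*
 * Fill the UMR control segment for an XLT update: choose the free/not-free
 * check according to whether the mkey is being enabled, and set the
 * translation size in octowords.
 */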
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				  unsigned int flags, struct ib_sge *sg)
{
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		/* fail if free */
		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
	else
		/* fail if not free */
		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
	ctrl_seg->xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}

static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
					      struct mlx5_mkey_seg *mkey_seg,
					      struct mlx5_ib_mr *mr,
					      unsigned int page_shift)
{
	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}

static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
				  struct ib_sge *sg)
{
	data_seg->byte_count = cpu_to_be32(sg->length);
	data_seg->lkey = cpu_to_be32(sg->lkey);
	data_seg->addr = cpu_to_be64(sg->addr);
}

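/*
 * Program the octoword offset into the mkey's translation table at which
 * this partial XLT update is applied.
 */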
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				    u64 offset)
{
	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);

	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}

static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
				       struct mlx5r_umr_wqe *wqe,
				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
				       unsigned int flags)
{
	bool update_pd_access, update_translation;

	if (flags & MLX5_IB_UPD_XLT_ENABLE)
		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();

	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
			   flags & MLX5_IB_UPD_XLT_PD ||
			   flags & MLX5_IB_UPD_XLT_ACCESS;

	if (update_pd_access) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	}

	update_translation =
		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;

	if (update_translation) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
		if (!mr->ibmr.length)
			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
	}

	wqe->ctrl_seg.xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}

/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 */
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	struct ib_block_iter biter;
	struct mlx5_mtt *cur_mtt;
	size_t orig_sg_length;
	struct mlx5_mtt *mtt;
	size_t final_size;
	struct ib_sge sg;
	u64 offset = 0;
	int err = 0;

	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	mtt = mlx5r_umr_create_xlt(
		dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
		sizeof(*mtt), flags);
	if (!mtt)
		return -ENOMEM;

	orig_sg_length = sg.length;

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
					  mr->page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	cur_mtt = mtt;
	rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
			    mr->umem->sgt_append.sgt.nents,
			    BIT(mr->page_shift)) {
		if (cur_mtt == (void *)mtt + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);

			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
						       true);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			offset += sg.length;
			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);

			cur_mtt = mtt;
		}

		cur_mtt->ptag =
			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
				    MLX5_IB_MTT_PRESENT);

		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
			cur_mtt->ptag = 0;

		cur_mtt++;
	}

	final_size = (void *)cur_mtt - (void *)mtt;
	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
	memset(cur_mtt, 0, sg.length - final_size);
	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

err:
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
	return err;
}

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

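/*
 * Update a range of XLT entries (MTTs, or KLMs for an indirect mkey) of an
 * ODP MR, posting one UMR per chunk of the temporary buffer.
 */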
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	const int page_mask = page_align - 1;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t size_to_map = 0;
	size_t orig_sg_length;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
	}
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}