0001 /*
0002  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
0003  * Copyright (c) 2020, Intel Corporation. All rights reserved.
0004  *
0005  * This software is available to you under a choice of one of two
0006  * licenses.  You may choose to be licensed under the terms of the GNU
0007  * General Public License (GPL) Version 2, available from the file
0008  * COPYING in the main directory of this source tree, or the
0009  * OpenIB.org BSD license below:
0010  *
0011  *     Redistribution and use in source and binary forms, with or
0012  *     without modification, are permitted provided that the following
0013  *     conditions are met:
0014  *
0015  *      - Redistributions of source code must retain the above
0016  *        copyright notice, this list of conditions and the following
0017  *        disclaimer.
0018  *
0019  *      - Redistributions in binary form must reproduce the above
0020  *        copyright notice, this list of conditions and the following
0021  *        disclaimer in the documentation and/or other materials
0022  *        provided with the distribution.
0023  *
0024  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0025  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0026  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0027  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0028  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0029  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0030  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0031  * SOFTWARE.
0032  */
0033 
0034 
0035 #include <linux/kref.h>
0036 #include <linux/random.h>
0037 #include <linux/debugfs.h>
0038 #include <linux/export.h>
0039 #include <linux/delay.h>
0040 #include <linux/dma-buf.h>
0041 #include <linux/dma-resv.h>
0042 #include <rdma/ib_umem.h>
0043 #include <rdma/ib_umem_odp.h>
0044 #include <rdma/ib_verbs.h>
0045 #include "dm.h"
0046 #include "mlx5_ib.h"
0047 #include "umr.h"
0048 
0049 enum {
0050     MAX_PENDING_REG_MR = 8,
0051 };
0052 
0053 #define MLX5_UMR_ALIGN 2048
0054 
0055 static void
0056 create_mkey_callback(int status, struct mlx5_async_work *context);
0057 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
0058                      u64 iova, int access_flags,
0059                      unsigned int page_size, bool populate);
0060 
0061 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
0062                       struct ib_pd *pd)
0063 {
0064     struct mlx5_ib_dev *dev = to_mdev(pd->device);
0065 
0066     MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
0067     MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
0068     MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
0069     MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
0070     MLX5_SET(mkc, mkc, lr, 1);
0071 
0072     if ((acc & IB_ACCESS_RELAXED_ORDERING) &&
0073         pcie_relaxed_ordering_enabled(dev->mdev->pdev)) {
0074         if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
0075             MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
0076         if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
0077             MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
0078     }
0079 
0080     MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
0081     MLX5_SET(mkc, mkc, qpn, 0xffffff);
0082     MLX5_SET64(mkc, mkc, start_addr, start_addr);
0083 }
0084 
0085 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
0086 {
0087     u8 key = atomic_inc_return(&dev->mkey_var);
0088     void *mkc;
0089 
0090     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0091     MLX5_SET(mkc, mkc, mkey_7_0, key);
0092     *mkey = key;
0093 }
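
/*
 * The low 8 bits of an mkey (mkey_7_0) are a rolling per-device variant taken
 * from dev->mkey_var; the remaining bits come from the firmware-assigned mkey
 * index. For asynchronous creation the two parts are combined in
 * create_mkey_callback() once the index is known.
 */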
0094 
0095 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
0096                    struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
0097 {
0098     int ret;
0099 
0100     assign_mkey_variant(dev, &mkey->key, in);
0101     ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
0102     if (!ret)
0103         init_waitqueue_head(&mkey->wait);
0104 
0105     return ret;
0106 }
0107 
0108 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
0109 {
0110     struct mlx5_ib_dev *dev = async_create->ent->dev;
0111     size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0112     size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
0113 
0114     MLX5_SET(create_mkey_in, async_create->in, opcode,
0115          MLX5_CMD_OP_CREATE_MKEY);
0116     assign_mkey_variant(dev, &async_create->mkey, async_create->in);
0117     return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
0118                 async_create->out, outlen, create_mkey_callback,
0119                 &async_create->cb_work);
0120 }
0121 
0122 static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
0123 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
0124 
0125 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
0126 {
0127     WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
0128 
0129     return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
0130 }
0131 
0132 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
0133 {
0134     if (status == -ENXIO) /* core driver is not available */
0135         return;
0136 
0137     mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
0138     if (status != -EREMOTEIO) /* driver specific failure */
0139         return;
0140 
0141     /* Failed in FW, print cmd out failure details */
0142     mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
0143 }
0144 
0145 
0146 static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
0147              void *to_store)
0148 {
0149     XA_STATE(xas, &ent->mkeys, 0);
0150     void *curr;
0151 
0152     xa_lock_irq(&ent->mkeys);
0153     if (limit_pendings &&
0154         (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
0155         xa_unlock_irq(&ent->mkeys);
0156         return -EAGAIN;
0157     }
0158     while (1) {
0159         /*
0160          * This is a cmpxchg(NULL, XA_ZERO_ENTRY), except that this version
0161          * doesn't transparently unlock. Instead we set the xas index to the
0162          * current value of reserved on every iteration.
0163          */
0164         xas_set(&xas, ent->reserved);
0165         curr = xas_load(&xas);
0166         if (!curr) {
0167             if (to_store && ent->stored == ent->reserved)
0168                 xas_store(&xas, to_store);
0169             else
0170                 xas_store(&xas, XA_ZERO_ENTRY);
0171             if (xas_valid(&xas)) {
0172                 ent->reserved++;
0173                 if (to_store) {
0174                     if (ent->stored != ent->reserved)
0175                         __xa_store(&ent->mkeys,
0176                                ent->stored,
0177                                to_store,
0178                                GFP_KERNEL);
0179                     ent->stored++;
0180                     queue_adjust_cache_locked(ent);
0181                     WRITE_ONCE(ent->dev->cache.last_add,
0182                            jiffies);
0183                 }
0184             }
0185         }
0186         xa_unlock_irq(&ent->mkeys);
0187 
0188         /*
0189          * Notice xas_nomem() must always be called as it cleans
0190          * up any cached allocation.
0191          */
0192         if (!xas_nomem(&xas, GFP_KERNEL))
0193             break;
0194         xa_lock_irq(&ent->mkeys);
0195     }
0196     if (xas_error(&xas))
0197         return xas_error(&xas);
0198     if (WARN_ON(curr))
0199         return -EINVAL;
0200     return 0;
0201 }
0202 
0203 static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
0204 {
0205     void *old;
0206 
0207     ent->reserved--;
0208     old = __xa_erase(&ent->mkeys, ent->reserved);
0209     WARN_ON(old);
0210 }
0211 
0212 static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
0213 {
0214     void *old;
0215 
0216     old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
0217     WARN_ON(old);
0218     ent->stored++;
0219 }
0220 
0221 static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
0222 {
0223     void *old, *xa_mkey;
0224 
0225     ent->stored--;
0226     ent->reserved--;
0227 
0228     if (ent->stored == ent->reserved) {
0229         xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
0230         WARN_ON(!xa_mkey);
0231         return (u32)xa_to_value(xa_mkey);
0232     }
0233 
0234     xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
0235                  GFP_KERNEL);
0236     WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
0237     old = __xa_erase(&ent->mkeys, ent->reserved);
0238     WARN_ON(old);
0239     return (u32)xa_to_value(xa_mkey);
0240 }
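
/*
 * Layout of ent->mkeys maintained by the helpers above: indices
 * [0, ent->stored) hold completed mkeys (as xa_mk_value()), while
 * [ent->stored, ent->reserved) hold XA_ZERO_ENTRY placeholders for creations
 * that are still in flight. push_mkey() grows the reserved region (optionally
 * storing a value at the same time), push_to_reserved() converts the oldest
 * reservation into a stored mkey, and pop_stored_mkey() removes the most
 * recently stored mkey, shrinking both counters.
 */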
0241 
0242 static void create_mkey_callback(int status, struct mlx5_async_work *context)
0243 {
0244     struct mlx5r_async_create_mkey *mkey_out =
0245         container_of(context, struct mlx5r_async_create_mkey, cb_work);
0246     struct mlx5_cache_ent *ent = mkey_out->ent;
0247     struct mlx5_ib_dev *dev = ent->dev;
0248     unsigned long flags;
0249 
0250     if (status) {
0251         create_mkey_warn(dev, status, mkey_out->out);
0252         kfree(mkey_out);
0253         xa_lock_irqsave(&ent->mkeys, flags);
0254         undo_push_reserve_mkey(ent);
0255         WRITE_ONCE(dev->fill_delay, 1);
0256         xa_unlock_irqrestore(&ent->mkeys, flags);
0257         mod_timer(&dev->delay_timer, jiffies + HZ);
0258         return;
0259     }
0260 
0261     mkey_out->mkey |= mlx5_idx_to_mkey(
0262         MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
0263     WRITE_ONCE(dev->cache.last_add, jiffies);
0264 
0265     xa_lock_irqsave(&ent->mkeys, flags);
0266     push_to_reserved(ent, mkey_out->mkey);
0267     /* If we are doing fill_to_high_water then keep going. */
0268     queue_adjust_cache_locked(ent);
0269     xa_unlock_irqrestore(&ent->mkeys, flags);
0270     kfree(mkey_out);
0271 }
0272 
0273 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
0274 {
0275     int ret = 0;
0276 
0277     switch (access_mode) {
0278     case MLX5_MKC_ACCESS_MODE_MTT:
0279         ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
0280                            sizeof(struct mlx5_mtt));
0281         break;
0282     case MLX5_MKC_ACCESS_MODE_KSM:
0283         ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
0284                            sizeof(struct mlx5_klm));
0285         break;
0286     default:
0287         WARN_ON(1);
0288     }
0289     return ret;
0290 }
0291 
0292 static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
0293 {
0294     set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
0295     MLX5_SET(mkc, mkc, free, 1);
0296     MLX5_SET(mkc, mkc, umr_en, 1);
0297     MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
0298     MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
0299 
0300     MLX5_SET(mkc, mkc, translations_octword_size,
0301          get_mkc_octo_size(ent->access_mode, ent->ndescs));
0302     MLX5_SET(mkc, mkc, log_page_size, ent->page);
0303 }
0304 
0305 /* Asynchronously schedule new MRs to be populated in the cache. */
0306 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
0307 {
0308     struct mlx5r_async_create_mkey *async_create;
0309     void *mkc;
0310     int err = 0;
0311     int i;
0312 
0313     for (i = 0; i < num; i++) {
0314         async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
0315                        GFP_KERNEL);
0316         if (!async_create)
0317             return -ENOMEM;
0318         mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
0319                    memory_key_mkey_entry);
0320         set_cache_mkc(ent, mkc);
0321         async_create->ent = ent;
0322 
0323         err = push_mkey(ent, true, NULL);
0324         if (err)
0325             goto free_async_create;
0326 
0327         err = mlx5_ib_create_mkey_cb(async_create);
0328         if (err) {
0329             mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
0330             goto err_undo_reserve;
0331         }
0332     }
0333 
0334     return 0;
0335 
0336 err_undo_reserve:
0337     xa_lock_irq(&ent->mkeys);
0338     undo_push_reserve_mkey(ent);
0339     xa_unlock_irq(&ent->mkeys);
0340 free_async_create:
0341     kfree(async_create);
0342     return err;
0343 }
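
/*
 * Each mkey scheduled above first reserves a slot with
 * push_mkey(ent, true, NULL), which also caps the number of outstanding
 * asynchronous creations at MAX_PENDING_REG_MR (returning -EAGAIN beyond
 * that). The CREATE_MKEY command then completes in create_mkey_callback(),
 * where the reservation is turned into a stored mkey, or is undone on
 * failure.
 */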
0344 
0345 /* Synchronously create an mkey with a cache entry's parameters (cache-miss path) */
0346 static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
0347 {
0348     size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0349     void *mkc;
0350     u32 *in;
0351     int err;
0352 
0353     in = kzalloc(inlen, GFP_KERNEL);
0354     if (!in)
0355         return -ENOMEM;
0356     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0357     set_cache_mkc(ent, mkc);
0358 
0359     err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
0360     if (err)
0361         goto free_in;
0362 
0363     WRITE_ONCE(ent->dev->cache.last_add, jiffies);
0364 free_in:
0365     kfree(in);
0366     return err;
0367 }
0368 
0369 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
0370 {
0371     u32 mkey;
0372 
0373     lockdep_assert_held(&ent->mkeys.xa_lock);
0374     if (!ent->stored)
0375         return;
0376     mkey = pop_stored_mkey(ent);
0377     xa_unlock_irq(&ent->mkeys);
0378     mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
0379     xa_lock_irq(&ent->mkeys);
0380 }
0381 
0382 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
0383                 bool limit_fill)
0384      __acquires(&ent->mkeys) __releases(&ent->mkeys)
0385 {
0386     int err;
0387 
0388     lockdep_assert_held(&ent->mkeys.xa_lock);
0389 
0390     while (true) {
0391         if (limit_fill)
0392             target = ent->limit * 2;
0393         if (target == ent->reserved)
0394             return 0;
0395         if (target > ent->reserved) {
0396             u32 todo = target - ent->reserved;
0397 
0398             xa_unlock_irq(&ent->mkeys);
0399             err = add_keys(ent, todo);
0400             if (err == -EAGAIN)
0401                 usleep_range(3000, 5000);
0402             xa_lock_irq(&ent->mkeys);
0403             if (err) {
0404                 if (err != -EAGAIN)
0405                     return err;
0406             } else
0407                 return 0;
0408         } else {
0409             remove_cache_mr_locked(ent);
0410         }
0411     }
0412 }
0413 
0414 static ssize_t size_write(struct file *filp, const char __user *buf,
0415               size_t count, loff_t *pos)
0416 {
0417     struct mlx5_cache_ent *ent = filp->private_data;
0418     u32 target;
0419     int err;
0420 
0421     err = kstrtou32_from_user(buf, count, 0, &target);
0422     if (err)
0423         return err;
0424 
0425     /*
0426      * Target is the new total number of mkeys the user requests; however,
0427      * we cannot free MRs that are in use. Compute the target value for
0428      * stored mkeys.
0429      */
0430     xa_lock_irq(&ent->mkeys);
0431     if (target < ent->in_use) {
0432         err = -EINVAL;
0433         goto err_unlock;
0434     }
0435     target = target - ent->in_use;
0436     if (target < ent->limit || target > ent->limit*2) {
0437         err = -EINVAL;
0438         goto err_unlock;
0439     }
0440     err = resize_available_mrs(ent, target, false);
0441     if (err)
0442         goto err_unlock;
0443     xa_unlock_irq(&ent->mkeys);
0444 
0445     return count;
0446 
0447 err_unlock:
0448     xa_unlock_irq(&ent->mkeys);
0449     return err;
0450 }
0451 
0452 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
0453              loff_t *pos)
0454 {
0455     struct mlx5_cache_ent *ent = filp->private_data;
0456     char lbuf[20];
0457     int err;
0458 
0459     err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
0460     if (err < 0)
0461         return err;
0462 
0463     return simple_read_from_buffer(buf, count, pos, lbuf, err);
0464 }
0465 
0466 static const struct file_operations size_fops = {
0467     .owner  = THIS_MODULE,
0468     .open   = simple_open,
0469     .write  = size_write,
0470     .read   = size_read,
0471 };
0472 
0473 static ssize_t limit_write(struct file *filp, const char __user *buf,
0474                size_t count, loff_t *pos)
0475 {
0476     struct mlx5_cache_ent *ent = filp->private_data;
0477     u32 var;
0478     int err;
0479 
0480     err = kstrtou32_from_user(buf, count, 0, &var);
0481     if (err)
0482         return err;
0483 
0484     /*
0485      * Upon set we immediately fill the cache to the high water mark implied
0486      * by the limit.
0487      */
0488     xa_lock_irq(&ent->mkeys);
0489     ent->limit = var;
0490     err = resize_available_mrs(ent, 0, true);
0491     xa_unlock_irq(&ent->mkeys);
0492     if (err)
0493         return err;
0494     return count;
0495 }
0496 
0497 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
0498               loff_t *pos)
0499 {
0500     struct mlx5_cache_ent *ent = filp->private_data;
0501     char lbuf[20];
0502     int err;
0503 
0504     err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
0505     if (err < 0)
0506         return err;
0507 
0508     return simple_read_from_buffer(buf, count, pos, lbuf, err);
0509 }
0510 
0511 static const struct file_operations limit_fops = {
0512     .owner  = THIS_MODULE,
0513     .open   = simple_open,
0514     .write  = limit_write,
0515     .read   = limit_read,
0516 };
0517 
0518 static bool someone_adding(struct mlx5_mkey_cache *cache)
0519 {
0520     unsigned int i;
0521 
0522     for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0523         struct mlx5_cache_ent *ent = &cache->ent[i];
0524         bool ret;
0525 
0526         xa_lock_irq(&ent->mkeys);
0527         ret = ent->stored < ent->limit;
0528         xa_unlock_irq(&ent->mkeys);
0529         if (ret)
0530             return true;
0531     }
0532     return false;
0533 }
0534 
0535 /*
0536  * Check if the bucket is outside the high/low water marks and schedule an
0537  * async update. The cache refill has hysteresis: once the low water mark is
0538  * hit, it is refilled up to the high mark.
0539  */
0540 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
0541 {
0542     lockdep_assert_held(&ent->mkeys.xa_lock);
0543 
0544     if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
0545         return;
0546     if (ent->stored < ent->limit) {
0547         ent->fill_to_high_water = true;
0548         mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0549     } else if (ent->fill_to_high_water &&
0550            ent->reserved < 2 * ent->limit) {
0551         /*
0552          * Once we start populating due to hitting a low water mark
0553          * continue until we pass the high water mark.
0554          */
0555         mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0556     } else if (ent->stored == 2 * ent->limit) {
0557         ent->fill_to_high_water = false;
0558     } else if (ent->stored > 2 * ent->limit) {
0559         /* Queue deletion of excess entries */
0560         ent->fill_to_high_water = false;
0561         if (ent->stored != ent->reserved)
0562             queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
0563                        msecs_to_jiffies(1000));
0564         else
0565             mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0566     }
0567 }
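
/*
 * In the scheme above ent->limit is the low water mark and 2 * ent->limit is
 * the high water mark: dropping below the limit starts an immediate refill
 * that keeps going until reserved reaches twice the limit, while exceeding
 * twice the limit queues the shrink side of the work item (delayed by a
 * second if creations are still pending).
 */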
0568 
0569 static void __cache_work_func(struct mlx5_cache_ent *ent)
0570 {
0571     struct mlx5_ib_dev *dev = ent->dev;
0572     struct mlx5_mkey_cache *cache = &dev->cache;
0573     int err;
0574 
0575     xa_lock_irq(&ent->mkeys);
0576     if (ent->disabled)
0577         goto out;
0578 
0579     if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
0580         !READ_ONCE(dev->fill_delay)) {
0581         xa_unlock_irq(&ent->mkeys);
0582         err = add_keys(ent, 1);
0583         xa_lock_irq(&ent->mkeys);
0584         if (ent->disabled)
0585             goto out;
0586         if (err) {
0587             /*
0588              * EAGAIN only happens if there are pending MRs, so we
0589              * will be rescheduled when storing them. The only
0590              * failure path here is ENOMEM.
0591              */
0592             if (err != -EAGAIN) {
0593                 mlx5_ib_warn(
0594                     dev,
0595                     "command failed order %d, err %d\n",
0596                     ent->order, err);
0597                 queue_delayed_work(cache->wq, &ent->dwork,
0598                            msecs_to_jiffies(1000));
0599             }
0600         }
0601     } else if (ent->stored > 2 * ent->limit) {
0602         bool need_delay;
0603 
0604         /*
0605          * The remove_cache_mr_locked() logic is performed as a garbage
0606          * collection task. Such a task is intended to run when no other
0607          * active processes are running.
0608          *
0609          * need_resched() returns true if there are user tasks to be
0610          * activated in the near future.
0611          *
0612          * In that case, we don't execute remove_cache_mr_locked() and
0613          * postpone the garbage collection work to the next cycle, in
0614          * order to free CPU resources for other tasks.
0615          */
0616         xa_unlock_irq(&ent->mkeys);
0617         need_delay = need_resched() || someone_adding(cache) ||
0618                  !time_after(jiffies,
0619                      READ_ONCE(cache->last_add) + 300 * HZ);
0620         xa_lock_irq(&ent->mkeys);
0621         if (ent->disabled)
0622             goto out;
0623         if (need_delay) {
0624             queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
0625             goto out;
0626         }
0627         remove_cache_mr_locked(ent);
0628         queue_adjust_cache_locked(ent);
0629     }
0630 out:
0631     xa_unlock_irq(&ent->mkeys);
0632 }
0633 
0634 static void delayed_cache_work_func(struct work_struct *work)
0635 {
0636     struct mlx5_cache_ent *ent;
0637 
0638     ent = container_of(work, struct mlx5_cache_ent, dwork.work);
0639     __cache_work_func(ent);
0640 }
0641 
0642 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
0643                        struct mlx5_cache_ent *ent,
0644                        int access_flags)
0645 {
0646     struct mlx5_ib_mr *mr;
0647     int err;
0648 
0649     if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
0650         return ERR_PTR(-EOPNOTSUPP);
0651 
0652     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0653     if (!mr)
0654         return ERR_PTR(-ENOMEM);
0655 
0656     xa_lock_irq(&ent->mkeys);
0657     ent->in_use++;
0658 
0659     if (!ent->stored) {
0660         queue_adjust_cache_locked(ent);
0661         ent->miss++;
0662         xa_unlock_irq(&ent->mkeys);
0663         err = create_cache_mkey(ent, &mr->mmkey.key);
0664         if (err) {
0665             xa_lock_irq(&ent->mkeys);
0666             ent->in_use--;
0667             xa_unlock_irq(&ent->mkeys);
0668             kfree(mr);
0669             return ERR_PTR(err);
0670         }
0671     } else {
0672         mr->mmkey.key = pop_stored_mkey(ent);
0673         queue_adjust_cache_locked(ent);
0674         xa_unlock_irq(&ent->mkeys);
0675     }
0676     mr->mmkey.cache_ent = ent;
0677     mr->mmkey.type = MLX5_MKEY_MR;
0678     init_waitqueue_head(&mr->mmkey.wait);
0679     return mr;
0680 }
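
/*
 * Cache hit: a stored mkey is popped and the entry is topped up
 * asynchronously. Cache miss (nothing stored): the miss counter is bumped and
 * an mkey with the entry's parameters is created synchronously instead, so
 * the caller never waits on the background refill. ent->in_use is dropped
 * again when the MR is destroyed, or immediately on failure here.
 */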
0681 
0682 static void clean_keys(struct mlx5_ib_dev *dev, int c)
0683 {
0684     struct mlx5_mkey_cache *cache = &dev->cache;
0685     struct mlx5_cache_ent *ent = &cache->ent[c];
0686     u32 mkey;
0687 
0688     cancel_delayed_work(&ent->dwork);
0689     xa_lock_irq(&ent->mkeys);
0690     while (ent->stored) {
0691         mkey = pop_stored_mkey(ent);
0692         xa_unlock_irq(&ent->mkeys);
0693         mlx5_core_destroy_mkey(dev->mdev, mkey);
0694         xa_lock_irq(&ent->mkeys);
0695     }
0696     xa_unlock_irq(&ent->mkeys);
0697 }
0698 
0699 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
0700 {
0701     if (!mlx5_debugfs_root || dev->is_rep)
0702         return;
0703 
0704     debugfs_remove_recursive(dev->cache.root);
0705     dev->cache.root = NULL;
0706 }
0707 
0708 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
0709 {
0710     struct mlx5_mkey_cache *cache = &dev->cache;
0711     struct mlx5_cache_ent *ent;
0712     struct dentry *dir;
0713     int i;
0714 
0715     if (!mlx5_debugfs_root || dev->is_rep)
0716         return;
0717 
0718     cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
0719 
0720     for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0721         ent = &cache->ent[i];
0722         sprintf(ent->name, "%d", ent->order);
0723         dir = debugfs_create_dir(ent->name, cache->root);
0724         debugfs_create_file("size", 0600, dir, ent, &size_fops);
0725         debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
0726         debugfs_create_ulong("cur", 0400, dir, &ent->stored);
0727         debugfs_create_u32("miss", 0600, dir, &ent->miss);
0728     }
0729 }
0730 
0731 static void delay_time_func(struct timer_list *t)
0732 {
0733     struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
0734 
0735     WRITE_ONCE(dev->fill_delay, 0);
0736 }
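
/*
 * fill_delay is set (and this timer armed for one second) by
 * create_mkey_callback() when an asynchronous mkey creation fails; while it
 * is set, queue_adjust_cache_locked() and the cache work refrain from issuing
 * new creations. The timer simply clears the flag again.
 */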
0737 
0738 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
0739 {
0740     struct mlx5_mkey_cache *cache = &dev->cache;
0741     struct mlx5_cache_ent *ent;
0742     int i;
0743 
0744     mutex_init(&dev->slow_path_mutex);
0745     cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
0746     if (!cache->wq) {
0747         mlx5_ib_warn(dev, "failed to create work queue\n");
0748         return -ENOMEM;
0749     }
0750 
0751     mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
0752     timer_setup(&dev->delay_timer, delay_time_func, 0);
0753     for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0754         ent = &cache->ent[i];
0755         xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
0756         ent->order = i + 2;
0757         ent->dev = dev;
0758         ent->limit = 0;
0759 
0760         INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
0761 
0762         if (i > MKEY_CACHE_LAST_STD_ENTRY) {
0763             mlx5_odp_init_mkey_cache_entry(ent);
0764             continue;
0765         }
0766 
0767         if (ent->order > mkey_cache_max_order(dev))
0768             continue;
0769 
0770         ent->page = PAGE_SHIFT;
0771         ent->ndescs = 1 << ent->order;
0772         ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
0773         if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
0774             !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
0775             mlx5r_umr_can_load_pas(dev, 0))
0776             ent->limit = dev->mdev->profile.mr_cache[i].limit;
0777         else
0778             ent->limit = 0;
0779         xa_lock_irq(&ent->mkeys);
0780         queue_adjust_cache_locked(ent);
0781         xa_unlock_irq(&ent->mkeys);
0782     }
0783 
0784     mlx5_mkey_cache_debugfs_init(dev);
0785 
0786     return 0;
0787 }
0788 
0789 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
0790 {
0791     unsigned int i;
0792 
0793     if (!dev->cache.wq)
0794         return 0;
0795 
0796     for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0797         struct mlx5_cache_ent *ent = &dev->cache.ent[i];
0798 
0799         xa_lock_irq(&ent->mkeys);
0800         ent->disabled = true;
0801         xa_unlock_irq(&ent->mkeys);
0802         cancel_delayed_work_sync(&ent->dwork);
0803     }
0804 
0805     mlx5_mkey_cache_debugfs_cleanup(dev);
0806     mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
0807 
0808     for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
0809         clean_keys(dev, i);
0810 
0811     destroy_workqueue(dev->cache.wq);
0812     del_timer_sync(&dev->delay_timer);
0813 
0814     return 0;
0815 }
0816 
0817 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
0818 {
0819     struct mlx5_ib_dev *dev = to_mdev(pd->device);
0820     int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0821     struct mlx5_ib_mr *mr;
0822     void *mkc;
0823     u32 *in;
0824     int err;
0825 
0826     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0827     if (!mr)
0828         return ERR_PTR(-ENOMEM);
0829 
0830     in = kzalloc(inlen, GFP_KERNEL);
0831     if (!in) {
0832         err = -ENOMEM;
0833         goto err_free;
0834     }
0835 
0836     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0837 
0838     MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
0839     MLX5_SET(mkc, mkc, length64, 1);
0840     set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
0841                       pd);
0842 
0843     err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
0844     if (err)
0845         goto err_in;
0846 
0847     kfree(in);
0848     mr->mmkey.type = MLX5_MKEY_MR;
0849     mr->ibmr.lkey = mr->mmkey.key;
0850     mr->ibmr.rkey = mr->mmkey.key;
0851     mr->umem = NULL;
0852 
0853     return &mr->ibmr;
0854 
0855 err_in:
0856     kfree(in);
0857 
0858 err_free:
0859     kfree(mr);
0860 
0861     return ERR_PTR(err);
0862 }
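
/*
 * length64 together with MLX5_MKC_ACCESS_MODE_PA yields an mkey that spans
 * the full address range using physical addressing, which is what a kernel
 * DMA MR needs; relaxed ordering is requested opportunistically and only
 * takes effect when the device capabilities allow it.
 */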
0863 
0864 static int get_octo_len(u64 addr, u64 len, int page_shift)
0865 {
0866     u64 page_size = 1ULL << page_shift;
0867     u64 offset;
0868     int npages;
0869 
0870     offset = addr & (page_size - 1);
0871     npages = ALIGN(len + offset, page_size) >> page_shift;
0872     return (npages + 1) / 2;
0873 }
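
/*
 * A 16-byte octoword holds two 8-byte MTT entries, hence the
 * (npages + 1) / 2 round-up. For example, a page-aligned 2 MiB region with
 * 4 KiB pages needs 512 MTTs, i.e. 256 octowords; the same region starting
 * mid-page spans 513 pages and therefore needs 257 octowords.
 */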
0874 
0875 static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
0876 {
0877     if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
0878         return MKEY_CACHE_LAST_STD_ENTRY + 2;
0879     return MLX5_MAX_UMR_SHIFT;
0880 }
0881 
0882 static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
0883                             unsigned int order)
0884 {
0885     struct mlx5_mkey_cache *cache = &dev->cache;
0886 
0887     if (order < cache->ent[0].order)
0888         return &cache->ent[0];
0889     order = order - cache->ent[0].order;
0890     if (order > MKEY_CACHE_LAST_STD_ENTRY)
0891         return NULL;
0892     return &cache->ent[order];
0893 }
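
/*
 * Entry i serves mkeys of order cache->ent[0].order + i (the entries are set
 * up with order = i + 2 in mlx5_mkey_cache_init()). Requests smaller than the
 * first entry are rounded up to it, and anything beyond
 * MKEY_CACHE_LAST_STD_ENTRY returns NULL so the caller falls back to an
 * uncached registration.
 */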
0894 
0895 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
0896               u64 length, int access_flags, u64 iova)
0897 {
0898     mr->ibmr.lkey = mr->mmkey.key;
0899     mr->ibmr.rkey = mr->mmkey.key;
0900     mr->ibmr.length = length;
0901     mr->ibmr.device = &dev->ib_dev;
0902     mr->ibmr.iova = iova;
0903     mr->access_flags = access_flags;
0904 }
0905 
0906 static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
0907                           u64 iova)
0908 {
0909     /*
0910      * The alignment of iova has already been checked upon entering
0911      * UVERBS_METHOD_REG_DMABUF_MR
0912      */
0913     umem->iova = iova;
0914     return PAGE_SIZE;
0915 }
0916 
0917 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
0918                          struct ib_umem *umem, u64 iova,
0919                          int access_flags)
0920 {
0921     struct mlx5_ib_dev *dev = to_mdev(pd->device);
0922     struct mlx5_cache_ent *ent;
0923     struct mlx5_ib_mr *mr;
0924     unsigned int page_size;
0925 
0926     if (umem->is_dmabuf)
0927         page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
0928     else
0929         page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
0930                              0, iova);
0931     if (WARN_ON(!page_size))
0932         return ERR_PTR(-EINVAL);
0933     ent = mkey_cache_ent_from_order(
0934         dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
0935     /*
0936      * Matches the access check in mlx5_mr_cache_alloc(). If the MR can't
0937      * come from the cache then synchronously create an uncached one.
0938      */
0939     if (!ent || ent->limit == 0 ||
0940         !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
0941         mutex_lock(&dev->slow_path_mutex);
0942         mr = reg_create(pd, umem, iova, access_flags, page_size, false);
0943         mutex_unlock(&dev->slow_path_mutex);
0944         return mr;
0945     }
0946 
0947     mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
0948     if (IS_ERR(mr))
0949         return mr;
0950 
0951     mr->ibmr.pd = pd;
0952     mr->umem = umem;
0953     mr->page_shift = order_base_2(page_size);
0954     set_mr_fields(dev, mr, umem->length, access_flags, iova);
0955 
0956     return mr;
0957 }
0958 
0959 /*
0960  * Allocate a new mlx5_ib_mr and create its mkey directly through the command
0961  * interface; if populate is false the mkey is left free to be filled via UMR.
0962  */
0963 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
0964                      u64 iova, int access_flags,
0965                      unsigned int page_size, bool populate)
0966 {
0967     struct mlx5_ib_dev *dev = to_mdev(pd->device);
0968     struct mlx5_ib_mr *mr;
0969     __be64 *pas;
0970     void *mkc;
0971     int inlen;
0972     u32 *in;
0973     int err;
0974     bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
0975 
0976     if (!page_size)
0977         return ERR_PTR(-EINVAL);
0978     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0979     if (!mr)
0980         return ERR_PTR(-ENOMEM);
0981 
0982     mr->ibmr.pd = pd;
0983     mr->access_flags = access_flags;
0984     mr->page_shift = order_base_2(page_size);
0985 
0986     inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0987     if (populate)
0988         inlen += sizeof(*pas) *
0989              roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
0990     in = kvzalloc(inlen, GFP_KERNEL);
0991     if (!in) {
0992         err = -ENOMEM;
0993         goto err_1;
0994     }
0995     pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
0996     if (populate) {
0997         if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
0998             err = -EINVAL;
0999             goto err_2;
1000         }
1001         mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
1002                      pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1003     }
1004 
1005     /* The pg_access bit allows setting the access flags
1006      * in the page list submitted with the command. */
1007     MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1008 
1009     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1010     set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
1011                       populate ? pd : dev->umrc.pd);
1012     MLX5_SET(mkc, mkc, free, !populate);
1013     MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1014     MLX5_SET(mkc, mkc, umr_en, 1);
1015 
1016     MLX5_SET64(mkc, mkc, len, umem->length);
1017     MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1018     MLX5_SET(mkc, mkc, translations_octword_size,
1019          get_octo_len(iova, umem->length, mr->page_shift));
1020     MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
1021     if (populate) {
1022         MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1023              get_octo_len(iova, umem->length, mr->page_shift));
1024     }
1025 
1026     err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1027     if (err) {
1028         mlx5_ib_warn(dev, "create mkey failed\n");
1029         goto err_2;
1030     }
1031     mr->mmkey.type = MLX5_MKEY_MR;
1032     mr->umem = umem;
1033     set_mr_fields(dev, mr, umem->length, access_flags, iova);
1034     kvfree(in);
1035 
1036     mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1037 
1038     return mr;
1039 
1040 err_2:
1041     kvfree(in);
1042 err_1:
1043     kfree(mr);
1044     return ERR_PTR(err);
1045 }
1046 
1047 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1048                        u64 length, int acc, int mode)
1049 {
1050     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1051     int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1052     struct mlx5_ib_mr *mr;
1053     void *mkc;
1054     u32 *in;
1055     int err;
1056 
1057     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1058     if (!mr)
1059         return ERR_PTR(-ENOMEM);
1060 
1061     in = kzalloc(inlen, GFP_KERNEL);
1062     if (!in) {
1063         err = -ENOMEM;
1064         goto err_free;
1065     }
1066 
1067     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1068 
1069     MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1070     MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1071     MLX5_SET64(mkc, mkc, len, length);
1072     set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
1073 
1074     err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1075     if (err)
1076         goto err_in;
1077 
1078     kfree(in);
1079 
1080     set_mr_fields(dev, mr, length, acc, start_addr);
1081 
1082     return &mr->ibmr;
1083 
1084 err_in:
1085     kfree(in);
1086 
1087 err_free:
1088     kfree(mr);
1089 
1090     return ERR_PTR(err);
1091 }
1092 
1093 int mlx5_ib_advise_mr(struct ib_pd *pd,
1094               enum ib_uverbs_advise_mr_advice advice,
1095               u32 flags,
1096               struct ib_sge *sg_list,
1097               u32 num_sge,
1098               struct uverbs_attr_bundle *attrs)
1099 {
1100     if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1101         advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1102         advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
1103         return -EOPNOTSUPP;
1104 
1105     return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1106                      sg_list, num_sge);
1107 }
1108 
1109 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1110                 struct ib_dm_mr_attr *attr,
1111                 struct uverbs_attr_bundle *attrs)
1112 {
1113     struct mlx5_ib_dm *mdm = to_mdm(dm);
1114     struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1115     u64 start_addr = mdm->dev_addr + attr->offset;
1116     int mode;
1117 
1118     switch (mdm->type) {
1119     case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1120         if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1121             return ERR_PTR(-EINVAL);
1122 
1123         mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1124         start_addr -= pci_resource_start(dev->pdev, 0);
1125         break;
1126     case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1127     case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1128     case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
1129         if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1130             return ERR_PTR(-EINVAL);
1131 
1132         mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1133         break;
1134     default:
1135         return ERR_PTR(-EINVAL);
1136     }
1137 
1138     return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1139                  attr->access_flags, mode);
1140 }
1141 
1142 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
1143                     u64 iova, int access_flags)
1144 {
1145     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1146     struct mlx5_ib_mr *mr = NULL;
1147     bool xlt_with_umr;
1148     int err;
1149 
1150     xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
1151     if (xlt_with_umr) {
1152         mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
1153     } else {
1154         unsigned int page_size = mlx5_umem_find_best_pgsz(
1155             umem, mkc, log_page_size, 0, iova);
1156 
1157         mutex_lock(&dev->slow_path_mutex);
1158         mr = reg_create(pd, umem, iova, access_flags, page_size, true);
1159         mutex_unlock(&dev->slow_path_mutex);
1160     }
1161     if (IS_ERR(mr)) {
1162         ib_umem_release(umem);
1163         return ERR_CAST(mr);
1164     }
1165 
1166     mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1167 
1168     atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1169 
1170     if (xlt_with_umr) {
1171         /*
1172          * If the MR was created with reg_create then it will be
1173          * configured properly but left disabled. It is safe to go ahead
1174          * and configure it again via UMR while enabling it.
1175          */
1176         err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
1177         if (err) {
1178             mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1179             return ERR_PTR(err);
1180         }
1181     }
1182     return &mr->ibmr;
1183 }
1184 
1185 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
1186                     u64 iova, int access_flags,
1187                     struct ib_udata *udata)
1188 {
1189     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1190     struct ib_umem_odp *odp;
1191     struct mlx5_ib_mr *mr;
1192     int err;
1193 
1194     if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1195         return ERR_PTR(-EOPNOTSUPP);
1196 
1197     err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
1198     if (err)
1199         return ERR_PTR(err);
1200     if (!start && length == U64_MAX) {
1201         if (iova != 0)
1202             return ERR_PTR(-EINVAL);
1203         if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1204             return ERR_PTR(-EINVAL);
1205 
1206         mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1207         if (IS_ERR(mr))
1208             return ERR_CAST(mr);
1209         return &mr->ibmr;
1210     }
1211 
1212     /* ODP requires xlt update via umr to work. */
1213     if (!mlx5r_umr_can_load_pas(dev, length))
1214         return ERR_PTR(-EINVAL);
1215 
1216     odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
1217                   &mlx5_mn_ops);
1218     if (IS_ERR(odp))
1219         return ERR_CAST(odp);
1220 
1221     mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
1222     if (IS_ERR(mr)) {
1223         ib_umem_release(&odp->umem);
1224         return ERR_CAST(mr);
1225     }
1226     xa_init(&mr->implicit_children);
1227 
1228     odp->private = mr;
1229     err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
1230     if (err)
1231         goto err_dereg_mr;
1232 
1233     err = mlx5_ib_init_odp_mr(mr);
1234     if (err)
1235         goto err_dereg_mr;
1236     return &mr->ibmr;
1237 
1238 err_dereg_mr:
1239     mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1240     return ERR_PTR(err);
1241 }
1242 
1243 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1244                   u64 iova, int access_flags,
1245                   struct ib_udata *udata)
1246 {
1247     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1248     struct ib_umem *umem;
1249 
1250     if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1251         return ERR_PTR(-EOPNOTSUPP);
1252 
1253     mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1254             start, iova, length, access_flags);
1255 
1256     if (access_flags & IB_ACCESS_ON_DEMAND)
1257         return create_user_odp_mr(pd, start, length, iova, access_flags,
1258                       udata);
1259     umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
1260     if (IS_ERR(umem))
1261         return ERR_CAST(umem);
1262     return create_real_mr(pd, umem, iova, access_flags);
1263 }
1264 
1265 static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
1266 {
1267     struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
1268     struct mlx5_ib_mr *mr = umem_dmabuf->private;
1269 
1270     dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
1271 
1272     if (!umem_dmabuf->sgt)
1273         return;
1274 
1275     mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
1276     ib_umem_dmabuf_unmap_pages(umem_dmabuf);
1277 }
1278 
1279 static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
1280     .allow_peer2peer = 1,
1281     .move_notify = mlx5_ib_dmabuf_invalidate_cb,
1282 };
1283 
1284 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
1285                      u64 length, u64 virt_addr,
1286                      int fd, int access_flags,
1287                      struct ib_udata *udata)
1288 {
1289     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1290     struct mlx5_ib_mr *mr = NULL;
1291     struct ib_umem_dmabuf *umem_dmabuf;
1292     int err;
1293 
1294     if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
1295         !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1296         return ERR_PTR(-EOPNOTSUPP);
1297 
1298     mlx5_ib_dbg(dev,
1299             "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
1300             offset, virt_addr, length, fd, access_flags);
1301 
1302     /* dmabuf requires xlt update via umr to work. */
1303     if (!mlx5r_umr_can_load_pas(dev, length))
1304         return ERR_PTR(-EINVAL);
1305 
1306     umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
1307                      access_flags,
1308                      &mlx5_ib_dmabuf_attach_ops);
1309     if (IS_ERR(umem_dmabuf)) {
1310         mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
1311                 PTR_ERR(umem_dmabuf));
1312         return ERR_CAST(umem_dmabuf);
1313     }
1314 
1315     mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
1316                 access_flags);
1317     if (IS_ERR(mr)) {
1318         ib_umem_release(&umem_dmabuf->umem);
1319         return ERR_CAST(mr);
1320     }
1321 
1322     mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1323 
1324     atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
1325     umem_dmabuf->private = mr;
1326     err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
1327     if (err)
1328         goto err_dereg_mr;
1329 
1330     err = mlx5_ib_init_dmabuf_mr(mr);
1331     if (err)
1332         goto err_dereg_mr;
1333     return &mr->ibmr;
1334 
1335 err_dereg_mr:
1336     mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1337     return ERR_PTR(err);
1338 }
1339 
1340 /*
1341  * True if the change in access flags can be done via UMR, only some access
1342  * flags can be updated.
1343  */
1344 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
1345                      unsigned int current_access_flags,
1346                      unsigned int target_access_flags)
1347 {
1348     unsigned int diffs = current_access_flags ^ target_access_flags;
1349 
1350     if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
1351               IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
1352         return false;
1353     return mlx5r_umr_can_reconfig(dev, current_access_flags,
1354                       target_access_flags);
1355 }
1356 
1357 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
1358                   struct ib_umem *new_umem,
1359                   int new_access_flags, u64 iova,
1360                   unsigned long *page_size)
1361 {
1362     struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1363 
1364     /* We only track the allocated sizes of MRs from the cache */
1365     if (!mr->mmkey.cache_ent)
1366         return false;
1367     if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
1368         return false;
1369 
1370     *page_size =
1371         mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
1372     if (WARN_ON(!*page_size))
1373         return false;
1374     return (1ULL << mr->mmkey.cache_ent->order) >=
1375            ib_umem_num_dma_blocks(new_umem, *page_size);
1376 }
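
/*
 * A cache MR of order N was created with 2^N translation entries, so its PAS
 * list can only be rewritten in place when the new umem fits in that many DMA
 * blocks at the best page size found above; otherwise the caller creates a
 * fresh MR instead.
 */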
1377 
1378 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1379              int access_flags, int flags, struct ib_umem *new_umem,
1380              u64 iova, unsigned long page_size)
1381 {
1382     struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1383     int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
1384     struct ib_umem *old_umem = mr->umem;
1385     int err;
1386 
1387     /*
1388      * To keep everything simple the MR is revoked before we start to mess
1389      * with it. This ensure the change is atomic relative to any use of the
1390      * MR.
1391      */
1392     err = mlx5r_umr_revoke_mr(mr);
1393     if (err)
1394         return err;
1395 
1396     if (flags & IB_MR_REREG_PD) {
1397         mr->ibmr.pd = pd;
1398         upd_flags |= MLX5_IB_UPD_XLT_PD;
1399     }
1400     if (flags & IB_MR_REREG_ACCESS) {
1401         mr->access_flags = access_flags;
1402         upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1403     }
1404 
1405     mr->ibmr.iova = iova;
1406     mr->ibmr.length = new_umem->length;
1408     mr->page_shift = order_base_2(page_size);
1409     mr->umem = new_umem;
1410     err = mlx5r_umr_update_mr_pas(mr, upd_flags);
1411     if (err) {
1412         /*
1413          * The MR is revoked at this point so there is no issue to free
1414          * new_umem.
1415          */
1416         mr->umem = old_umem;
1417         return err;
1418     }
1419 
1420     atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
1421     ib_umem_release(old_umem);
1422     atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
1423     return 0;
1424 }
1425 
1426 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1427                     u64 length, u64 iova, int new_access_flags,
1428                     struct ib_pd *new_pd,
1429                     struct ib_udata *udata)
1430 {
1431     struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1432     struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1433     int err;
1434 
1435     if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1436         return ERR_PTR(-EOPNOTSUPP);
1437 
1438     mlx5_ib_dbg(
1439         dev,
1440         "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1441         start, iova, length, new_access_flags);
1442 
1443     if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
1444         return ERR_PTR(-EOPNOTSUPP);
1445 
1446     if (!(flags & IB_MR_REREG_ACCESS))
1447         new_access_flags = mr->access_flags;
1448     if (!(flags & IB_MR_REREG_PD))
1449         new_pd = ib_mr->pd;
1450 
1451     if (!(flags & IB_MR_REREG_TRANS)) {
1452         struct ib_umem *umem;
1453 
1454         /* Fast path for PD/access change */
1455         if (can_use_umr_rereg_access(dev, mr->access_flags,
1456                          new_access_flags)) {
1457             err = mlx5r_umr_rereg_pd_access(mr, new_pd,
1458                             new_access_flags);
1459             if (err)
1460                 return ERR_PTR(err);
1461             return NULL;
1462         }
1463         /* DM or ODP MRs don't have a normal umem so we can't re-use it */
1464         if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
1465             goto recreate;
1466 
1467         /*
1468          * Only one active MR can refer to a umem at one time; revoke the
1469          * old MR before assigning the umem to the new one.
1470          */
1471         err = mlx5r_umr_revoke_mr(mr);
1472         if (err)
1473             return ERR_PTR(err);
1474         umem = mr->umem;
1475         mr->umem = NULL;
1476         atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1477 
1478         return create_real_mr(new_pd, umem, mr->ibmr.iova,
1479                       new_access_flags);
1480     }
1481 
1482     /*
1483      * DM doesn't have a PAS list so we can't re-use it; ODP/dmabuf does,
1484      * but the logic around releasing the umem is different.
1485      */
1486     if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
1487         goto recreate;
1488 
1489     if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
1490         can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
1491         struct ib_umem *new_umem;
1492         unsigned long page_size;
1493 
1494         new_umem = ib_umem_get(&dev->ib_dev, start, length,
1495                        new_access_flags);
1496         if (IS_ERR(new_umem))
1497             return ERR_CAST(new_umem);
1498 
1499         /* Fast path for PAS change */
1500         if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
1501                       &page_size)) {
1502             err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
1503                         new_umem, iova, page_size);
1504             if (err) {
1505                 ib_umem_release(new_umem);
1506                 return ERR_PTR(err);
1507             }
1508             return NULL;
1509         }
1510         return create_real_mr(new_pd, new_umem, iova, new_access_flags);
1511     }
1512 
1513     /*
1514      * Everything else has no state we can preserve, just create a new MR
1515      * from scratch
1516      */
1517 recreate:
1518     return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
1519                    new_access_flags, udata);
1520 }
1521 
1522 static int
1523 mlx5_alloc_priv_descs(struct ib_device *device,
1524               struct mlx5_ib_mr *mr,
1525               int ndescs,
1526               int desc_size)
1527 {
1528     struct mlx5_ib_dev *dev = to_mdev(device);
1529     struct device *ddev = &dev->mdev->pdev->dev;
1530     int size = ndescs * desc_size;
1531     int add_size;
1532     int ret;
1533 
1534     add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1535 
1536     mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1537     if (!mr->descs_alloc)
1538         return -ENOMEM;
1539 
1540     mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1541 
1542     mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
1543     if (dma_mapping_error(ddev, mr->desc_map)) {
1544         ret = -ENOMEM;
1545         goto err;
1546     }
1547 
1548     return 0;
1549 err:
1550     kfree(mr->descs_alloc);
1551 
1552     return ret;
1553 }
1554 
1555 static void
1556 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1557 {
1558     if (!mr->umem && mr->descs) {
1559         struct ib_device *device = mr->ibmr.device;
1560         int size = mr->max_descs * mr->desc_size;
1561         struct mlx5_ib_dev *dev = to_mdev(device);
1562 
1563         dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
1564                  DMA_TO_DEVICE);
1565         kfree(mr->descs_alloc);
1566         mr->descs = NULL;
1567     }
1568 }
1569 
1570 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1571 {
1572     struct mlx5_ib_mr *mr = to_mmr(ibmr);
1573     struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1574     int rc;
1575 
1576     /*
1577      * Any async use of the MR must hold the refcount; once the refcount
1578      * goes to zero no other thread (ODP page faults, prefetch, UMR
1579      * activity, etc.) can touch the mkey, so it is safe to destroy it.
1580      */
1581     if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
1582         refcount_read(&mr->mmkey.usecount) != 0 &&
1583         xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
1584         mlx5r_deref_wait_odp_mkey(&mr->mmkey);
1585 
1586     if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1587         xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1588                mr->sig, NULL, GFP_KERNEL);
1589 
1590         if (mr->mtt_mr) {
1591             rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
1592             if (rc)
1593                 return rc;
1594             mr->mtt_mr = NULL;
1595         }
1596         if (mr->klm_mr) {
1597             rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
1598             if (rc)
1599                 return rc;
1600             mr->klm_mr = NULL;
1601         }
1602 
1603         if (mlx5_core_destroy_psv(dev->mdev,
1604                       mr->sig->psv_memory.psv_idx))
1605             mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1606                      mr->sig->psv_memory.psv_idx);
1607         if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1608             mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1609                      mr->sig->psv_wire.psv_idx);
1610         kfree(mr->sig);
1611         mr->sig = NULL;
1612     }
1613 
1614     /* Stop DMA */
1615     if (mr->mmkey.cache_ent) {
1616         xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
1617         mr->mmkey.cache_ent->in_use--;
1618         xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
1619 
1620         if (mlx5r_umr_revoke_mr(mr) ||
1621             push_mkey(mr->mmkey.cache_ent, false,
1622                   xa_mk_value(mr->mmkey.key)))
1623             mr->mmkey.cache_ent = NULL;
1624     }
1625     if (!mr->mmkey.cache_ent) {
1626         rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
1627         if (rc)
1628             return rc;
1629     }
1630 
1631     if (mr->umem) {
1632         bool is_odp = is_odp_mr(mr);
1633 
1634         if (!is_odp)
1635             atomic_sub(ib_umem_num_pages(mr->umem),
1636                    &dev->mdev->priv.reg_pages);
1637         ib_umem_release(mr->umem);
1638         if (is_odp)
1639             mlx5_ib_free_odp_mr(mr);
1640     }
1641 
1642     if (!mr->mmkey.cache_ent)
1643         mlx5_free_priv_descs(mr);
1644 
1645     kfree(mr);
1646     return 0;
1647 }
1648 
1649 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1650                    int access_mode, int page_shift)
1651 {
1652     void *mkc;
1653 
1654     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1655 
1656     /* This is only used from the kernel, so setting the PD is OK. */
1657     set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
1658     MLX5_SET(mkc, mkc, free, 1);
1659     MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1660     MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1661     MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1662     MLX5_SET(mkc, mkc, umr_en, 1);
1663     MLX5_SET(mkc, mkc, log_page_size, page_shift);
1664 }
1665 
1666 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1667                   int ndescs, int desc_size, int page_shift,
1668                   int access_mode, u32 *in, int inlen)
1669 {
1670     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1671     int err;
1672 
1673     mr->access_mode = access_mode;
1674     mr->desc_size = desc_size;
1675     mr->max_descs = ndescs;
1676 
1677     err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1678     if (err)
1679         return err;
1680 
1681     mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1682 
1683     err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1684     if (err)
1685         goto err_free_descs;
1686 
1687     mr->mmkey.type = MLX5_MKEY_MR;
1688     mr->ibmr.lkey = mr->mmkey.key;
1689     mr->ibmr.rkey = mr->mmkey.key;
1690 
1691     return 0;
1692 
1693 err_free_descs:
1694     mlx5_free_priv_descs(mr);
1695     return err;
1696 }
1697 
1698 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1699                 u32 max_num_sg, u32 max_num_meta_sg,
1700                 int desc_size, int access_mode)
1701 {
1702     int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1703     int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1704     int page_shift = 0;
1705     struct mlx5_ib_mr *mr;
1706     u32 *in;
1707     int err;
1708 
1709     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1710     if (!mr)
1711         return ERR_PTR(-ENOMEM);
1712 
1713     mr->ibmr.pd = pd;
1714     mr->ibmr.device = pd->device;
1715 
1716     in = kzalloc(inlen, GFP_KERNEL);
1717     if (!in) {
1718         err = -ENOMEM;
1719         goto err_free;
1720     }
1721 
1722     if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1723         page_shift = PAGE_SHIFT;
1724 
1725     err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1726                      access_mode, in, inlen);
1727     if (err)
1728         goto err_free_in;
1729 
1730     mr->umem = NULL;
1731     kfree(in);
1732 
1733     return mr;
1734 
1735 err_free_in:
1736     kfree(in);
1737 err_free:
1738     kfree(mr);
1739     return ERR_PTR(err);
1740 }
1741 
1742 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1743                     int ndescs, u32 *in, int inlen)
1744 {
1745     return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
1746                       PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
1747                       inlen);
1748 }
1749 
1750 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1751                     int ndescs, u32 *in, int inlen)
1752 {
1753     return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
1754                       0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1755 }
1756 
1757 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1758                       int max_num_sg, int max_num_meta_sg,
1759                       u32 *in, int inlen)
1760 {
1761     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1762     u32 psv_index[2];
1763     void *mkc;
1764     int err;
1765 
1766     mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1767     if (!mr->sig)
1768         return -ENOMEM;
1769 
1770     /* create mem & wire PSVs */
1771     err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
1772     if (err)
1773         goto err_free_sig;
1774 
1775     mr->sig->psv_memory.psv_idx = psv_index[0];
1776     mr->sig->psv_wire.psv_idx = psv_index[1];
1777 
1778     mr->sig->sig_status_checked = true;
1779     mr->sig->sig_err_exists = false;
1780     /* Arm SIGERR on the next UMR */
1781     ++mr->sig->sigerr_count;
1782     mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1783                      sizeof(struct mlx5_klm),
1784                      MLX5_MKC_ACCESS_MODE_KLMS);
1785     if (IS_ERR(mr->klm_mr)) {
1786         err = PTR_ERR(mr->klm_mr);
1787         goto err_destroy_psv;
1788     }
1789     mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1790                      sizeof(struct mlx5_mtt),
1791                      MLX5_MKC_ACCESS_MODE_MTT);
1792     if (IS_ERR(mr->mtt_mr)) {
1793         err = PTR_ERR(mr->mtt_mr);
1794         goto err_free_klm_mr;
1795     }
1796 
1797     /* Set BSF descriptors for the mkey */
1798     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1799     MLX5_SET(mkc, mkc, bsf_en, 1);
1800     MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1801 
1802     err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
1803                      MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1804     if (err)
1805         goto err_free_mtt_mr;
1806 
1807     err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1808                   mr->sig, GFP_KERNEL));
1809     if (err)
1810         goto err_free_descs;
1811     return 0;
1812 
1813 err_free_descs:
1814     destroy_mkey(dev, mr);
1815     mlx5_free_priv_descs(mr);
1816 err_free_mtt_mr:
1817     mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
1818     mr->mtt_mr = NULL;
1819 err_free_klm_mr:
1820     mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
1821     mr->klm_mr = NULL;
1822 err_destroy_psv:
1823     if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
1824         mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1825                  mr->sig->psv_memory.psv_idx);
1826     if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1827         mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1828                  mr->sig->psv_wire.psv_idx);
1829 err_free_sig:
1830     kfree(mr->sig);
1831 
1832     return err;
1833 }
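
     /*
      * The integrity MR assembled above is a composite object: the parent
      * KLM mkey with BSF enabled, a memory PSV and a wire PSV, plus two
      * internal PI MRs (mtt_mr and klm_mr) that back the map_mr_sg_pi()
      * paths further down in this file.
      */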
1834 
1835 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
1836                     enum ib_mr_type mr_type, u32 max_num_sg,
1837                     u32 max_num_meta_sg)
1838 {
1839     struct mlx5_ib_dev *dev = to_mdev(pd->device);
1840     int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1841     int ndescs = ALIGN(max_num_sg, 4);
1842     struct mlx5_ib_mr *mr;
1843     u32 *in;
1844     int err;
1845 
1846     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1847     if (!mr)
1848         return ERR_PTR(-ENOMEM);
1849 
1850     in = kzalloc(inlen, GFP_KERNEL);
1851     if (!in) {
1852         err = -ENOMEM;
1853         goto err_free;
1854     }
1855 
1856     mr->ibmr.device = pd->device;
1857     mr->umem = NULL;
1858 
1859     switch (mr_type) {
1860     case IB_MR_TYPE_MEM_REG:
1861         err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
1862         break;
1863     case IB_MR_TYPE_SG_GAPS:
1864         err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
1865         break;
1866     case IB_MR_TYPE_INTEGRITY:
1867         err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
1868                          max_num_meta_sg, in, inlen);
1869         break;
1870     default:
1871         mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1872         err = -EINVAL;
1873     }
1874 
1875     if (err)
1876         goto err_free_in;
1877 
1878     kfree(in);
1879 
1880     return &mr->ibmr;
1881 
1882 err_free_in:
1883     kfree(in);
1884 err_free:
1885     kfree(mr);
1886     return ERR_PTR(err);
1887 }
1888 
1889 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1890                    u32 max_num_sg)
1891 {
1892     return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
1893 }
1894 
1895 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
1896                      u32 max_num_sg, u32 max_num_meta_sg)
1897 {
1898     return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
1899                   max_num_meta_sg);
1900 }
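
     /*
      * mlx5_ib_alloc_mr() and mlx5_ib_alloc_mr_integrity() implement the
      * .alloc_mr and .alloc_mr_integrity device verbs; ULPs reach them
      * through ib_alloc_mr() and ib_alloc_mr_integrity() respectively.
      */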
1901 
1902 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
1903 {
1904     struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
1905     int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1906     struct mlx5_ib_mw *mw = to_mmw(ibmw);
1907     unsigned int ndescs;
1908     u32 *in = NULL;
1909     void *mkc;
1910     int err;
1911     struct mlx5_ib_alloc_mw req = {};
1912     struct {
1913         __u32   comp_mask;
1914         __u32   response_length;
1915     } resp = {};
1916 
1917     err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1918     if (err)
1919         return err;
1920 
1921     if (req.comp_mask || req.reserved1 || req.reserved2)
1922         return -EOPNOTSUPP;
1923 
1924     if (udata->inlen > sizeof(req) &&
1925         !ib_is_udata_cleared(udata, sizeof(req),
1926                  udata->inlen - sizeof(req)))
1927         return -EOPNOTSUPP;
1928 
1929     ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1930 
1931     in = kzalloc(inlen, GFP_KERNEL);
1932     if (!in) {
1933         err = -ENOMEM;
1934         goto free;
1935     }
1936 
1937     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1938 
1939     MLX5_SET(mkc, mkc, free, 1);
1940     MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1941     MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
1942     MLX5_SET(mkc, mkc, umr_en, 1);
1943     MLX5_SET(mkc, mkc, lr, 1);
1944     MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
1945     MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
1946     MLX5_SET(mkc, mkc, qpn, 0xffffff);
1947 
1948     err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
1949     if (err)
1950         goto free;
1951 
1952     mw->mmkey.type = MLX5_MKEY_MW;
1953     ibmw->rkey = mw->mmkey.key;
1954     mw->mmkey.ndescs = ndescs;
1955 
1956     resp.response_length =
1957         min(offsetofend(typeof(resp), response_length), udata->outlen);
1958     if (resp.response_length) {
1959         err = ib_copy_to_udata(udata, &resp, resp.response_length);
1960         if (err)
1961             goto free_mkey;
1962     }
1963 
1964     if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
1965         err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
1966         if (err)
1967             goto free_mkey;
1968     }
1969 
1970     kfree(in);
1971     return 0;
1972 
1973 free_mkey:
1974     mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
1975 free:
1976     kfree(in);
1977     return err;
1978 }
1979 
1980 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1981 {
1982     struct mlx5_ib_dev *dev = to_mdev(mw->device);
1983     struct mlx5_ib_mw *mmw = to_mmw(mw);
1984 
1985     if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
1986         xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
1987         /*
1988          * pagefault_single_data_segment() may be accessing mmw
1989          * if the user bound an ODP MR to this MW.
1990          */
1991         mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
1992 
1993     return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
1994 }
1995 
1996 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1997                 struct ib_mr_status *mr_status)
1998 {
1999     struct mlx5_ib_mr *mmr = to_mmr(ibmr);
2000     int ret = 0;
2001 
2002     if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
2003         pr_err("Invalid status check mask\n");
2004         ret = -EINVAL;
2005         goto done;
2006     }
2007 
2008     mr_status->fail_status = 0;
2009     if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2010         if (!mmr->sig) {
2011             ret = -EINVAL;
2012             pr_err("signature status check requested on a non-signature enabled MR\n");
2013             goto done;
2014         }
2015 
2016         mmr->sig->sig_status_checked = true;
2017         if (!mmr->sig->sig_err_exists)
2018             goto done;
2019 
2020         if (ibmr->lkey == mmr->sig->err_item.key)
2021             memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2022                    sizeof(mr_status->sig_err));
2023         else {
2024             mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2025             mr_status->sig_err.sig_err_offset = 0;
2026             mr_status->sig_err.key = mmr->sig->err_item.key;
2027         }
2028 
2029         mmr->sig->sig_err_exists = false;
2030         mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2031     }
2032 
2033 done:
2034     return ret;
2035 }
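
     /*
      * An illustrative sketch of how a ULP typically consumes this verb
      * after an integrity operation completes; sig_mr is an assumed
      * IB_MR_TYPE_INTEGRITY MR owned by the caller and error handling is
      * elided:
      *
      *     struct ib_mr_status st;
      *
      *     if (!ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &st) &&
      *         (st.fail_status & IB_MR_CHECK_SIG_STATUS))
      *             pr_err("PI error %d at offset %llu, key 0x%x\n",
      *                    st.sig_err.err_type, st.sig_err.sig_err_offset,
      *                    st.sig_err.key);
      */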
2036 
2037 static int
2038 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2039             int data_sg_nents, unsigned int *data_sg_offset,
2040             struct scatterlist *meta_sg, int meta_sg_nents,
2041             unsigned int *meta_sg_offset)
2042 {
2043     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2044     unsigned int sg_offset = 0;
2045     int n = 0;
2046 
2047     mr->meta_length = 0;
2048     if (data_sg_nents == 1) {
2049         n++;
2050         mr->mmkey.ndescs = 1;
2051         if (data_sg_offset)
2052             sg_offset = *data_sg_offset;
2053         mr->data_length = sg_dma_len(data_sg) - sg_offset;
2054         mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2055         if (meta_sg_nents == 1) {
2056             n++;
2057             mr->meta_ndescs = 1;
2058             if (meta_sg_offset)
2059                 sg_offset = *meta_sg_offset;
2060             else
2061                 sg_offset = 0;
2062             mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2063             mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2064         }
2065         ibmr->length = mr->data_length + mr->meta_length;
2066     }
2067 
2068     return n;
2069 }
2070 
2071 static int
2072 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2073            struct scatterlist *sgl,
2074            unsigned short sg_nents,
2075            unsigned int *sg_offset_p,
2076            struct scatterlist *meta_sgl,
2077            unsigned short meta_sg_nents,
2078            unsigned int *meta_sg_offset_p)
2079 {
2080     struct scatterlist *sg = sgl;
2081     struct mlx5_klm *klms = mr->descs;
2082     unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2083     u32 lkey = mr->ibmr.pd->local_dma_lkey;
2084     int i, j = 0;
2085 
2086     mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2087     mr->ibmr.length = 0;
2088 
2089     for_each_sg(sgl, sg, sg_nents, i) {
2090         if (unlikely(i >= mr->max_descs))
2091             break;
2092         klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2093         klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2094         klms[i].key = cpu_to_be32(lkey);
2095         mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2096 
2097         sg_offset = 0;
2098     }
2099 
2100     if (sg_offset_p)
2101         *sg_offset_p = sg_offset;
2102 
2103     mr->mmkey.ndescs = i;
2104     mr->data_length = mr->ibmr.length;
2105 
2106     if (meta_sg_nents) {
2107         sg = meta_sgl;
2108         sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2109         for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2110             if (unlikely(i + j >= mr->max_descs))
2111                 break;
2112             klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2113                              sg_offset);
2114             klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2115                              sg_offset);
2116             klms[i + j].key = cpu_to_be32(lkey);
2117             mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2118 
2119             sg_offset = 0;
2120         }
2121         if (meta_sg_offset_p)
2122             *meta_sg_offset_p = sg_offset;
2123 
2124         mr->meta_ndescs = j;
2125         mr->meta_length = mr->ibmr.length - mr->data_length;
2126     }
2127 
2128     return i + j;
2129 }
2130 
2131 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2132 {
2133     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2134     __be64 *descs;
2135 
2136     if (unlikely(mr->mmkey.ndescs == mr->max_descs))
2137         return -ENOMEM;
2138 
2139     descs = mr->descs;
2140     descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2141 
2142     return 0;
2143 }
2144 
2145 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2146 {
2147     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2148     __be64 *descs;
2149 
2150     if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
2151         return -ENOMEM;
2152 
2153     descs = mr->descs;
2154     descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
2155         cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2156 
2157     return 0;
2158 }
2159 
2160 static int
2161 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2162              int data_sg_nents, unsigned int *data_sg_offset,
2163              struct scatterlist *meta_sg, int meta_sg_nents,
2164              unsigned int *meta_sg_offset)
2165 {
2166     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2167     struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2168     int n;
2169 
2170     pi_mr->mmkey.ndescs = 0;
2171     pi_mr->meta_ndescs = 0;
2172     pi_mr->meta_length = 0;
2173 
2174     ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2175                    pi_mr->desc_size * pi_mr->max_descs,
2176                    DMA_TO_DEVICE);
2177 
2178     pi_mr->ibmr.page_size = ibmr->page_size;
2179     n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2180                mlx5_set_page);
2181     if (n != data_sg_nents)
2182         return n;
2183 
2184     pi_mr->data_iova = pi_mr->ibmr.iova;
2185     pi_mr->data_length = pi_mr->ibmr.length;
2186     pi_mr->ibmr.length = pi_mr->data_length;
2187     ibmr->length = pi_mr->data_length;
2188 
2189     if (meta_sg_nents) {
2190         u64 page_mask = ~((u64)ibmr->page_size - 1);
2191         u64 iova = pi_mr->data_iova;
2192 
2193         n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2194                     meta_sg_offset, mlx5_set_page_pi);
2195 
2196         pi_mr->meta_length = pi_mr->ibmr.length;
2197         /*
2198          * The PI address given to the HW is the metadata address as seen
2199          * through this MTT MR: the first data page address + the total
2200          * size of the data pages + the metadata offset within its first
2201          * metadata page.
2202          */
2203         pi_mr->pi_iova = (iova & page_mask) +
2204                  pi_mr->mmkey.ndescs * ibmr->page_size +
2205                  (pi_mr->ibmr.iova & ~page_mask);
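             /*
              * Worked example with assumed values: with a 4 KiB
              * ibmr->page_size, data mapped at iova 0x10000 across two
              * pages, and the metadata buffer mapped at 0x30100, the
              * in-page metadata offset is 0x100 and
              * pi_iova = 0x10000 + 2 * 0x1000 + 0x100 = 0x12100.
              */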
2206         /*
2207          * To use one MTT MR for both data and metadata, we also register
2208          * the gap between the end of the data and the start of the
2209          * metadata (the sig MR verifies that the HW accesses only the
2210          * right addresses). This mapping is safe because an internal
2211          * mkey is used for the registration.
2212          */
2213         pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2214         pi_mr->ibmr.iova = iova;
2215         ibmr->length += pi_mr->meta_length;
2216     }
2217 
2218     ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2219                       pi_mr->desc_size * pi_mr->max_descs,
2220                       DMA_TO_DEVICE);
2221 
2222     return n;
2223 }
2224 
2225 static int
2226 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2227              int data_sg_nents, unsigned int *data_sg_offset,
2228              struct scatterlist *meta_sg, int meta_sg_nents,
2229              unsigned int *meta_sg_offset)
2230 {
2231     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2232     struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2233     int n;
2234 
2235     pi_mr->mmkey.ndescs = 0;
2236     pi_mr->meta_ndescs = 0;
2237     pi_mr->meta_length = 0;
2238 
2239     ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2240                    pi_mr->desc_size * pi_mr->max_descs,
2241                    DMA_TO_DEVICE);
2242 
2243     n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2244                    meta_sg, meta_sg_nents, meta_sg_offset);
2245 
2246     ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2247                       pi_mr->desc_size * pi_mr->max_descs,
2248                       DMA_TO_DEVICE);
2249 
2250     /* This is a zero-based memory region */
2251     pi_mr->data_iova = 0;
2252     pi_mr->ibmr.iova = 0;
2253     pi_mr->pi_iova = pi_mr->data_length;
2254     ibmr->length = pi_mr->ibmr.length;
2255 
2256     return n;
2257 }
2258 
2259 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2260              int data_sg_nents, unsigned int *data_sg_offset,
2261              struct scatterlist *meta_sg, int meta_sg_nents,
2262              unsigned int *meta_sg_offset)
2263 {
2264     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2265     struct mlx5_ib_mr *pi_mr = NULL;
2266     int n;
2267 
2268     WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2269 
2270     mr->mmkey.ndescs = 0;
2271     mr->data_length = 0;
2272     mr->data_iova = 0;
2273     mr->meta_ndescs = 0;
2274     mr->pi_iova = 0;
2275     /*
2276      * As a performance optimization, avoid the UMR operation needed to
2277      * register the data/metadata buffers whenever possible: first try to
2278      * map the sg lists to PA descriptors with local_dma_lkey, and fall
2279      * back to UMR only if that fails.
2280      */
2281     n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2282                     data_sg_offset, meta_sg, meta_sg_nents,
2283                     meta_sg_offset);
2284     if (n == data_sg_nents + meta_sg_nents)
2285         goto out;
2286     /*
2287      * As a performance optimization, avoid mapping the sg lists to KLM
2288      * descriptors whenever possible: first try to map them to MTT
2289      * descriptors and fall back to KLM only if that fails.
2290      * The HW works more efficiently with MTT descriptors
2291      * (especially under high load).
2292      * Use KLM (indirect access) only when it is mandatory.
2293      */
2294     pi_mr = mr->mtt_mr;
2295     n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2296                      data_sg_offset, meta_sg, meta_sg_nents,
2297                      meta_sg_offset);
2298     if (n == data_sg_nents + meta_sg_nents)
2299         goto out;
2300 
2301     pi_mr = mr->klm_mr;
2302     n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2303                      data_sg_offset, meta_sg, meta_sg_nents,
2304                      meta_sg_offset);
2305     if (unlikely(n != data_sg_nents + meta_sg_nents))
2306         return -ENOMEM;
2307 
2308 out:
2309     /* This is a zero-based memory region */
2310     ibmr->iova = 0;
2311     mr->pi_mr = pi_mr;
2312     if (pi_mr)
2313         ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2314     else
2315         ibmr->sig_attrs->meta_length = mr->meta_length;
2316 
2317     return 0;
2318 }
2319 
2320 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2321               unsigned int *sg_offset)
2322 {
2323     struct mlx5_ib_mr *mr = to_mmr(ibmr);
2324     int n;
2325 
2326     mr->mmkey.ndescs = 0;
2327 
2328     ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2329                    mr->desc_size * mr->max_descs,
2330                    DMA_TO_DEVICE);
2331 
2332     if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2333         n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2334                        NULL);
2335     else
2336         n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2337                 mlx5_set_page);
2338 
2339     ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2340                       mr->desc_size * mr->max_descs,
2341                       DMA_TO_DEVICE);
2342 
2343     return n;
2344 }
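
     /*
      * A minimal, illustrative sketch of the ULP-side fast-registration flow
      * that reaches mlx5_ib_map_mr_sg() through the core .map_mr_sg verb.
      * The pd, qp, sgl and nents names are assumed to exist in the caller
      * and error handling is elided:
      *
      *     struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
      *     struct ib_reg_wr reg_wr = {};
      *     int n;
      *
      *     n = ib_dma_map_sg(pd->device, sgl, nents, DMA_BIDIRECTIONAL);
      *     n = ib_map_mr_sg(mr, sgl, n, NULL, PAGE_SIZE);
      *
      *     reg_wr.wr.opcode = IB_WR_REG_MR;
      *     reg_wr.wr.send_flags = IB_SEND_SIGNALED;
      *     reg_wr.mr = mr;
      *     reg_wr.key = mr->rkey;
      *     reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
      *     ib_post_send(qp, &reg_wr.wr, NULL);
      */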