0035 #include <linux/kref.h>
0036 #include <linux/random.h>
0037 #include <linux/debugfs.h>
0038 #include <linux/export.h>
0039 #include <linux/delay.h>
0040 #include <linux/dma-buf.h>
0041 #include <linux/dma-resv.h>
0042 #include <rdma/ib_umem.h>
0043 #include <rdma/ib_umem_odp.h>
0044 #include <rdma/ib_verbs.h>
0045 #include "dm.h"
0046 #include "mlx5_ib.h"
0047 #include "umr.h"
0048
0049 enum {
0050 MAX_PENDING_REG_MR = 8,
0051 };
0052
0053 #define MLX5_UMR_ALIGN 2048
0054
0055 static void
0056 create_mkey_callback(int status, struct mlx5_async_work *context);
0057 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
0058 u64 iova, int access_flags,
0059 unsigned int page_size, bool populate);
0060
0061 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
0062 struct ib_pd *pd)
0063 {
0064 struct mlx5_ib_dev *dev = to_mdev(pd->device);
0065
0066 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
0067 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
0068 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
0069 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
0070 MLX5_SET(mkc, mkc, lr, 1);
0071
0072 if ((acc & IB_ACCESS_RELAXED_ORDERING) &&
0073 pcie_relaxed_ordering_enabled(dev->mdev->pdev)) {
0074 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
0075 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
0076 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
0077 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
0078 }
0079
0080 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
0081 MLX5_SET(mkc, mkc, qpn, 0xffffff);
0082 MLX5_SET64(mkc, mkc, start_addr, start_addr);
0083 }
0084
0085 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
0086 {
0087 u8 key = atomic_inc_return(&dev->mkey_var);
0088 void *mkc;
0089
0090 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0091 MLX5_SET(mkc, mkc, mkey_7_0, key);
0092 *mkey = key;
0093 }
0094
0095 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
0096 struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
0097 {
0098 int ret;
0099
0100 assign_mkey_variant(dev, &mkey->key, in);
0101 ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
0102 if (!ret)
0103 init_waitqueue_head(&mkey->wait);
0104
0105 return ret;
0106 }
0107
0108 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
0109 {
0110 struct mlx5_ib_dev *dev = async_create->ent->dev;
0111 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0112 size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
0113
0114 MLX5_SET(create_mkey_in, async_create->in, opcode,
0115 MLX5_CMD_OP_CREATE_MKEY);
0116 assign_mkey_variant(dev, &async_create->mkey, async_create->in);
0117 return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
0118 async_create->out, outlen, create_mkey_callback,
0119 &async_create->cb_work);
0120 }
0121
0122 static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
0123 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
0124
0125 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
0126 {
0127 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
0128
0129 return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
0130 }
0131
0132 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
0133 {
0134 if (status == -ENXIO)
0135 return;
0136
0137 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
0138 if (status != -EREMOTEIO)
0139 return;
0140
/* Failed in FW, print cmd out failure details */
0142 mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
0143 }

/*
 * Reserve a slot in ent->mkeys, optionally storing @to_store in it. Indices
 * [0, stored) hold cached mkeys, while [stored, reserved) are placeholders
 * for mkey creations that are still in flight.
 */
0146 static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
0147 void *to_store)
0148 {
0149 XA_STATE(xas, &ent->mkeys, 0);
0150 void *curr;
0151
0152 xa_lock_irq(&ent->mkeys);
0153 if (limit_pendings &&
0154 (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
0155 xa_unlock_irq(&ent->mkeys);
0156 return -EAGAIN;
0157 }
0158 while (1) {
/*
 * This is cmpxchg(NULL, XA_ZERO_ENTRY): set the xas index to the current
 * value of ent->reserved on every iteration, since the lock may have been
 * dropped by xas_nomem() below.
 */
0164 xas_set(&xas, ent->reserved);
0165 curr = xas_load(&xas);
0166 if (!curr) {
0167 if (to_store && ent->stored == ent->reserved)
0168 xas_store(&xas, to_store);
0169 else
0170 xas_store(&xas, XA_ZERO_ENTRY);
0171 if (xas_valid(&xas)) {
0172 ent->reserved++;
0173 if (to_store) {
0174 if (ent->stored != ent->reserved)
0175 __xa_store(&ent->mkeys,
0176 ent->stored,
0177 to_store,
0178 GFP_KERNEL);
0179 ent->stored++;
0180 queue_adjust_cache_locked(ent);
0181 WRITE_ONCE(ent->dev->cache.last_add,
0182 jiffies);
0183 }
0184 }
0185 }
0186 xa_unlock_irq(&ent->mkeys);

/*
 * xas_nomem() must always be called as it cleans up any cached
 * allocation from the previous iteration.
 */
0192 if (!xas_nomem(&xas, GFP_KERNEL))
0193 break;
0194 xa_lock_irq(&ent->mkeys);
0195 }
0196 if (xas_error(&xas))
0197 return xas_error(&xas);
0198 if (WARN_ON(curr))
0199 return -EINVAL;
0200 return 0;
0201 }
0202
0203 static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
0204 {
0205 void *old;
0206
0207 ent->reserved--;
0208 old = __xa_erase(&ent->mkeys, ent->reserved);
0209 WARN_ON(old);
0210 }
0211
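/*
 * Store a newly created mkey at index ent->stored, converting a reserved
 * placeholder slot into a usable cached mkey.
 */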
0212 static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
0213 {
0214 void *old;
0215
0216 old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
0217 WARN_ON(old);
0218 ent->stored++;
0219 }
0220
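/*
 * Take the most recently stored mkey out of the cache entry, shrinking both
 * the stored and reserved counts. If reserved placeholders exist above the
 * stored mkeys, the popped index is turned back into a placeholder and the
 * topmost reserved index is erased instead.
 */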
0221 static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
0222 {
0223 void *old, *xa_mkey;
0224
0225 ent->stored--;
0226 ent->reserved--;
0227
0228 if (ent->stored == ent->reserved) {
0229 xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
0230 WARN_ON(!xa_mkey);
0231 return (u32)xa_to_value(xa_mkey);
0232 }
0233
0234 xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
0235 GFP_KERNEL);
0236 WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
0237 old = __xa_erase(&ent->mkeys, ent->reserved);
0238 WARN_ON(old);
0239 return (u32)xa_to_value(xa_mkey);
0240 }
0241
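/*
 * Completion handler for the asynchronous CREATE_MKEY command issued by
 * add_keys(). On failure the reserved slot is released and cache refilling
 * is throttled via fill_delay/delay_timer; on success the new mkey is pushed
 * into the cache entry.
 */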
0242 static void create_mkey_callback(int status, struct mlx5_async_work *context)
0243 {
0244 struct mlx5r_async_create_mkey *mkey_out =
0245 container_of(context, struct mlx5r_async_create_mkey, cb_work);
0246 struct mlx5_cache_ent *ent = mkey_out->ent;
0247 struct mlx5_ib_dev *dev = ent->dev;
0248 unsigned long flags;
0249
0250 if (status) {
0251 create_mkey_warn(dev, status, mkey_out->out);
0252 kfree(mkey_out);
0253 xa_lock_irqsave(&ent->mkeys, flags);
0254 undo_push_reserve_mkey(ent);
0255 WRITE_ONCE(dev->fill_delay, 1);
0256 xa_unlock_irqrestore(&ent->mkeys, flags);
0257 mod_timer(&dev->delay_timer, jiffies + HZ);
0258 return;
0259 }
0260
0261 mkey_out->mkey |= mlx5_idx_to_mkey(
0262 MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
0263 WRITE_ONCE(dev->cache.last_add, jiffies);
0264
0265 xa_lock_irqsave(&ent->mkeys, flags);
0266 push_to_reserved(ent, mkey_out->mkey);
0267
0268 queue_adjust_cache_locked(ent);
0269 xa_unlock_irqrestore(&ent->mkeys, flags);
0270 kfree(mkey_out);
0271 }
0272
0273 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
0274 {
0275 int ret = 0;
0276
0277 switch (access_mode) {
0278 case MLX5_MKC_ACCESS_MODE_MTT:
0279 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
0280 sizeof(struct mlx5_mtt));
0281 break;
0282 case MLX5_MKC_ACCESS_MODE_KSM:
0283 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
0284 sizeof(struct mlx5_klm));
0285 break;
0286 default:
0287 WARN_ON(1);
0288 }
0289 return ret;
0290 }
0291
0292 static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
0293 {
0294 set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
0295 MLX5_SET(mkc, mkc, free, 1);
0296 MLX5_SET(mkc, mkc, umr_en, 1);
0297 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
0298 MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
0299
0300 MLX5_SET(mkc, mkc, translations_octword_size,
0301 get_mkc_octo_size(ent->access_mode, ent->ndescs));
0302 MLX5_SET(mkc, mkc, log_page_size, ent->page);
0303 }
0304
/* Asynchronously schedule new mkeys to be created for the cache entry. */
0306 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
0307 {
0308 struct mlx5r_async_create_mkey *async_create;
0309 void *mkc;
0310 int err = 0;
0311 int i;
0312
0313 for (i = 0; i < num; i++) {
0314 async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
0315 GFP_KERNEL);
0316 if (!async_create)
0317 return -ENOMEM;
0318 mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
0319 memory_key_mkey_entry);
0320 set_cache_mkc(ent, mkc);
0321 async_create->ent = ent;
0322
0323 err = push_mkey(ent, true, NULL);
0324 if (err)
0325 goto free_async_create;
0326
0327 err = mlx5_ib_create_mkey_cb(async_create);
0328 if (err) {
0329 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
0330 goto err_undo_reserve;
0331 }
0332 }
0333
0334 return 0;
0335
0336 err_undo_reserve:
0337 xa_lock_irq(&ent->mkeys);
0338 undo_push_reserve_mkey(ent);
0339 xa_unlock_irq(&ent->mkeys);
0340 free_async_create:
0341 kfree(async_create);
0342 return err;
0343 }
0344
/* Synchronously create an mkey with the cache entry's parameters. */
0346 static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
0347 {
0348 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0349 void *mkc;
0350 u32 *in;
0351 int err;
0352
0353 in = kzalloc(inlen, GFP_KERNEL);
0354 if (!in)
0355 return -ENOMEM;
0356 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0357 set_cache_mkc(ent, mkc);
0358
0359 err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
0360 if (err)
0361 goto free_in;
0362
0363 WRITE_ONCE(ent->dev->cache.last_add, jiffies);
0364 free_in:
0365 kfree(in);
0366 return err;
0367 }
0368
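/*
 * Destroy one cached mkey, dropping the xa_lock around the destroy command
 * since FW commands sleep.
 */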
0369 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
0370 {
0371 u32 mkey;
0372
0373 lockdep_assert_held(&ent->mkeys.xa_lock);
0374 if (!ent->stored)
0375 return;
0376 mkey = pop_stored_mkey(ent);
0377 xa_unlock_irq(&ent->mkeys);
0378 mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
0379 xa_lock_irq(&ent->mkeys);
0380 }
0381
0382 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
0383 bool limit_fill)
0384 __acquires(&ent->mkeys) __releases(&ent->mkeys)
0385 {
0386 int err;
0387
0388 lockdep_assert_held(&ent->mkeys.xa_lock);
0389
0390 while (true) {
0391 if (limit_fill)
0392 target = ent->limit * 2;
0393 if (target == ent->reserved)
0394 return 0;
0395 if (target > ent->reserved) {
0396 u32 todo = target - ent->reserved;
0397
0398 xa_unlock_irq(&ent->mkeys);
0399 err = add_keys(ent, todo);
0400 if (err == -EAGAIN)
0401 usleep_range(3000, 5000);
0402 xa_lock_irq(&ent->mkeys);
0403 if (err) {
0404 if (err != -EAGAIN)
0405 return err;
0406 } else
0407 return 0;
0408 } else {
0409 remove_cache_mr_locked(ent);
0410 }
0411 }
0412 }
0413
0414 static ssize_t size_write(struct file *filp, const char __user *buf,
0415 size_t count, loff_t *pos)
0416 {
0417 struct mlx5_cache_ent *ent = filp->private_data;
0418 u32 target;
0419 int err;
0420
0421 err = kstrtou32_from_user(buf, count, 0, &target);
0422 if (err)
0423 return err;
0424
/*
 * Target is the new total number of mkeys the user wants, however we
 * cannot free mkeys that are in use. Compute the target value for the
 * stored mkeys.
 */
0430 xa_lock_irq(&ent->mkeys);
0431 if (target < ent->in_use) {
0432 err = -EINVAL;
0433 goto err_unlock;
0434 }
0435 target = target - ent->in_use;
0436 if (target < ent->limit || target > ent->limit*2) {
0437 err = -EINVAL;
0438 goto err_unlock;
0439 }
0440 err = resize_available_mrs(ent, target, false);
0441 if (err)
0442 goto err_unlock;
0443 xa_unlock_irq(&ent->mkeys);
0444
0445 return count;
0446
0447 err_unlock:
0448 xa_unlock_irq(&ent->mkeys);
0449 return err;
0450 }
0451
0452 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
0453 loff_t *pos)
0454 {
0455 struct mlx5_cache_ent *ent = filp->private_data;
0456 char lbuf[20];
0457 int err;
0458
0459 err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
0460 if (err < 0)
0461 return err;
0462
0463 return simple_read_from_buffer(buf, count, pos, lbuf, err);
0464 }
0465
0466 static const struct file_operations size_fops = {
0467 .owner = THIS_MODULE,
0468 .open = simple_open,
0469 .write = size_write,
0470 .read = size_read,
0471 };
0472
0473 static ssize_t limit_write(struct file *filp, const char __user *buf,
0474 size_t count, loff_t *pos)
0475 {
0476 struct mlx5_cache_ent *ent = filp->private_data;
0477 u32 var;
0478 int err;
0479
0480 err = kstrtou32_from_user(buf, count, 0, &var);
0481 if (err)
0482 return err;
0483
/*
 * Upon set we immediately fill the cache to the high water mark
 * implied by the limit.
 */
0488 xa_lock_irq(&ent->mkeys);
0489 ent->limit = var;
0490 err = resize_available_mrs(ent, 0, true);
0491 xa_unlock_irq(&ent->mkeys);
0492 if (err)
0493 return err;
0494 return count;
0495 }
0496
0497 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
0498 loff_t *pos)
0499 {
0500 struct mlx5_cache_ent *ent = filp->private_data;
0501 char lbuf[20];
0502 int err;
0503
0504 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
0505 if (err < 0)
0506 return err;
0507
0508 return simple_read_from_buffer(buf, count, pos, lbuf, err);
0509 }
0510
0511 static const struct file_operations limit_fops = {
0512 .owner = THIS_MODULE,
0513 .open = simple_open,
0514 .write = limit_write,
0515 .read = limit_read,
0516 };
0517
0518 static bool someone_adding(struct mlx5_mkey_cache *cache)
0519 {
0520 unsigned int i;
0521
0522 for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0523 struct mlx5_cache_ent *ent = &cache->ent[i];
0524 bool ret;
0525
0526 xa_lock_irq(&ent->mkeys);
0527 ret = ent->stored < ent->limit;
0528 xa_unlock_irq(&ent->mkeys);
0529 if (ret)
0530 return true;
0531 }
0532 return false;
0533 }
0534
/*
 * Check if the bucket is outside the high/low water marks and schedule an
 * async update. The cache refill has hysteresis: once the low water mark is
 * hit it is refilled up to the high water mark.
 */
0540 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
0541 {
0542 lockdep_assert_held(&ent->mkeys.xa_lock);
0543
0544 if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
0545 return;
0546 if (ent->stored < ent->limit) {
0547 ent->fill_to_high_water = true;
0548 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0549 } else if (ent->fill_to_high_water &&
0550 ent->reserved < 2 * ent->limit) {
/*
 * Once we start populating due to hitting a low water mark
 * continue until we pass the high water mark.
 */
0555 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0556 } else if (ent->stored == 2 * ent->limit) {
0557 ent->fill_to_high_water = false;
0558 } else if (ent->stored > 2 * ent->limit) {
/* Queue deletion of excess entries */
0560 ent->fill_to_high_water = false;
0561 if (ent->stored != ent->reserved)
0562 queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
0563 msecs_to_jiffies(1000));
0564 else
0565 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
0566 }
0567 }
0568
0569 static void __cache_work_func(struct mlx5_cache_ent *ent)
0570 {
0571 struct mlx5_ib_dev *dev = ent->dev;
0572 struct mlx5_mkey_cache *cache = &dev->cache;
0573 int err;
0574
0575 xa_lock_irq(&ent->mkeys);
0576 if (ent->disabled)
0577 goto out;
0578
0579 if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
0580 !READ_ONCE(dev->fill_delay)) {
0581 xa_unlock_irq(&ent->mkeys);
0582 err = add_keys(ent, 1);
0583 xa_lock_irq(&ent->mkeys);
0584 if (ent->disabled)
0585 goto out;
0586 if (err) {
/*
 * -EAGAIN only happens when there are pending reserved mkeys, so
 * the work will be rescheduled once they are stored. Any other
 * error is logged and retried after a delay below.
 */
0592 if (err != -EAGAIN) {
0593 mlx5_ib_warn(
0594 dev,
0595 "command failed order %d, err %d\n",
0596 ent->order, err);
0597 queue_delayed_work(cache->wq, &ent->dwork,
0598 msecs_to_jiffies(1000));
0599 }
0600 }
0601 } else if (ent->stored > 2 * ent->limit) {
0602 bool need_delay;
0603
/*
 * The remove_cache_mr() logic is performed as garbage
 * collection work, intended to run only when no other
 * active process needs the CPU.
 *
 * need_resched() returns true if there are user tasks
 * waiting to run in the near future.
 *
 * In that case don't shrink the cache now; postpone the
 * garbage collection to the next cycle to free CPU
 * resources for the other tasks.
 */
0616 xa_unlock_irq(&ent->mkeys);
0617 need_delay = need_resched() || someone_adding(cache) ||
0618 !time_after(jiffies,
0619 READ_ONCE(cache->last_add) + 300 * HZ);
0620 xa_lock_irq(&ent->mkeys);
0621 if (ent->disabled)
0622 goto out;
0623 if (need_delay) {
0624 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
0625 goto out;
0626 }
0627 remove_cache_mr_locked(ent);
0628 queue_adjust_cache_locked(ent);
0629 }
0630 out:
0631 xa_unlock_irq(&ent->mkeys);
0632 }
0633
0634 static void delayed_cache_work_func(struct work_struct *work)
0635 {
0636 struct mlx5_cache_ent *ent;
0637
0638 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
0639 __cache_work_func(ent);
0640 }
0641
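/*
 * Hand out an mkey from the cache entry. On a cache miss an mkey is created
 * synchronously with the entry's parameters and the background refill work
 * is kicked to replenish the bucket.
 */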
0642 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
0643 struct mlx5_cache_ent *ent,
0644 int access_flags)
0645 {
0646 struct mlx5_ib_mr *mr;
0647 int err;
0648
0649 if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
0650 return ERR_PTR(-EOPNOTSUPP);
0651
0652 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0653 if (!mr)
0654 return ERR_PTR(-ENOMEM);
0655
0656 xa_lock_irq(&ent->mkeys);
0657 ent->in_use++;
0658
0659 if (!ent->stored) {
0660 queue_adjust_cache_locked(ent);
0661 ent->miss++;
0662 xa_unlock_irq(&ent->mkeys);
0663 err = create_cache_mkey(ent, &mr->mmkey.key);
0664 if (err) {
0665 xa_lock_irq(&ent->mkeys);
0666 ent->in_use--;
0667 xa_unlock_irq(&ent->mkeys);
0668 kfree(mr);
0669 return ERR_PTR(err);
0670 }
0671 } else {
0672 mr->mmkey.key = pop_stored_mkey(ent);
0673 queue_adjust_cache_locked(ent);
0674 xa_unlock_irq(&ent->mkeys);
0675 }
0676 mr->mmkey.cache_ent = ent;
0677 mr->mmkey.type = MLX5_MKEY_MR;
0678 init_waitqueue_head(&mr->mmkey.wait);
0679 return mr;
0680 }
0681
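/* Destroy every mkey still stored in a cache entry during teardown. */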
0682 static void clean_keys(struct mlx5_ib_dev *dev, int c)
0683 {
0684 struct mlx5_mkey_cache *cache = &dev->cache;
0685 struct mlx5_cache_ent *ent = &cache->ent[c];
0686 u32 mkey;
0687
0688 cancel_delayed_work(&ent->dwork);
0689 xa_lock_irq(&ent->mkeys);
0690 while (ent->stored) {
0691 mkey = pop_stored_mkey(ent);
0692 xa_unlock_irq(&ent->mkeys);
0693 mlx5_core_destroy_mkey(dev->mdev, mkey);
0694 xa_lock_irq(&ent->mkeys);
0695 }
0696 xa_unlock_irq(&ent->mkeys);
0697 }
0698
0699 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
0700 {
0701 if (!mlx5_debugfs_root || dev->is_rep)
0702 return;
0703
0704 debugfs_remove_recursive(dev->cache.root);
0705 dev->cache.root = NULL;
0706 }
0707
0708 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
0709 {
0710 struct mlx5_mkey_cache *cache = &dev->cache;
0711 struct mlx5_cache_ent *ent;
0712 struct dentry *dir;
0713 int i;
0714
0715 if (!mlx5_debugfs_root || dev->is_rep)
0716 return;
0717
0718 cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
0719
0720 for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0721 ent = &cache->ent[i];
0722 sprintf(ent->name, "%d", ent->order);
0723 dir = debugfs_create_dir(ent->name, cache->root);
0724 debugfs_create_file("size", 0600, dir, ent, &size_fops);
0725 debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
0726 debugfs_create_ulong("cur", 0400, dir, &ent->stored);
0727 debugfs_create_u32("miss", 0600, dir, &ent->miss);
0728 }
0729 }
0730
0731 static void delay_time_func(struct timer_list *t)
0732 {
0733 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
0734
0735 WRITE_ONCE(dev->fill_delay, 0);
0736 }
0737
0738 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
0739 {
0740 struct mlx5_mkey_cache *cache = &dev->cache;
0741 struct mlx5_cache_ent *ent;
0742 int i;
0743
0744 mutex_init(&dev->slow_path_mutex);
0745 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
0746 if (!cache->wq) {
0747 mlx5_ib_warn(dev, "failed to create work queue\n");
0748 return -ENOMEM;
0749 }
0750
0751 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
0752 timer_setup(&dev->delay_timer, delay_time_func, 0);
0753 for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0754 ent = &cache->ent[i];
0755 xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
0756 ent->order = i + 2;
0757 ent->dev = dev;
0758 ent->limit = 0;
0759
0760 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
0761
0762 if (i > MKEY_CACHE_LAST_STD_ENTRY) {
0763 mlx5_odp_init_mkey_cache_entry(ent);
0764 continue;
0765 }
0766
0767 if (ent->order > mkey_cache_max_order(dev))
0768 continue;
0769
0770 ent->page = PAGE_SHIFT;
0771 ent->ndescs = 1 << ent->order;
0772 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
0773 if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
0774 !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
0775 mlx5r_umr_can_load_pas(dev, 0))
0776 ent->limit = dev->mdev->profile.mr_cache[i].limit;
0777 else
0778 ent->limit = 0;
0779 xa_lock_irq(&ent->mkeys);
0780 queue_adjust_cache_locked(ent);
0781 xa_unlock_irq(&ent->mkeys);
0782 }
0783
0784 mlx5_mkey_cache_debugfs_init(dev);
0785
0786 return 0;
0787 }
0788
0789 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
0790 {
0791 unsigned int i;
0792
0793 if (!dev->cache.wq)
0794 return 0;
0795
0796 for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
0797 struct mlx5_cache_ent *ent = &dev->cache.ent[i];
0798
0799 xa_lock_irq(&ent->mkeys);
0800 ent->disabled = true;
0801 xa_unlock_irq(&ent->mkeys);
0802 cancel_delayed_work_sync(&ent->dwork);
0803 }
0804
0805 mlx5_mkey_cache_debugfs_cleanup(dev);
0806 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
0807
0808 for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
0809 clean_keys(dev, i);
0810
0811 destroy_workqueue(dev->cache.wq);
0812 del_timer_sync(&dev->delay_timer);
0813
0814 return 0;
0815 }
0816
0817 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
0818 {
0819 struct mlx5_ib_dev *dev = to_mdev(pd->device);
0820 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0821 struct mlx5_ib_mr *mr;
0822 void *mkc;
0823 u32 *in;
0824 int err;
0825
0826 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0827 if (!mr)
0828 return ERR_PTR(-ENOMEM);
0829
0830 in = kzalloc(inlen, GFP_KERNEL);
0831 if (!in) {
0832 err = -ENOMEM;
0833 goto err_free;
0834 }
0835
0836 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0837
0838 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
0839 MLX5_SET(mkc, mkc, length64, 1);
0840 set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
0841 pd);
0842
0843 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
0844 if (err)
0845 goto err_in;
0846
0847 kfree(in);
0848 mr->mmkey.type = MLX5_MKEY_MR;
0849 mr->ibmr.lkey = mr->mmkey.key;
0850 mr->ibmr.rkey = mr->mmkey.key;
0851 mr->umem = NULL;
0852
0853 return &mr->ibmr;
0854
0855 err_in:
0856 kfree(in);
0857
0858 err_free:
0859 kfree(mr);
0860
0861 return ERR_PTR(err);
0862 }
0863
0864 static int get_octo_len(u64 addr, u64 len, int page_shift)
0865 {
0866 u64 page_size = 1ULL << page_shift;
0867 u64 offset;
0868 int npages;
0869
0870 offset = addr & (page_size - 1);
0871 npages = ALIGN(len + offset, page_size) >> page_shift;
0872 return (npages + 1) / 2;
0873 }
0874
0875 static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
0876 {
0877 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
0878 return MKEY_CACHE_LAST_STD_ENTRY + 2;
0879 return MLX5_MAX_UMR_SHIFT;
0880 }
0881
0882 static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
0883 unsigned int order)
0884 {
0885 struct mlx5_mkey_cache *cache = &dev->cache;
0886
0887 if (order < cache->ent[0].order)
0888 return &cache->ent[0];
0889 order = order - cache->ent[0].order;
0890 if (order > MKEY_CACHE_LAST_STD_ENTRY)
0891 return NULL;
0892 return &cache->ent[order];
0893 }
0894
0895 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
0896 u64 length, int access_flags, u64 iova)
0897 {
0898 mr->ibmr.lkey = mr->mmkey.key;
0899 mr->ibmr.rkey = mr->mmkey.key;
0900 mr->ibmr.length = length;
0901 mr->ibmr.device = &dev->ib_dev;
0902 mr->ibmr.iova = iova;
0903 mr->access_flags = access_flags;
0904 }
0905
0906 static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
0907 u64 iova)
0908 {
/*
 * dma-buf umems are always mapped with PAGE_SIZE pages; record the
 * iova so the page tables can be built against it later, once the
 * pages are actually mapped.
 */
0913 umem->iova = iova;
0914 return PAGE_SIZE;
0915 }
0916
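/*
 * Register an MR using a cached mkey when possible: pick the cache bucket by
 * the number of DMA blocks needed, falling back to the FW slow path
 * (reg_create) when no suitable bucket exists.
 */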
0917 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
0918 struct ib_umem *umem, u64 iova,
0919 int access_flags)
0920 {
0921 struct mlx5_ib_dev *dev = to_mdev(pd->device);
0922 struct mlx5_cache_ent *ent;
0923 struct mlx5_ib_mr *mr;
0924 unsigned int page_size;
0925
0926 if (umem->is_dmabuf)
0927 page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
0928 else
0929 page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
0930 0, iova);
0931 if (WARN_ON(!page_size))
0932 return ERR_PTR(-EINVAL);
0933 ent = mkey_cache_ent_from_order(
0934 dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
/*
 * If the MR cannot come from the cache (no suitable bucket, the
 * bucket is unused, or UMR cannot provide the access flags) then
 * create an uncached mkey synchronously via the slow path.
 */
0939 if (!ent || ent->limit == 0 ||
0940 !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
0941 mutex_lock(&dev->slow_path_mutex);
0942 mr = reg_create(pd, umem, iova, access_flags, page_size, false);
0943 mutex_unlock(&dev->slow_path_mutex);
0944 return mr;
0945 }
0946
0947 mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
0948 if (IS_ERR(mr))
0949 return mr;
0950
0951 mr->ibmr.pd = pd;
0952 mr->umem = umem;
0953 mr->page_shift = order_base_2(page_size);
0954 set_mr_fields(dev, mr, umem->length, access_flags, iova);
0955
0956 return mr;
0957 }
0958
/*
 * Create an mkey directly with a FW command (the non-UMR slow path),
 * optionally populating the page list at creation time.
 */
0963 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
0964 u64 iova, int access_flags,
0965 unsigned int page_size, bool populate)
0966 {
0967 struct mlx5_ib_dev *dev = to_mdev(pd->device);
0968 struct mlx5_ib_mr *mr;
0969 __be64 *pas;
0970 void *mkc;
0971 int inlen;
0972 u32 *in;
0973 int err;
0974 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
0975
0976 if (!page_size)
0977 return ERR_PTR(-EINVAL);
0978 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0979 if (!mr)
0980 return ERR_PTR(-ENOMEM);
0981
0982 mr->ibmr.pd = pd;
0983 mr->access_flags = access_flags;
0984 mr->page_shift = order_base_2(page_size);
0985
0986 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
0987 if (populate)
0988 inlen += sizeof(*pas) *
0989 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
0990 in = kvzalloc(inlen, GFP_KERNEL);
0991 if (!in) {
0992 err = -ENOMEM;
0993 goto err_1;
0994 }
0995 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
0996 if (populate) {
0997 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
0998 err = -EINVAL;
0999 goto err_2;
1000 }
1001 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
1002 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1003 }
1004
/* The pg_access bit allows setting the access flags
 * in the page list submitted with the command. */
1007 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1008
1009 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1010 set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
1011 populate ? pd : dev->umrc.pd);
1012 MLX5_SET(mkc, mkc, free, !populate);
1013 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1014 MLX5_SET(mkc, mkc, umr_en, 1);
1015
1016 MLX5_SET64(mkc, mkc, len, umem->length);
1017 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1018 MLX5_SET(mkc, mkc, translations_octword_size,
1019 get_octo_len(iova, umem->length, mr->page_shift));
1020 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
1021 if (populate) {
1022 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1023 get_octo_len(iova, umem->length, mr->page_shift));
1024 }
1025
1026 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1027 if (err) {
1028 mlx5_ib_warn(dev, "create mkey failed\n");
1029 goto err_2;
1030 }
1031 mr->mmkey.type = MLX5_MKEY_MR;
1032 mr->umem = umem;
1033 set_mr_fields(dev, mr, umem->length, access_flags, iova);
1034 kvfree(in);
1035
1036 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1037
1038 return mr;
1039
1040 err_2:
1041 kvfree(in);
1042 err_1:
1043 kfree(mr);
1044 return ERR_PTR(err);
1045 }
1046
1047 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1048 u64 length, int acc, int mode)
1049 {
1050 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1051 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1052 struct mlx5_ib_mr *mr;
1053 void *mkc;
1054 u32 *in;
1055 int err;
1056
1057 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1058 if (!mr)
1059 return ERR_PTR(-ENOMEM);
1060
1061 in = kzalloc(inlen, GFP_KERNEL);
1062 if (!in) {
1063 err = -ENOMEM;
1064 goto err_free;
1065 }
1066
1067 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1068
1069 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1070 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1071 MLX5_SET64(mkc, mkc, len, length);
1072 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
1073
1074 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1075 if (err)
1076 goto err_in;
1077
1078 kfree(in);
1079
1080 set_mr_fields(dev, mr, length, acc, start_addr);
1081
1082 return &mr->ibmr;
1083
1084 err_in:
1085 kfree(in);
1086
1087 err_free:
1088 kfree(mr);
1089
1090 return ERR_PTR(err);
1091 }
1092
1093 int mlx5_ib_advise_mr(struct ib_pd *pd,
1094 enum ib_uverbs_advise_mr_advice advice,
1095 u32 flags,
1096 struct ib_sge *sg_list,
1097 u32 num_sge,
1098 struct uverbs_attr_bundle *attrs)
1099 {
1100 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1101 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1102 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
1103 return -EOPNOTSUPP;
1104
1105 return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1106 sg_list, num_sge);
1107 }
1108
1109 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1110 struct ib_dm_mr_attr *attr,
1111 struct uverbs_attr_bundle *attrs)
1112 {
1113 struct mlx5_ib_dm *mdm = to_mdm(dm);
1114 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1115 u64 start_addr = mdm->dev_addr + attr->offset;
1116 int mode;
1117
1118 switch (mdm->type) {
1119 case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1120 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1121 return ERR_PTR(-EINVAL);
1122
1123 mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1124 start_addr -= pci_resource_start(dev->pdev, 0);
1125 break;
1126 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1127 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1128 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
1129 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1130 return ERR_PTR(-EINVAL);
1131
1132 mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1133 break;
1134 default:
1135 return ERR_PTR(-EINVAL);
1136 }
1137
1138 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1139 attr->access_flags, mode);
1140 }
1141
1142 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
1143 u64 iova, int access_flags)
1144 {
1145 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1146 struct mlx5_ib_mr *mr = NULL;
1147 bool xlt_with_umr;
1148 int err;
1149
1150 xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
1151 if (xlt_with_umr) {
1152 mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
1153 } else {
1154 unsigned int page_size = mlx5_umem_find_best_pgsz(
1155 umem, mkc, log_page_size, 0, iova);
1156
1157 mutex_lock(&dev->slow_path_mutex);
1158 mr = reg_create(pd, umem, iova, access_flags, page_size, true);
1159 mutex_unlock(&dev->slow_path_mutex);
1160 }
1161 if (IS_ERR(mr)) {
1162 ib_umem_release(umem);
1163 return ERR_CAST(mr);
1164 }
1165
1166 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1167
1168 atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1169
1170 if (xlt_with_umr) {
/*
 * The mkey is still disabled at this point, whether it came from
 * the cache or from reg_create(..., populate = false). Push the
 * page tables and enable it with a single UMR.
 */
1176 err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
1177 if (err) {
1178 mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1179 return ERR_PTR(err);
1180 }
1181 }
1182 return &mr->ibmr;
1183 }
1184
1185 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
1186 u64 iova, int access_flags,
1187 struct ib_udata *udata)
1188 {
1189 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1190 struct ib_umem_odp *odp;
1191 struct mlx5_ib_mr *mr;
1192 int err;
1193
1194 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1195 return ERR_PTR(-EOPNOTSUPP);
1196
1197 err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
1198 if (err)
1199 return ERR_PTR(err);
1200 if (!start && length == U64_MAX) {
1201 if (iova != 0)
1202 return ERR_PTR(-EINVAL);
1203 if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1204 return ERR_PTR(-EINVAL);
1205
1206 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1207 if (IS_ERR(mr))
1208 return ERR_CAST(mr);
1209 return &mr->ibmr;
1210 }
1211
/* ODP requires xlt update via UMR to work. */
1213 if (!mlx5r_umr_can_load_pas(dev, length))
1214 return ERR_PTR(-EINVAL);
1215
1216 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
1217 &mlx5_mn_ops);
1218 if (IS_ERR(odp))
1219 return ERR_CAST(odp);
1220
1221 mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
1222 if (IS_ERR(mr)) {
1223 ib_umem_release(&odp->umem);
1224 return ERR_CAST(mr);
1225 }
1226 xa_init(&mr->implicit_children);
1227
1228 odp->private = mr;
1229 err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
1230 if (err)
1231 goto err_dereg_mr;
1232
1233 err = mlx5_ib_init_odp_mr(mr);
1234 if (err)
1235 goto err_dereg_mr;
1236 return &mr->ibmr;
1237
1238 err_dereg_mr:
1239 mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1240 return ERR_PTR(err);
1241 }
1242
1243 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1244 u64 iova, int access_flags,
1245 struct ib_udata *udata)
1246 {
1247 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1248 struct ib_umem *umem;
1249
1250 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1251 return ERR_PTR(-EOPNOTSUPP);
1252
1253 mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1254 start, iova, length, access_flags);
1255
1256 if (access_flags & IB_ACCESS_ON_DEMAND)
1257 return create_user_odp_mr(pd, start, length, iova, access_flags,
1258 udata);
1259 umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
1260 if (IS_ERR(umem))
1261 return ERR_CAST(umem);
1262 return create_real_mr(pd, umem, iova, access_flags);
1263 }
1264
1265 static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
1266 {
1267 struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
1268 struct mlx5_ib_mr *mr = umem_dmabuf->private;
1269
1270 dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
1271
1272 if (!umem_dmabuf->sgt)
1273 return;
1274
1275 mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
1276 ib_umem_dmabuf_unmap_pages(umem_dmabuf);
1277 }
1278
1279 static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
1280 .allow_peer2peer = 1,
1281 .move_notify = mlx5_ib_dmabuf_invalidate_cb,
1282 };
1283
1284 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
1285 u64 length, u64 virt_addr,
1286 int fd, int access_flags,
1287 struct ib_udata *udata)
1288 {
1289 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1290 struct mlx5_ib_mr *mr = NULL;
1291 struct ib_umem_dmabuf *umem_dmabuf;
1292 int err;
1293
1294 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
1295 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1296 return ERR_PTR(-EOPNOTSUPP);
1297
1298 mlx5_ib_dbg(dev,
1299 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
1300 offset, virt_addr, length, fd, access_flags);
1301
/* dma-buf requires xlt update via UMR to work. */
1303 if (!mlx5r_umr_can_load_pas(dev, length))
1304 return ERR_PTR(-EINVAL);
1305
1306 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
1307 access_flags,
1308 &mlx5_ib_dmabuf_attach_ops);
1309 if (IS_ERR(umem_dmabuf)) {
1310 mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
1311 PTR_ERR(umem_dmabuf));
1312 return ERR_CAST(umem_dmabuf);
1313 }
1314
1315 mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
1316 access_flags);
1317 if (IS_ERR(mr)) {
1318 ib_umem_release(&umem_dmabuf->umem);
1319 return ERR_CAST(mr);
1320 }
1321
1322 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1323
1324 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
1325 umem_dmabuf->private = mr;
1326 err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
1327 if (err)
1328 goto err_dereg_mr;
1329
1330 err = mlx5_ib_init_dmabuf_mr(mr);
1331 if (err)
1332 goto err_dereg_mr;
1333 return &mr->ibmr;
1334
1335 err_dereg_mr:
1336 mlx5_ib_dereg_mr(&mr->ibmr, NULL);
1337 return ERR_PTR(err);
1338 }
1339
/*
 * True if the change in access flags can be done via UMR; only some
 * access flags can be updated that way.
 */
1344 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
1345 unsigned int current_access_flags,
1346 unsigned int target_access_flags)
1347 {
1348 unsigned int diffs = current_access_flags ^ target_access_flags;
1349
1350 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
1351 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
1352 return false;
1353 return mlx5r_umr_can_reconfig(dev, current_access_flags,
1354 target_access_flags);
1355 }
1356
1357 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
1358 struct ib_umem *new_umem,
1359 int new_access_flags, u64 iova,
1360 unsigned long *page_size)
1361 {
1362 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1363
/* We only track the allocated sizes of MRs that came from the cache */
1365 if (!mr->mmkey.cache_ent)
1366 return false;
1367 if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
1368 return false;
1369
1370 *page_size =
1371 mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
1372 if (WARN_ON(!*page_size))
1373 return false;
1374 return (1ULL << mr->mmkey.cache_ent->order) >=
1375 ib_umem_num_dma_blocks(new_umem, *page_size);
1376 }
1377
1378 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1379 int access_flags, int flags, struct ib_umem *new_umem,
1380 u64 iova, unsigned long page_size)
1381 {
1382 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1383 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
1384 struct ib_umem *old_umem = mr->umem;
1385 int err;
1386
/*
 * To keep everything simple the MR is revoked before we start to mess
 * with it. This ensures the change is atomic relative to any use of
 * the MR.
 */
1392 err = mlx5r_umr_revoke_mr(mr);
1393 if (err)
1394 return err;
1395
1396 if (flags & IB_MR_REREG_PD) {
1397 mr->ibmr.pd = pd;
1398 upd_flags |= MLX5_IB_UPD_XLT_PD;
1399 }
1400 if (flags & IB_MR_REREG_ACCESS) {
1401 mr->access_flags = access_flags;
1402 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1403 }
1404
1405 mr->ibmr.length = new_umem->length;
1406 mr->ibmr.iova = iova;
1408 mr->page_shift = order_base_2(page_size);
1409 mr->umem = new_umem;
1410 err = mlx5r_umr_update_mr_pas(mr, upd_flags);
1411 if (err) {
/*
 * The MR is already revoked at this point, so there is no issue
 * with the caller freeing new_umem.
 */
1416 mr->umem = old_umem;
1417 return err;
1418 }
1419
1420 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
1421 ib_umem_release(old_umem);
1422 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
1423 return 0;
1424 }
1425
1426 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1427 u64 length, u64 iova, int new_access_flags,
1428 struct ib_pd *new_pd,
1429 struct ib_udata *udata)
1430 {
1431 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1432 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1433 int err;
1434
1435 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1436 return ERR_PTR(-EOPNOTSUPP);
1437
1438 mlx5_ib_dbg(
1439 dev,
1440 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1441 start, iova, length, new_access_flags);
1442
1443 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
1444 return ERR_PTR(-EOPNOTSUPP);
1445
1446 if (!(flags & IB_MR_REREG_ACCESS))
1447 new_access_flags = mr->access_flags;
1448 if (!(flags & IB_MR_REREG_PD))
1449 new_pd = ib_mr->pd;
1450
1451 if (!(flags & IB_MR_REREG_TRANS)) {
1452 struct ib_umem *umem;
1453
/* Fast path for a change of PD and/or access flags only */
1455 if (can_use_umr_rereg_access(dev, mr->access_flags,
1456 new_access_flags)) {
1457 err = mlx5r_umr_rereg_pd_access(mr, new_pd,
1458 new_access_flags);
1459 if (err)
1460 return ERR_PTR(err);
1461 return NULL;
1462 }

/* DM or ODP MRs don't have a normal umem so we can't re-use it */
1464 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
1465 goto recreate;
1466
/*
 * Only one active MR can refer to a umem at one time; revoke
 * the old MR before assigning the umem to the new one.
 */
1471 err = mlx5r_umr_revoke_mr(mr);
1472 if (err)
1473 return ERR_PTR(err);
1474 umem = mr->umem;
1475 mr->umem = NULL;
1476 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1477
1478 return create_real_mr(new_pd, umem, mr->ibmr.iova,
1479 new_access_flags);
1480 }
1481
/*
 * DM or ODP MRs don't have a normal umem so we can't re-use it;
 * recreate the MR from scratch instead.
 */
1486 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
1487 goto recreate;
1488
1489 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
1490 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
1491 struct ib_umem *new_umem;
1492 unsigned long page_size;
1493
1494 new_umem = ib_umem_get(&dev->ib_dev, start, length,
1495 new_access_flags);
1496 if (IS_ERR(new_umem))
1497 return ERR_CAST(new_umem);
1498
/* Fast path: update the PAS of the existing mkey via UMR */
1500 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
1501 &page_size)) {
1502 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
1503 new_umem, iova, page_size);
1504 if (err) {
1505 ib_umem_release(new_umem);
1506 return ERR_PTR(err);
1507 }
1508 return NULL;
1509 }
1510 return create_real_mr(new_pd, new_umem, iova, new_access_flags);
1511 }
1512
/*
 * Everything else has no state we can preserve, just create a
 * new MR from scratch.
 */
1517 recreate:
1518 return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
1519 new_access_flags, udata);
1520 }
1521
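/*
 * Allocate and DMA-map the descriptor buffer (MTTs or KLMs) used by
 * kernel-owned MRs such as fast-registration and integrity MRs. The buffer
 * is over-allocated so the descriptors can be aligned to MLX5_UMR_ALIGN.
 */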
1522 static int
1523 mlx5_alloc_priv_descs(struct ib_device *device,
1524 struct mlx5_ib_mr *mr,
1525 int ndescs,
1526 int desc_size)
1527 {
1528 struct mlx5_ib_dev *dev = to_mdev(device);
1529 struct device *ddev = &dev->mdev->pdev->dev;
1530 int size = ndescs * desc_size;
1531 int add_size;
1532 int ret;
1533
1534 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1535
1536 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1537 if (!mr->descs_alloc)
1538 return -ENOMEM;
1539
1540 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1541
1542 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
1543 if (dma_mapping_error(ddev, mr->desc_map)) {
1544 ret = -ENOMEM;
1545 goto err;
1546 }
1547
1548 return 0;
1549 err:
1550 kfree(mr->descs_alloc);
1551
1552 return ret;
1553 }
1554
1555 static void
1556 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1557 {
1558 if (!mr->umem && mr->descs) {
1559 struct ib_device *device = mr->ibmr.device;
1560 int size = mr->max_descs * mr->desc_size;
1561 struct mlx5_ib_dev *dev = to_mdev(device);
1562
1563 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
1564 DMA_TO_DEVICE);
1565 kfree(mr->descs_alloc);
1566 mr->descs = NULL;
1567 }
1568 }
1569
1570 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1571 {
1572 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1573 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1574 int rc;
1575
/*
 * If the mkey is still referenced by async users (ODP page faults,
 * prefetch), remove it from the ODP xarray and wait for all of those
 * references to be dropped before the mkey is destroyed below.
 */
1581 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
1582 refcount_read(&mr->mmkey.usecount) != 0 &&
1583 xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
1584 mlx5r_deref_wait_odp_mkey(&mr->mmkey);
1585
1586 if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1587 xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1588 mr->sig, NULL, GFP_KERNEL);
1589
1590 if (mr->mtt_mr) {
1591 rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
1592 if (rc)
1593 return rc;
1594 mr->mtt_mr = NULL;
1595 }
1596 if (mr->klm_mr) {
1597 rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
1598 if (rc)
1599 return rc;
1600 mr->klm_mr = NULL;
1601 }
1602
1603 if (mlx5_core_destroy_psv(dev->mdev,
1604 mr->sig->psv_memory.psv_idx))
1605 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1606 mr->sig->psv_memory.psv_idx);
1607 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1608 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1609 mr->sig->psv_wire.psv_idx);
1610 kfree(mr->sig);
1611 mr->sig = NULL;
1612 }
1613
/* Stop DMA, then return the mkey to the cache or destroy it */
1615 if (mr->mmkey.cache_ent) {
1616 xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
1617 mr->mmkey.cache_ent->in_use--;
1618 xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
1619
1620 if (mlx5r_umr_revoke_mr(mr) ||
1621 push_mkey(mr->mmkey.cache_ent, false,
1622 xa_mk_value(mr->mmkey.key)))
1623 mr->mmkey.cache_ent = NULL;
1624 }
1625 if (!mr->mmkey.cache_ent) {
1626 rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
1627 if (rc)
1628 return rc;
1629 }
1630
1631 if (mr->umem) {
1632 bool is_odp = is_odp_mr(mr);
1633
1634 if (!is_odp)
1635 atomic_sub(ib_umem_num_pages(mr->umem),
1636 &dev->mdev->priv.reg_pages);
1637 ib_umem_release(mr->umem);
1638 if (is_odp)
1639 mlx5_ib_free_odp_mr(mr);
1640 }
1641
1642 if (!mr->mmkey.cache_ent)
1643 mlx5_free_priv_descs(mr);
1644
1645 kfree(mr);
1646 return 0;
1647 }
1648
1649 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1650 int access_mode, int page_shift)
1651 {
1652 void *mkc;
1653
1654 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1655
/* This is only used from the kernel, so setting the PD is OK. */
1657 set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
1658 MLX5_SET(mkc, mkc, free, 1);
1659 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1660 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1661 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1662 MLX5_SET(mkc, mkc, umr_en, 1);
1663 MLX5_SET(mkc, mkc, log_page_size, page_shift);
1664 }
1665
1666 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1667 int ndescs, int desc_size, int page_shift,
1668 int access_mode, u32 *in, int inlen)
1669 {
1670 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1671 int err;
1672
1673 mr->access_mode = access_mode;
1674 mr->desc_size = desc_size;
1675 mr->max_descs = ndescs;
1676
1677 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1678 if (err)
1679 return err;
1680
1681 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1682
1683 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1684 if (err)
1685 goto err_free_descs;
1686
1687 mr->mmkey.type = MLX5_MKEY_MR;
1688 mr->ibmr.lkey = mr->mmkey.key;
1689 mr->ibmr.rkey = mr->mmkey.key;
1690
1691 return 0;
1692
1693 err_free_descs:
1694 mlx5_free_priv_descs(mr);
1695 return err;
1696 }
1697
1698 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1699 u32 max_num_sg, u32 max_num_meta_sg,
1700 int desc_size, int access_mode)
1701 {
1702 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1703 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1704 int page_shift = 0;
1705 struct mlx5_ib_mr *mr;
1706 u32 *in;
1707 int err;
1708
1709 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1710 if (!mr)
1711 return ERR_PTR(-ENOMEM);
1712
1713 mr->ibmr.pd = pd;
1714 mr->ibmr.device = pd->device;
1715
1716 in = kzalloc(inlen, GFP_KERNEL);
1717 if (!in) {
1718 err = -ENOMEM;
1719 goto err_free;
1720 }
1721
1722 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1723 page_shift = PAGE_SHIFT;
1724
1725 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1726 access_mode, in, inlen);
1727 if (err)
1728 goto err_free_in;
1729
1730 mr->umem = NULL;
1731 kfree(in);
1732
1733 return mr;
1734
1735 err_free_in:
1736 kfree(in);
1737 err_free:
1738 kfree(mr);
1739 return ERR_PTR(err);
1740 }
1741
1742 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1743 int ndescs, u32 *in, int inlen)
1744 {
1745 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
1746 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
1747 inlen);
1748 }
1749
1750 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1751 int ndescs, u32 *in, int inlen)
1752 {
1753 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
1754 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1755 }
1756
1757 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1758 int max_num_sg, int max_num_meta_sg,
1759 u32 *in, int inlen)
1760 {
1761 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1762 u32 psv_index[2];
1763 void *mkc;
1764 int err;
1765
1766 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1767 if (!mr->sig)
1768 return -ENOMEM;
1769
/* create mem & wire PSVs */
1771 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
1772 if (err)
1773 goto err_free_sig;
1774
1775 mr->sig->psv_memory.psv_idx = psv_index[0];
1776 mr->sig->psv_wire.psv_idx = psv_index[1];
1777
1778 mr->sig->sig_status_checked = true;
1779 mr->sig->sig_err_exists = false;
1780
1781 ++mr->sig->sigerr_count;
1782 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1783 sizeof(struct mlx5_klm),
1784 MLX5_MKC_ACCESS_MODE_KLMS);
1785 if (IS_ERR(mr->klm_mr)) {
1786 err = PTR_ERR(mr->klm_mr);
1787 goto err_destroy_psv;
1788 }
1789 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1790 sizeof(struct mlx5_mtt),
1791 MLX5_MKC_ACCESS_MODE_MTT);
1792 if (IS_ERR(mr->mtt_mr)) {
1793 err = PTR_ERR(mr->mtt_mr);
1794 goto err_free_klm_mr;
1795 }
1796
/* Set BSF descriptors for the mkey */
1798 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1799 MLX5_SET(mkc, mkc, bsf_en, 1);
1800 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1801
1802 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
1803 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1804 if (err)
1805 goto err_free_mtt_mr;
1806
1807 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1808 mr->sig, GFP_KERNEL));
1809 if (err)
1810 goto err_free_descs;
1811 return 0;
1812
1813 err_free_descs:
1814 destroy_mkey(dev, mr);
1815 mlx5_free_priv_descs(mr);
1816 err_free_mtt_mr:
1817 mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
1818 mr->mtt_mr = NULL;
1819 err_free_klm_mr:
1820 mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
1821 mr->klm_mr = NULL;
1822 err_destroy_psv:
1823 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
1824 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1825 mr->sig->psv_memory.psv_idx);
1826 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1827 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1828 mr->sig->psv_wire.psv_idx);
1829 err_free_sig:
1830 kfree(mr->sig);
1831
1832 return err;
1833 }
1834
1835 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
1836 enum ib_mr_type mr_type, u32 max_num_sg,
1837 u32 max_num_meta_sg)
1838 {
1839 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1840 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1841 int ndescs = ALIGN(max_num_sg, 4);
1842 struct mlx5_ib_mr *mr;
1843 u32 *in;
1844 int err;
1845
1846 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1847 if (!mr)
1848 return ERR_PTR(-ENOMEM);
1849
1850 in = kzalloc(inlen, GFP_KERNEL);
1851 if (!in) {
1852 err = -ENOMEM;
1853 goto err_free;
1854 }
1855
1856 mr->ibmr.device = pd->device;
1857 mr->umem = NULL;
1858
1859 switch (mr_type) {
1860 case IB_MR_TYPE_MEM_REG:
1861 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
1862 break;
1863 case IB_MR_TYPE_SG_GAPS:
1864 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
1865 break;
1866 case IB_MR_TYPE_INTEGRITY:
1867 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
1868 max_num_meta_sg, in, inlen);
1869 break;
1870 default:
1871 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1872 err = -EINVAL;
1873 }
1874
1875 if (err)
1876 goto err_free_in;
1877
1878 kfree(in);
1879
1880 return &mr->ibmr;
1881
1882 err_free_in:
1883 kfree(in);
1884 err_free:
1885 kfree(mr);
1886 return ERR_PTR(err);
1887 }
1888
1889 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1890 u32 max_num_sg)
1891 {
1892 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
1893 }
1894
1895 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
1896 u32 max_num_sg, u32 max_num_meta_sg)
1897 {
1898 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
1899 max_num_meta_sg);
1900 }
1901
1902 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
1903 {
1904 struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
1905 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1906 struct mlx5_ib_mw *mw = to_mmw(ibmw);
1907 unsigned int ndescs;
1908 u32 *in = NULL;
1909 void *mkc;
1910 int err;
1911 struct mlx5_ib_alloc_mw req = {};
1912 struct {
1913 __u32 comp_mask;
1914 __u32 response_length;
1915 } resp = {};
1916
1917 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1918 if (err)
1919 return err;
1920
1921 if (req.comp_mask || req.reserved1 || req.reserved2)
1922 return -EOPNOTSUPP;
1923
1924 if (udata->inlen > sizeof(req) &&
1925 !ib_is_udata_cleared(udata, sizeof(req),
1926 udata->inlen - sizeof(req)))
1927 return -EOPNOTSUPP;
1928
1929 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1930
1931 in = kzalloc(inlen, GFP_KERNEL);
1932 if (!in) {
1933 err = -ENOMEM;
1934 goto free;
1935 }
1936
1937 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1938
1939 MLX5_SET(mkc, mkc, free, 1);
1940 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1941 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
1942 MLX5_SET(mkc, mkc, umr_en, 1);
1943 MLX5_SET(mkc, mkc, lr, 1);
1944 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
1945 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
1946 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1947
1948 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
1949 if (err)
1950 goto free;
1951
1952 mw->mmkey.type = MLX5_MKEY_MW;
1953 ibmw->rkey = mw->mmkey.key;
1954 mw->mmkey.ndescs = ndescs;
1955
1956 resp.response_length =
1957 min(offsetofend(typeof(resp), response_length), udata->outlen);
1958 if (resp.response_length) {
1959 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1960 if (err)
1961 goto free_mkey;
1962 }
1963
1964 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
1965 err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
1966 if (err)
1967 goto free_mkey;
1968 }
1969
1970 kfree(in);
1971 return 0;
1972
1973 free_mkey:
1974 mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
1975 free:
1976 kfree(in);
1977 return err;
1978 }
1979
1980 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1981 {
1982 struct mlx5_ib_dev *dev = to_mdev(mw->device);
1983 struct mlx5_ib_mw *mmw = to_mmw(mw);
1984
1985 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
1986 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
/*
 * pagefault_single_data_segment() may still be accessing mmw
 * if the user bound an ODP MR to this MW.
 */
1991 mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
1992
1993 return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
1994 }
1995
1996 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1997 struct ib_mr_status *mr_status)
1998 {
1999 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
2000 int ret = 0;
2001
2002 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
2003 pr_err("Invalid status check mask\n");
2004 ret = -EINVAL;
2005 goto done;
2006 }
2007
2008 mr_status->fail_status = 0;
2009 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2010 if (!mmr->sig) {
2011 ret = -EINVAL;
2012 pr_err("signature status check requested on a non-signature enabled MR\n");
2013 goto done;
2014 }
2015
2016 mmr->sig->sig_status_checked = true;
2017 if (!mmr->sig->sig_err_exists)
2018 goto done;
2019
2020 if (ibmr->lkey == mmr->sig->err_item.key)
2021 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2022 sizeof(mr_status->sig_err));
2023 else {
2024 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2025 mr_status->sig_err.sig_err_offset = 0;
2026 mr_status->sig_err.key = mmr->sig->err_item.key;
2027 }
2028
2029 mmr->sig->sig_err_exists = false;
2030 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2031 }
2032
2033 done:
2034 return ret;
2035 }
2036
2037 static int
2038 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2039 int data_sg_nents, unsigned int *data_sg_offset,
2040 struct scatterlist *meta_sg, int meta_sg_nents,
2041 unsigned int *meta_sg_offset)
2042 {
2043 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2044 unsigned int sg_offset = 0;
2045 int n = 0;
2046
2047 mr->meta_length = 0;
2048 if (data_sg_nents == 1) {
2049 n++;
2050 mr->mmkey.ndescs = 1;
2051 if (data_sg_offset)
2052 sg_offset = *data_sg_offset;
2053 mr->data_length = sg_dma_len(data_sg) - sg_offset;
2054 mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2055 if (meta_sg_nents == 1) {
2056 n++;
2057 mr->meta_ndescs = 1;
2058 if (meta_sg_offset)
2059 sg_offset = *meta_sg_offset;
2060 else
2061 sg_offset = 0;
2062 mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2063 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2064 }
2065 ibmr->length = mr->data_length + mr->meta_length;
2066 }
2067
2068 return n;
2069 }
2070
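/*
 * Build KLM descriptors directly from the data (and optional metadata)
 * scatterlists. Unlike the MTT path this does not require page alignment,
 * at the cost of one descriptor per SG entry.
 */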
2071 static int
2072 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2073 struct scatterlist *sgl,
2074 unsigned short sg_nents,
2075 unsigned int *sg_offset_p,
2076 struct scatterlist *meta_sgl,
2077 unsigned short meta_sg_nents,
2078 unsigned int *meta_sg_offset_p)
2079 {
2080 struct scatterlist *sg = sgl;
2081 struct mlx5_klm *klms = mr->descs;
2082 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2083 u32 lkey = mr->ibmr.pd->local_dma_lkey;
2084 int i, j = 0;
2085
2086 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2087 mr->ibmr.length = 0;
2088
2089 for_each_sg(sgl, sg, sg_nents, i) {
2090 if (unlikely(i >= mr->max_descs))
2091 break;
2092 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2093 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2094 klms[i].key = cpu_to_be32(lkey);
2095 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2096
2097 sg_offset = 0;
2098 }
2099
2100 if (sg_offset_p)
2101 *sg_offset_p = sg_offset;
2102
2103 mr->mmkey.ndescs = i;
2104 mr->data_length = mr->ibmr.length;
2105
2106 if (meta_sg_nents) {
2107 sg = meta_sgl;
2108 sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2109 for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2110 if (unlikely(i + j >= mr->max_descs))
2111 break;
2112 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2113 sg_offset);
2114 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2115 sg_offset);
2116 klms[i + j].key = cpu_to_be32(lkey);
2117 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2118
2119 sg_offset = 0;
2120 }
2121 if (meta_sg_offset_p)
2122 *meta_sg_offset_p = sg_offset;
2123
2124 mr->meta_ndescs = j;
2125 mr->meta_length = mr->ibmr.length - mr->data_length;
2126 }
2127
2128 return i + j;
2129 }
2130
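/*
 * ib_sg_to_pages() callback: append one page address to the MTT descriptor
 * list, marking it readable and writable for the HCA.
 */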
2131 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2132 {
2133 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2134 __be64 *descs;
2135
2136 if (unlikely(mr->mmkey.ndescs == mr->max_descs))
2137 return -ENOMEM;
2138
2139 descs = mr->descs;
2140 descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2141
2142 return 0;
2143 }
2144
2145 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2146 {
2147 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2148 __be64 *descs;
2149
2150 if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
2151 return -ENOMEM;
2152
2153 descs = mr->descs;
2154 descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
2155 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2156
2157 return 0;
2158 }
2159
2160 static int
2161 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2162 int data_sg_nents, unsigned int *data_sg_offset,
2163 struct scatterlist *meta_sg, int meta_sg_nents,
2164 unsigned int *meta_sg_offset)
2165 {
2166 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2167 struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2168 int n;
2169
2170 pi_mr->mmkey.ndescs = 0;
2171 pi_mr->meta_ndescs = 0;
2172 pi_mr->meta_length = 0;
2173
2174 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2175 pi_mr->desc_size * pi_mr->max_descs,
2176 DMA_TO_DEVICE);
2177
2178 pi_mr->ibmr.page_size = ibmr->page_size;
2179 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2180 mlx5_set_page);
2181 if (n != data_sg_nents)
2182 return n;
2183
2184 pi_mr->data_iova = pi_mr->ibmr.iova;
2185 pi_mr->data_length = pi_mr->ibmr.length;
2186 pi_mr->ibmr.length = pi_mr->data_length;
2187 ibmr->length = pi_mr->data_length;
2188
2189 if (meta_sg_nents) {
2190 u64 page_mask = ~((u64)ibmr->page_size - 1);
2191 u64 iova = pi_mr->data_iova;
2192
2193 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2194 meta_sg_offset, mlx5_set_page_pi);
2195
2196 pi_mr->meta_length = pi_mr->ibmr.length;
2197
/*
 * The PI address programmed to the HW is the offset of the
 * metadata relative to the first data page: the page-aligned
 * data iova, plus the size covered by the data descriptors,
 * plus the in-page offset of the metadata iova.
 */
2203 pi_mr->pi_iova = (iova & page_mask) +
2204 pi_mr->mmkey.ndescs * ibmr->page_size +
2205 (pi_mr->ibmr.iova & ~page_mask);
2206
/*
 * The MR length covers the data, the gap between the end of
 * the data and the start of the metadata (if any), and the
 * metadata itself, so that a single MTT MR describes both
 * regions.
 */
2213 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2214 pi_mr->ibmr.iova = iova;
2215 ibmr->length += pi_mr->meta_length;
2216 }
2217
2218 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2219 pi_mr->desc_size * pi_mr->max_descs,
2220 DMA_TO_DEVICE);
2221
2222 return n;
2223 }
2224
2225 static int
2226 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2227 int data_sg_nents, unsigned int *data_sg_offset,
2228 struct scatterlist *meta_sg, int meta_sg_nents,
2229 unsigned int *meta_sg_offset)
2230 {
2231 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2232 struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2233 int n;
2234
2235 pi_mr->mmkey.ndescs = 0;
2236 pi_mr->meta_ndescs = 0;
2237 pi_mr->meta_length = 0;
2238
2239 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2240 pi_mr->desc_size * pi_mr->max_descs,
2241 DMA_TO_DEVICE);
2242
2243 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2244 meta_sg, meta_sg_nents, meta_sg_offset);
2245
2246 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2247 pi_mr->desc_size * pi_mr->max_descs,
2248 DMA_TO_DEVICE);
2249
/* This is a zero-based memory region */
2251 pi_mr->data_iova = 0;
2252 pi_mr->ibmr.iova = 0;
2253 pi_mr->pi_iova = pi_mr->data_length;
2254 ibmr->length = pi_mr->ibmr.length;
2255
2256 return n;
2257 }
2258
2259 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2260 int data_sg_nents, unsigned int *data_sg_offset,
2261 struct scatterlist *meta_sg, int meta_sg_nents,
2262 unsigned int *meta_sg_offset)
2263 {
2264 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2265 struct mlx5_ib_mr *pi_mr = NULL;
2266 int n;
2267
2268 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2269
2270 mr->mmkey.ndescs = 0;
2271 mr->data_length = 0;
2272 mr->data_iova = 0;
2273 mr->meta_ndescs = 0;
2274 mr->pi_iova = 0;
2275
/*
 * As a performance optimization, first try to map the sg lists to PA
 * descriptors using the PD's local_dma_lkey, which avoids a UMR
 * operation entirely. This only works when both the data and the
 * metadata are single, DMA-contiguous SG entries.
 */
2281 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2282 data_sg_offset, meta_sg, meta_sg_nents,
2283 meta_sg_offset);
2284 if (n == data_sg_nents + meta_sg_nents)
2285 goto out;
2286
/*
 * MTT descriptors are cheaper than KLMs, so next try to map the sg
 * lists with the MTT based pi_mr. If the buffers cannot be fully
 * described by page-aligned MTT entries, fall back to the KLM based
 * pi_mr below.
 */
2294 pi_mr = mr->mtt_mr;
2295 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2296 data_sg_offset, meta_sg, meta_sg_nents,
2297 meta_sg_offset);
2298 if (n == data_sg_nents + meta_sg_nents)
2299 goto out;
2300
2301 pi_mr = mr->klm_mr;
2302 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2303 data_sg_offset, meta_sg, meta_sg_nents,
2304 meta_sg_offset);
2305 if (unlikely(n != data_sg_nents + meta_sg_nents))
2306 return -ENOMEM;
2307
2308 out:
/* This is a zero-based memory region */
2310 ibmr->iova = 0;
2311 mr->pi_mr = pi_mr;
2312 if (pi_mr)
2313 ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2314 else
2315 ibmr->sig_attrs->meta_length = mr->meta_length;
2316
2317 return 0;
2318 }
2319
2320 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2321 unsigned int *sg_offset)
2322 {
2323 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2324 int n;
2325
2326 mr->mmkey.ndescs = 0;
2327
2328 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2329 mr->desc_size * mr->max_descs,
2330 DMA_TO_DEVICE);
2331
2332 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2333 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2334 NULL);
2335 else
2336 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2337 mlx5_set_page);
2338
2339 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2340 mr->desc_size * mr->max_descs,
2341 DMA_TO_DEVICE);
2342
2343 return n;
2344 }