#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

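/* Divide _n by 2^_s, rounding up. */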
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

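/* Number of 16-byte octowords needed to hold the MTT entries covering len
 * bytes at the given page size (two 8-byte MTT entries per octoword).
 */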
static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

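/* Walk the DMA-mapped scatterlist and emit one MTT entry per 2^log_size
 * block until all nsg translation entries are filled.
 */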
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}

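/* Create an MTT-based memory key describing a single direct region. */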
static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
		return err;
	}

	return 0;
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

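/* Fill the KLM array of the indirect key. Each direct region contributes one
 * KLM entry; a hole between consecutive regions is covered by an entry that
 * points at the null mkey.
 */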
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

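/* Create the KLM-based indirect memory key that stitches the direct keys
 * into a single mapping of the guest address space.
 */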
static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}

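/* Map one direct region: pick the largest power-of-two entity size that
 * divides every overlapping iotlb map (via gcd), build a scatterlist of
 * entities, DMA map it, and create the MTT-based direct key.
 */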
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		paend = map->addr + maplen(map, mr);
		for (pa = map->addr; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	err = create_direct_mr(mvdev, mr);
	if (err)
		goto err_direct;

	return 0;

err_direct:
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

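/* Cover [start, start + size) with direct keys, splitting the range into
 * chunks of at most MAX_KLM_SIZE bytes so each chunk fits in one KLM entry.
 */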
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;
	int i = 0;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
		i++;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}
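/* The iotlb contains a list of maps. Go over the maps, merge adjacent maps
 * with identical permissions, and create a chain of direct keys for each
 * merged range. Finally create one indirect key that spans all of them.
 */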
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* There is a hole between the previous
					 * range and this map. Account for the
					 * null KLM entries needed to cover it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * key refers to the direct keys that hold the MTT translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}

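/* Duplicate the given iotlb into the control VQ iotlb. When no source iotlb
 * is given, install a single 1:1 read/write mapping over the whole range.
 */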
static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
{
	struct vhost_iotlb_map *map;
	u64 start = 0, last = ULLONG_MAX;
	int err;

	if (!src) {
		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
		return err;
	}

	for (map = vhost_iotlb_itree_first(src, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
					    map->addr, map->perm);
		if (err)
			return err;
	}
	return 0;
}

static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
{
	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;

	mutex_lock(&mr->mkey_mtx);
	if (!mr->initialized)
		goto out;

	prune_iotlb(mvdev);
	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	memset(mr, 0, sizeof(*mr));
	mr->initialized = false;
out:
	mutex_unlock(&mr->mkey_mtx);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err;

	if (mr->initialized)
		return 0;

	if (iotlb)
		err = create_user_mr(mvdev, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	err = dup_iotlb(mvdev, iotlb);
	if (err)
		goto out_err;

	mr->initialized = true;
	return 0;

out_err:
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}

int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	int err;

	mutex_lock(&mvdev->mr.mkey_mtx);
	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mvdev->mr.mkey_mtx);
	return err;
}

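/* Handle a set_map request. If a memory key is already initialized, report
 * *change_map so the caller rebuilds the mapping; otherwise create the key
 * here under the mkey mutex.
 */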
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			     bool *change_map)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err = 0;

	*change_map = false;
	mutex_lock(&mr->mkey_mtx);
	if (mr->initialized) {
		mlx5_vdpa_info(mvdev, "memory map update\n");
		*change_map = true;
	}
	if (!*change_map)
		err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);

	return err;
}