Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
0003 
0004 #include <linux/vhost_types.h>
0005 #include <linux/vdpa.h>
0006 #include <linux/gcd.h>
0007 #include <linux/string.h>
0008 #include <linux/mlx5/qp.h>
0009 #include "mlx5_vdpa.h"
0010 
/* DIV_ROUND_UP where the divider is a power of 2 given by its log base 2 value.
 * Evaluates to a u64. Note: use 1ULL for the rounding bias so the shift is
 * done in 64 bits; a plain `1 << __s` is an int shift and is undefined for
 * __s >= 31 even though __s itself is u64.
 */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1ULL << (__s)) - 1) >> (__s)); \
	_res; \
})
0019 
0020 static int get_octo_len(u64 len, int page_shift)
0021 {
0022     u64 page_size = 1ULL << page_shift;
0023     int npages;
0024 
0025     npages = ALIGN(len, page_size) >> page_shift;
0026     return (npages + 1) / 2;
0027 }
0028 
/* Split the mkey access mode value across the two fields of the HW layout:
 * the low two bits go to access_mode_1_0, the remaining high bits to
 * access_mode_4_2.
 */
static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}
0034 
/* Fill the MTT array with the DMA addresses covered by @mr. Each MTT entry
 * describes exactly 2^mr->log_size bytes, so a DMA-mapped scatterlist element
 * longer than that is split into several consecutive entries. At most
 * mr->nsg entries are written.
 */
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;	/* MTT entries remaining to be written */
	u64 dma_addr;
	u64 dma_len;
	int j = 0;		/* next index to write in mtt[] */
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		/* Walk this sg element in 2^log_size strides, one MTT entry
		 * per stride. NOTE(review): assumes sg_dma_len() is a
		 * multiple of BIT(mr->log_size) (otherwise dma_len would wrap
		 * past zero) — presumably guaranteed by map_direct_mr()'s
		 * gcd-derived entity size; confirm.
		 */
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}
0051 
/* Create a direct (MTT-based) mkey covering [mr->start, mr->end). The
 * memory must already be DMA-mapped via mr->sg_head (see map_direct_mr()).
 * Returns 0 on success or a negative errno.
 */
static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;

	/* Command layout: create_mkey header followed by the MTT array,
	 * rounded up to a 16-byte multiple.
	 */
	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	/* Local write/read permissions mirror the vhost map permissions. */
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
		return err;
	}

	return 0;
}
0088 
/* Destroy the mkey of a single direct MR; DMA unmapping and sg-table
 * release are handled separately by unmap_direct_mr().
 */
static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}
0093 
0094 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
0095 {
0096     return max_t(u64, map->start, mr->start);
0097 }
0098 
0099 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
0100 {
0101     return min_t(u64, map->last + 1, mr->end);
0102 }
0103 
0104 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
0105 {
0106     return map_end(map, mr) - map_start(map, mr);
0107 }
0108 
/* Sentinels returned by indir_start_addr()/indir_len() when the direct-MR
 * list is empty.
 */
#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)
0111 
0112 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
0113 {
0114     struct mlx5_vdpa_direct_mr *s;
0115 
0116     s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
0117     if (!s)
0118         return MLX5_VDPA_INVALID_START_ADDR;
0119 
0120     return s->start;
0121 }
0122 
0123 static u64 indir_len(struct mlx5_vdpa_mr *mkey)
0124 {
0125     struct mlx5_vdpa_direct_mr *s;
0126     struct mlx5_vdpa_direct_mr *e;
0127 
0128     s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
0129     if (!s)
0130         return MLX5_VDPA_INVALID_LEN;
0131 
0132     e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
0133 
0134     return e->end - s->start;
0135 }
0136 
/* Largest byte range a single direct MR / KLM entry may describe (2^30 = 1 GiB). */
#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)
0139 
/* Byte-count field of a KLM entry. Direct-MR sizes are clamped to
 * MAX_KLM_SIZE by add_direct_chain(), so the truncation to 32 bits is safe
 * for them. NOTE(review): fill_indir() also passes hole sizes through here;
 * a hole of 4 GiB or more would truncate — confirm holes are bounded.
 */
static u32 klm_bcount(u64 size)
{
	return (u32)size;
}
0144 
/* Populate the KLM array of the indirect mkey from the list of direct MRs.
 * preve tracks the end of the address range emitted so far; whenever the
 * next direct MR does not start exactly at preve, a KLM referencing the
 * null mkey is emitted to cover the hole, then the same direct MR is
 * retried via the 'again' label.
 */
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;	/* end (exclusive) of the range covered so far */
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			/* Contiguous: reference the direct MR's key for its
			 * whole range.
			 */
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			/* Hole before this direct MR: emit a null-mkey KLM
			 * for the gap, then retry this direct MR.
			 */
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}
0176 
/* Bytes needed to hold @nklms KLM entries (16 bytes each), with the entry
 * count padded up to a multiple of four.
 */
static int klm_byte_size(int nklms)
{
	int padded_klms = ALIGN(nklms, 4);

	return padded_klms * 16;
}
0181 
0182 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
0183 {
0184     int inlen;
0185     void *mkc;
0186     void *in;
0187     int err;
0188     u64 start;
0189     u64 len;
0190 
0191     start = indir_start_addr(mr);
0192     len = indir_len(mr);
0193     if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
0194         return -EINVAL;
0195 
0196     inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
0197     in = kzalloc(inlen, GFP_KERNEL);
0198     if (!in)
0199         return -ENOMEM;
0200 
0201     MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
0202     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
0203     MLX5_SET(mkc, mkc, lw, 1);
0204     MLX5_SET(mkc, mkc, lr, 1);
0205     mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
0206     MLX5_SET(mkc, mkc, qpn, 0xffffff);
0207     MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
0208     MLX5_SET64(mkc, mkc, start_addr, start);
0209     MLX5_SET64(mkc, mkc, len, len);
0210     MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
0211     MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
0212     fill_indir(mvdev, mr, in);
0213     err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
0214     kfree(in);
0215     return err;
0216 }
0217 
/* Destroy the indirect mkey created by create_indirect_key(). */
static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}
0222 
/* Build a scatter/gather table describing [mr->start, mr->end) from the
 * iotlb maps overlapping it, DMA-map it, and create the direct mkey.
 *
 * The MTT entry size (2^log_entity_size) is chosen as the GCD of all
 * overlapping map lengths, so every map decomposes into a whole number of
 * equally sized entries.
 */
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	/* First pass: compute the gcd of all overlap lengths.
	 * NOTE(review): if no map overlaps the range, lgcd stays 0 and
	 * ilog2(0) below is undefined — presumably the caller only passes
	 * ranges derived from existing maps; confirm. Also note the lower
	 * bound handed to vhost_iotlb_itree_next() here is the accumulated
	 * size rather than mr->start (as the second pass uses); appears
	 * harmless since iteration proceeds forward — verify.
	 */
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	/* Second pass: emit one sg entry per entity-sized chunk of each
	 * overlapping map.
	 */
	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		paend = map->addr + maplen(map, mr);
		for (pa = map->addr; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	/* nent may be smaller than nsg if the DMA layer coalesces entries. */
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	err = create_direct_mr(mvdev, mr);
	if (err)
		goto err_direct;

	return 0;

err_direct:
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
	sg_free_table(&mr->sg_head);
	return err;
}
0294 
/* Full teardown of a direct MR: destroy the mkey before unmapping the
 * memory it references, then release the sg table.
 */
static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}
0303 
0304 static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
0305                 struct vhost_iotlb *iotlb)
0306 {
0307     struct mlx5_vdpa_mr *mr = &mvdev->mr;
0308     struct mlx5_vdpa_direct_mr *dmr;
0309     struct mlx5_vdpa_direct_mr *n;
0310     LIST_HEAD(tmp);
0311     u64 st;
0312     u64 sz;
0313     int err;
0314     int i = 0;
0315 
0316     st = start;
0317     while (size) {
0318         sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
0319         dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
0320         if (!dmr) {
0321             err = -ENOMEM;
0322             goto err_alloc;
0323         }
0324 
0325         dmr->start = st;
0326         dmr->end = st + sz;
0327         dmr->perm = perm;
0328         err = map_direct_mr(mvdev, dmr, iotlb);
0329         if (err) {
0330             kfree(dmr);
0331             goto err_alloc;
0332         }
0333 
0334         list_add_tail(&dmr->list, &tmp);
0335         size -= sz;
0336         mr->num_directs++;
0337         mr->num_klms++;
0338         st += sz;
0339         i++;
0340     }
0341     list_splice_tail(&tmp, &mr->head);
0342     return 0;
0343 
0344 err_alloc:
0345     list_for_each_entry_safe(dmr, n, &mr->head, list) {
0346         list_del_init(&dmr->list);
0347         unmap_direct_mr(mvdev, dmr);
0348         kfree(dmr);
0349     }
0350     return err;
0351 }
0352 
0353 /* The iotlb pointer contains a list of maps. Go over the maps, possibly
0354  * merging mergeable maps, and create direct memory keys that provide the
0355  * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
0357  * by iotlb.
0358  */
/* Walk the iotlb maps in address order, merging adjacent maps that share
 * the same permissions into one region, and create a chain of direct MRs
 * per region. Holes between regions are accounted for in num_klms so
 * create_indirect_key() can size the KLM table for the null-mkey entries
 * that fill_indir() emits.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;	/* permissions of the region being merged */
	u64 last = U64_MAX;
	u64 ps = U64_MAX;	/* start of the region being merged */
	u64 pe = U64_MAX;	/* end (exclusive) of the region being merged */
	u64 start = 0;
	int err = 0;
	int nnuls;

	/* NOTE(review): num_klms/num_directs are not reset here; if a prior
	 * create attempt failed they could be stale on retry — confirm.
	 */
	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			/* Contiguous with the current region and same
			 * permissions: extend it.
			 */
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			/* Start a new region at this map. */
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	/* Flush the last pending region (a no-op for an empty iotlb, since
	 * pe - ps is then 0; create_indirect_key() returns -EINVAL below).
	 */
	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}
0421 
/* Create a physical-address (PA) mkey spanning the whole address space
 * (length64) with local read/write access. Used when no iotlb is supplied,
 * i.e. the device works directly on its DMA address space.
 */
static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}
0449 
/* Destroy the PA mkey created by create_dma_mr(). */
static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}
0454 
0455 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
0456 {
0457     struct vhost_iotlb_map *map;
0458     u64 start = 0, last = ULLONG_MAX;
0459     int err;
0460 
0461     if (!src) {
0462         err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
0463         return err;
0464     }
0465 
0466     for (map = vhost_iotlb_itree_first(src, start, last); map;
0467         map = vhost_iotlb_itree_next(map, start, last)) {
0468         err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
0469                         map->addr, map->perm);
0470         if (err)
0471             return err;
0472     }
0473     return 0;
0474 }
0475 
/* Drop all mappings from the control VQ iotlb. */
static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
{
	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
}
0480 
/* Tear down a user MR: destroy the indirect key first, then every direct
 * MR, walking the list in reverse creation order.
 */
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}
0493 
/* Destroy the current memory registration (user or DMA), if any, and reset
 * @mvdev->mr. Safe to call when no MR was created.
 */
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;

	mutex_lock(&mr->mkey_mtx);
	if (!mr->initialized)
		goto out;

	prune_iotlb(mvdev);
	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	/* NOTE(review): this memset also zeroes mr->mkey_mtx while it is
	 * held, so the mutex_unlock() below operates on cleared mutex
	 * state. Verify the mutex should be excluded from the wipe.
	 * (mr->initialized is already zeroed by the memset.)
	 */
	memset(mr, 0, sizeof(*mr));
	mr->initialized = false;
out:
	mutex_unlock(&mr->mkey_mtx);
}
0513 
/* Create the memory registration: a user MR when an iotlb is supplied,
 * otherwise a DMA (PA) MR spanning the whole address space. Also
 * duplicates the iotlb for control VQ use. Caller must hold mr->mkey_mtx.
 * Returns 0 immediately if the MR is already initialized.
 */
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err;

	if (mr->initialized)
		return 0;

	if (iotlb)
		err = create_user_mr(mvdev, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	err = dup_iotlb(mvdev, iotlb);
	if (err)
		goto out_err;

	mr->initialized = true;
	return 0;

out_err:
	/* Undo whichever MR type was created above. */
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}
0545 
/* Locked wrapper around _mlx5_vdpa_create_mr(). */
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	int err;

	mutex_lock(&mvdev->mr.mkey_mtx);
	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mvdev->mr.mkey_mtx);
	return err;
}
0555 
0556 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
0557                  bool *change_map)
0558 {
0559     struct mlx5_vdpa_mr *mr = &mvdev->mr;
0560     int err = 0;
0561 
0562     *change_map = false;
0563     mutex_lock(&mr->mkey_mtx);
0564     if (mr->initialized) {
0565         mlx5_vdpa_info(mvdev, "memory map update\n");
0566         *change_map = true;
0567     }
0568     if (!*change_map)
0569         err = _mlx5_vdpa_create_mr(mvdev, iotlb);
0570     mutex_unlock(&mr->mkey_mtx);
0571 
0572     return err;
0573 }