// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define to_mlx5_vdpa_ndev(__mvdev)                                             \
    container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK                                                                        \
    (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
     BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
     BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
     BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
     BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
     BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
     BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
     BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
     BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK                                                                          \
    (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
     VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_net_resources {
    u32 tisn;
    u32 tdn;
    u32 tirn;
    u32 rqtn;
    bool valid;
};

struct mlx5_vdpa_cq_buf {
    struct mlx5_frag_buf_ctrl fbc;
    struct mlx5_frag_buf frag_buf;
    int cqe_size;
    int nent;
};

struct mlx5_vdpa_cq {
    struct mlx5_core_cq mcq;
    struct mlx5_vdpa_cq_buf buf;
    struct mlx5_db db;
    int cqe;
};

struct mlx5_vdpa_umem {
    struct mlx5_frag_buf_ctrl fbc;
    struct mlx5_frag_buf frag_buf;
    int size;
    u32 id;
};

struct mlx5_vdpa_qp {
    struct mlx5_core_qp mqp;
    struct mlx5_frag_buf frag_buf;
    struct mlx5_db db;
    u16 head;
    bool fw;
};

struct mlx5_vq_restore_info {
    u32 num_ent;
    u64 desc_addr;
    u64 device_addr;
    u64 driver_addr;
    u16 avail_index;
    u16 used_index;
    bool ready;
    bool restore;
};

struct mlx5_vdpa_virtqueue {
    bool ready;
    u64 desc_addr;
    u64 device_addr;
    u64 driver_addr;
    u32 num_ent;

    /* Resources for implementing the notification channel from the device
     * to the driver. fwqp is the firmware end of an RC connection; the
     * other end is vqqp used by the driver. cq is where completions are
     * reported.
     */
    struct mlx5_vdpa_cq cq;
    struct mlx5_vdpa_qp fwqp;
    struct mlx5_vdpa_qp vqqp;

    /* umem resources are required for the virtqueue operation. Their use
     * is internal and they must be provided by the driver.
     */
    struct mlx5_vdpa_umem umem1;
    struct mlx5_vdpa_umem umem2;
    struct mlx5_vdpa_umem umem3;

    u32 counter_set_id;
    bool initialized;
    int index;
    u32 virtq_id;
    struct mlx5_vdpa_net *ndev;
    u16 avail_idx;
    u16 used_idx;
    int fw_state;

    /* keep last in the struct */
    struct mlx5_vq_restore_info ri;
};

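/* Data virtqueues come in RX/TX pairs starting at index 0. Without
 * VIRTIO_NET_F_MQ only indices 0 and 1 are valid, plus index 2 if a
 * control VQ was negotiated; with multiqueue, anything up to and
 * including max_idx is valid.
 */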
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
    if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
            return idx < 2;
        else
            return idx < 3;
    }

    return idx <= mvdev->max_idx;
}

#define MLX5V_MACVLAN_SIZE 256

struct mlx5_vdpa_net {
    struct mlx5_vdpa_dev mvdev;
    struct mlx5_vdpa_net_resources res;
    struct virtio_net_config config;
    struct mlx5_vdpa_virtqueue *vqs;
    struct vdpa_callback *event_cbs;

    /* Serialize vq resources creation and destruction. This is required
     * since the memory map might change and we need to destroy and create
     * resources while the driver is operational.
     */
    struct rw_semaphore reslock;
    struct mlx5_flow_table *rxft;
    bool setup;
    u32 cur_num_vqs;
    u32 rqt_size;
    bool nb_registered;
    struct notifier_block nb;
    struct vdpa_callback config_cb;
    struct mlx5_vdpa_wq_ent cvq_ent;
    struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
};

struct macvlan_node {
    struct hlist_node hlist;
    struct mlx5_flow_handle *ucast_rule;
    struct mlx5_flow_handle *mcast_rule;
    u64 macvlan;
};

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
    do {                                                                                       \
        if (features & BIT_ULL(_feature))                                                  \
            mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
    } while (0)

#define MLX5_LOG_VIO_STAT(_status)                                                                 \
    do {                                                                                       \
        if (status & (_status))                                                            \
            mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
    } while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
    return virtio_legacy_is_little_endian() ||
        (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
    return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
    return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

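/* The control VQ occupies the first index past the data virtqueues: 2
 * when only a single RX/TX pair is used, or max_vqs when VIRTIO_NET_F_MQ
 * was negotiated.
 */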
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
    if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
        return 2;

    return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
    return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
    if (status & ~VALID_STATUS_MASK)
        mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
                   status & ~VALID_STATUS_MASK);

    if (!mlx5_vdpa_debug)
        return;

    mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
    if (set && !status) {
        mlx5_vdpa_info(mvdev, "driver resets the device\n");
        return;
    }

    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
    MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
    if (features & ~VALID_FEATURES_MASK)
        mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
                   features & ~VALID_FEATURES_MASK);

    if (!mlx5_vdpa_debug)
        return;

    mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
    if (!features)
        mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
    MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
    MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
    struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
    u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
    void *tisc;
    int err;

    tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
    MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
    err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
    if (err)
        mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

    return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
    mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
    struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
    u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
    u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
    int err;

    err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
                       ndev->mvdev.mdev->priv.numa_node);
    if (err)
        return err;

    mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

    buf->cqe_size = MLX5_VDPA_CQE_SIZE;
    buf->nent = nent;

    return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
    struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

    return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
                    ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
    mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
    return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
    struct mlx5_cqe64 *cqe64;
    void *cqe;
    int i;

    for (i = 0; i < buf->nent; i++) {
        cqe = get_cqe(vcq, i);
        cqe64 = cqe;
        cqe64->op_own = MLX5_CQE_INVALID << 4;
    }
}

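/* A CQE is owned by software when its opcode is not INVALID and its
 * ownership bit matches the parity of the requested index relative to the
 * queue size, i.e. the expected value flips on every wrap-around of the CQ.
 */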
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
    struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

    if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
        !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
        return cqe64;

    return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
    vqp->head += n;
    vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
               struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
    struct mlx5_vdpa_qp *vqp;
    __be64 *pas;
    void *qpc;

    vqp = fw ? &mvq->fwqp : &mvq->vqqp;
    MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
    qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
    if (vqp->fw) {
        /* The firmware QP is allocated by the driver for the firmware's
         * use, so we can skip most of the parameters; they will be
         * chosen by the firmware.
         */
        MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
        MLX5_SET(qpc, qpc, no_sq, 1);
        return;
    }

    MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
    MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
    MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
    MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
    MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
    MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
    MLX5_SET(qpc, qpc, no_sq, 1);
    MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
    MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
    MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
    pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
    mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
    return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
                    num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
                    ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
    mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

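/* Create one end of the RC connection. The firmware QP needs no receive
 * buffer or doorbell record of its own, so those resources (rq_buf_alloc()
 * and mlx5_db_alloc()) are set up only for the driver QP.
 */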
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
             struct mlx5_vdpa_qp *vqp)
{
    struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
    int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
    u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
    void *qpc;
    void *in;
    int err;

    if (!vqp->fw) {
        vqp = &mvq->vqqp;
        err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
        if (err)
            return err;

        err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
        if (err)
            goto err_db;
        inlen += vqp->frag_buf.npages * sizeof(__be64);
    }

    in = kzalloc(inlen, GFP_KERNEL);
    if (!in) {
        err = -ENOMEM;
        goto err_kzalloc;
    }

    qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
    qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
    MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
    MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
    MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
    MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
    if (!vqp->fw)
        MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
    MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
    err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
    kfree(in);
    if (err)
        goto err_kzalloc;

    vqp->mqp.uid = ndev->mvdev.res.uid;
    vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

    if (!vqp->fw)
        rx_post(vqp, mvq->num_ent);

    return 0;

err_kzalloc:
    if (!vqp->fw)
        mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
    if (!vqp->fw)
        rq_buf_free(ndev, vqp);

    return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
    u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

    MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
    MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
    MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
    if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
        mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
    if (!vqp->fw) {
        mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
        rq_buf_free(ndev, vqp);
    }
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
    return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
    struct mlx5_cqe64 *cqe64;

    cqe64 = next_cqe_sw(vcq);
    if (!cqe64)
        return -EAGAIN;

    vcq->mcq.cons_index++;
    return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
    struct mlx5_vdpa_net *ndev = mvq->ndev;
    struct vdpa_callback *event_cb;

    event_cb = &ndev->event_cbs[mvq->index];
    mlx5_cq_set_ci(&mvq->cq.mcq);

    /* make sure the CQ consumer update is visible to the hardware before
     * updating the RX doorbell record.
     */
    dma_wmb();
    rx_post(&mvq->vqqp, num);
    if (event_cb->callback)
        event_cb->callback(event_cb->private);
}

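/* Completion handler for a virtqueue notification CQ. Completions are
 * drained in batches: after consuming half of the queue depth, the CQ
 * consumer index and the RX doorbell are updated so the hardware can keep
 * posting while we poll.
 */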
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
    struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
    struct mlx5_vdpa_net *ndev = mvq->ndev;
    void __iomem *uar_page = ndev->mvdev.res.uar->map;
    int num = 0;

    while (!mlx5_vdpa_poll_one(&mvq->cq)) {
        num++;
        if (num > mvq->num_ent / 2) {
            /* If completions keep coming while we poll, we want to
             * let the hardware know that we consumed them by
             * updating the doorbell record.  We also let the vdpa
             * core know about this so it passes it on to the virtio
             * driver in the guest.
             */
            mlx5_vdpa_handle_completions(mvq, num);
            num = 0;
        }
    }

    if (num)
        mlx5_vdpa_handle_completions(mvq, num);

    mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

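/* Create the completion queue used for virtqueue notifications. The
 * sequence is: allocate a doorbell record, allocate and initialize the CQE
 * buffer (all entries marked invalid), pick an EQ vector, issue CREATE_CQ
 * and finally arm the CQ so the first completion generates an event.
 */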
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
    struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
    struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
    void __iomem *uar_page = ndev->mvdev.res.uar->map;
    u32 out[MLX5_ST_SZ_DW(create_cq_out)];
    struct mlx5_vdpa_cq *vcq = &mvq->cq;
    __be64 *pas;
    int inlen;
    void *cqc;
    void *in;
    int err;
    int eqn;

    err = mlx5_db_alloc(mdev, &vcq->db);
    if (err)
        return err;

    vcq->mcq.set_ci_db = vcq->db.db;
    vcq->mcq.arm_db = vcq->db.db + 1;
    vcq->mcq.cqe_sz = 64;

    err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
    if (err)
        goto err_db;

    cq_frag_buf_init(vcq, &vcq->buf);

    inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
        MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
    in = kzalloc(inlen, GFP_KERNEL);
    if (!in) {
        err = -ENOMEM;
        goto err_vzalloc;
    }

    MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
    pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
    mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

    cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
    MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

    /* Use vector 0 by default. Consider adding code to choose the least
     * used vector.
     */
    err = mlx5_vector2eqn(mdev, 0, &eqn);
    if (err)
        goto err_vec;

    cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
    MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
    MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
    MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
    MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

    err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
    if (err)
        goto err_vec;

    vcq->mcq.comp = mlx5_vdpa_cq_comp;
    vcq->cqe = num_ent;
    vcq->mcq.set_ci_db = vcq->db.db;
    vcq->mcq.arm_db = vcq->db.db + 1;
    mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
    kfree(in);
    return 0;

err_vec:
    kfree(in);
err_vzalloc:
    cq_frag_buf_free(ndev, &vcq->buf);
err_db:
    mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
    return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
    struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
    struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
    struct mlx5_vdpa_cq *vcq = &mvq->cq;

    if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
        mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
        return;
    }
    cq_frag_buf_free(ndev, &vcq->buf);
    mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

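/* The size of each umem required by the device is a linear function of the
 * queue size, with per-umem coefficients reported in the device
 * capabilities: size = param_a * num_ent + param_b. For example (with
 * purely illustrative values), if umem_1_buffer_param_a were 128 and
 * umem_1_buffer_param_b were 4096, a 256-entry virtqueue would need
 * 128 * 256 + 4096 = 36864 bytes for umem1.
 */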
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
              struct mlx5_vdpa_umem **umemp)
{
    struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
    int p_a;
    int p_b;

    switch (num) {
    case 1:
        p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
        p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
        *umemp = &mvq->umem1;
        break;
    case 2:
        p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
        p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
        *umemp = &mvq->umem2;
        break;
    case 3:
        p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
        p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
        *umemp = &mvq->umem3;
        break;
    }
    (*umemp)->size = p_a * mvq->num_ent + p_b;
}

static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
    mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
    int inlen;
    u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
    void *um;
    void *in;
    int err;
    __be64 *pas;
    struct mlx5_vdpa_umem *umem;

    set_umem_size(ndev, mvq, num, &umem);
    err = umem_frag_buf_alloc(ndev, umem, umem->size);
    if (err)
        return err;

    inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

    in = kzalloc(inlen, GFP_KERNEL);
    if (!in) {
        err = -ENOMEM;
        goto err_in;
    }

    MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
    MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
    um = MLX5_ADDR_OF(create_umem_in, in, umem);
    MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
    MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

    pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
    mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
    if (err) {
        mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
        goto err_cmd;
    }

    kfree(in);
    umem->id = MLX5_GET(create_umem_out, out, umem_id);

    return 0;

err_cmd:
    kfree(in);
err_in:
    umem_frag_buf_free(ndev, umem);
    return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
    u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
    u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
    struct mlx5_vdpa_umem *umem;

    switch (num) {
    case 1:
        umem = &mvq->umem1;
        break;
    case 2:
        umem = &mvq->umem2;
        break;
    case 3:
        umem = &mvq->umem3;
        break;
    }

    MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
    MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
    if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
        return;

    umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    int num;
    int err;

    for (num = 1; num <= 3; num++) {
        err = create_umem(ndev, mvq, num);
        if (err)
            goto err_umem;
    }
    return 0;

err_umem:
    for (num--; num > 0; num--)
        umem_destroy(ndev, mvq, num);

    return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    int num;

    for (num = 3; num > 0; num--)
        umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
    u32 type_mask;

    type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

    /* prefer split queue */
    if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
        return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

    WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

    return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
    return idx % 2;
}

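/* Pack the negotiated offload features into bits 12..3 of the virtio queue
 * object feature mask: HOST_TSO4 at bit 9, HOST_TSO6 at bit 8, CSUM at
 * bit 7 and GUEST_CSUM at bit 6.
 */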
static u16 get_features_12_3(u64 features)
{
    return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
           (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
           (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
           (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
    return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
           BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

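/* Create the virtio queue object in firmware. The object context carries
 * everything the device needs to run the queue: the saved available/used
 * indices, queue type and size, the guest addresses of the descriptor,
 * driver and device areas, the mkey covering guest memory, the three umems
 * and, when supported, the counter set used for statistics.
 */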
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
    u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
    void *obj_context;
    void *cmd_hdr;
    void *vq_ctx;
    void *in;
    int err;

    err = umems_create(ndev, mvq);
    if (err)
        return err;

    in = kzalloc(inlen, GFP_KERNEL);
    if (!in) {
        err = -ENOMEM;
        goto err_alloc;
    }

    cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

    obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
    MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
    MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
    MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
         get_features_12_3(ndev->mvdev.actual_features));
    vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
    MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

    if (vq_is_tx(mvq->index))
        MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

    MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
    MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
    MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
    MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
    MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
         !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
    MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
    MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
    MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
    MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
    MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
    MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
    MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
    MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
    MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
    MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
    MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
    if (counters_supported(&ndev->mvdev))
        MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
    if (err)
        goto err_cmd;

    mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
    kfree(in);
    mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

    return 0;

err_cmd:
    kfree(in);
err_alloc:
    umems_destroy(ndev, mvq);
    return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
    u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

    MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
         MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
    MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
    MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
    MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
         MLX5_OBJ_TYPE_VIRTIO_NET_Q);
    if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
        mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
        return;
    }
    mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
    umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
    return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
    return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
            int *outlen, u32 qpn, u32 rqpn)
{
    void *qpc;
    void *pp;

    switch (cmd) {
    case MLX5_CMD_OP_2RST_QP:
        *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
        *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
        *in = kzalloc(*inlen, GFP_KERNEL);
        *out = kzalloc(*outlen, GFP_KERNEL);
        if (!*in || !*out)
            goto outerr;

        MLX5_SET(qp_2rst_in, *in, opcode, cmd);
        MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
        MLX5_SET(qp_2rst_in, *in, qpn, qpn);
        break;
    case MLX5_CMD_OP_RST2INIT_QP:
        *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
        *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
        *in = kzalloc(*inlen, GFP_KERNEL);
        *out = kzalloc(*outlen, GFP_KERNEL);
        if (!*in || !*out)
            goto outerr;

        MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
        MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
        MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
        qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
        MLX5_SET(qpc, qpc, remote_qpn, rqpn);
        MLX5_SET(qpc, qpc, rwe, 1);
        pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
        MLX5_SET(ads, pp, vhca_port_num, 1);
        break;
    case MLX5_CMD_OP_INIT2RTR_QP:
        *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
        *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
        *in = kzalloc(*inlen, GFP_KERNEL);
        *out = kzalloc(*outlen, GFP_KERNEL);
        if (!*in || !*out)
            goto outerr;

        MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
        MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
        MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
        qpc = MLX5_ADDR_OF(init2rtr_qp_in, *in, qpc);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        MLX5_SET(qpc, qpc, log_msg_max, 30);
        MLX5_SET(qpc, qpc, remote_qpn, rqpn);
        pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
        MLX5_SET(ads, pp, fl, 1);
        break;
    case MLX5_CMD_OP_RTR2RTS_QP:
        *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
        *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
        *in = kzalloc(*inlen, GFP_KERNEL);
        *out = kzalloc(*outlen, GFP_KERNEL);
        if (!*in || !*out)
            goto outerr;

        MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
        MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
        MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, *in, qpc);
        pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
        MLX5_SET(ads, pp, ack_timeout, 14);
        MLX5_SET(qpc, qpc, retry_count, 7);
        MLX5_SET(qpc, qpc, rnr_retry, 7);
        break;
    default:
        goto outerr_nullify;
    }

    return;

outerr:
    kfree(*in);
    kfree(*out);
outerr_nullify:
    *in = NULL;
    *out = NULL;
}

static void free_inout(void *in, void *out)
{
    kfree(in);
    kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the QP being modified is the
 * one used by the firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
    int outlen;
    int inlen;
    void *out;
    void *in;
    int err;

    alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
    if (!in || !out)
        return -ENOMEM;

    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
    free_inout(in, out);
    return err;
}

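/* Connect the firmware and driver QPs to each other by walking both of
 * them through the standard RC state machine: RESET -> INIT -> RTR, and
 * finally RTS for the firmware QP, which is the sending side.
 */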
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    int err;

    err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
    if (err)
        return err;

    err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
    if (err)
        return err;

    err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
    if (err)
        return err;

    err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
    if (err)
        return err;

    err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
    if (err)
        return err;

    err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
    if (err)
        return err;

    return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
    u8 state;
    u16 available_index;
    u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
               struct mlx5_virtq_attr *attr)
{
    int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
    u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
    void *out;
    void *obj_context;
    void *cmd_hdr;
    int err;

    out = kzalloc(outlen, GFP_KERNEL);
    if (!out)
        return -ENOMEM;

    cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
    if (err)
        goto err_cmd;

    obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
    memset(attr, 0, sizeof(*attr));
    attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
    attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
    attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
    kfree(out);
    return 0;

err_cmd:
    kfree(out);
    return err;
}

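/* Firmware virtqueue objects move strictly forward through their state
 * machine: INIT -> RDY -> SUSPEND. SUSPEND and ERR are terminal; no
 * transition out of them is allowed.
 */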
static bool is_valid_state_change(int oldstate, int newstate)
{
    switch (oldstate) {
    case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
        return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
    case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
        return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
    case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
    case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
    default:
        return false;
    }
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
    int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
    u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
    void *obj_context;
    void *cmd_hdr;
    void *in;
    int err;

    if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
        return 0;

    if (!is_valid_state_change(mvq->fw_state, state))
        return -EINVAL;

    in = kzalloc(inlen, GFP_KERNEL);
    if (!in)
        return -ENOMEM;

    cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

    obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
    MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
           MLX5_VIRTQ_MODIFY_MASK_STATE);
    MLX5_SET(virtio_net_q_object, obj_context, state, state);
    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
    kfree(in);
    if (!err)
        mvq->fw_state = state;

    return err;
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
    u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
    void *cmd_hdr;
    int err;

    if (!counters_supported(&ndev->mvdev))
        return 0;

    cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
    MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

    err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
    if (err)
        return err;

    mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

    return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
    u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

    if (!counters_supported(&ndev->mvdev))
        return;

    MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
    MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
    MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
    MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
    if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
        mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

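/* Set up all resources needed by one virtqueue, in dependency order:
 * completion queue, firmware and driver QPs, the RC connection between
 * them, the counter set and finally the virtqueue object itself, which is
 * moved to the ready state if the vq was already marked ready.
 */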
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    u16 idx = mvq->index;
    int err;

    if (!mvq->num_ent)
        return 0;

    if (mvq->initialized)
        return 0;

    err = cq_create(ndev, idx, mvq->num_ent);
    if (err)
        return err;

    err = qp_create(ndev, mvq, &mvq->fwqp);
    if (err)
        goto err_fwqp;

    err = qp_create(ndev, mvq, &mvq->vqqp);
    if (err)
        goto err_vqqp;

    err = connect_qps(ndev, mvq);
    if (err)
        goto err_connect;

    err = counter_set_alloc(ndev, mvq);
    if (err)
        goto err_connect;

    err = create_virtqueue(ndev, mvq);
    if (err)
        goto err_counter;

    if (mvq->ready) {
        err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
        if (err) {
            mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
                       idx, err);
            goto err_modify;
        }
    }

    mvq->initialized = true;
    return 0;

err_modify:
    destroy_virtqueue(ndev, mvq);
err_counter:
    counter_set_dealloc(ndev, mvq);
err_connect:
    qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
    qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
    cq_destroy(ndev, idx);
    return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    struct mlx5_virtq_attr attr;

    if (!mvq->initialized)
        return;

    if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
        return;

    if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
        mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

    if (query_virtqueue(ndev, mvq, &attr)) {
        mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
        return;
    }
    mvq->avail_idx = attr.available_index;
    mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
    int i;

    for (i = 0; i < ndev->mvdev.max_vqs; i++)
        suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
    if (!mvq->initialized)
        return;

    suspend_vq(ndev, mvq);
    destroy_virtqueue(ndev, mvq);
    counter_set_dealloc(ndev, mvq);
    qp_destroy(ndev, &mvq->vqqp);
    qp_destroy(ndev, &mvq->fwqp);
    cq_destroy(ndev, mvq->index);
    mvq->initialized = false;
}

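/* Create the RQ table used for RSS. Only receive virtqueues (the even
 * indices) are listed; the actual size is the number of currently used RX
 * queues rounded up to a power of two.
 */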
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
    int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
    int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
    __be32 *list;
    void *rqtc;
    int inlen;
    void *in;
    int i, j;
    int err;

    inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
    in = kzalloc(inlen, GFP_KERNEL);
    if (!in)
        return -ENOMEM;

    MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
    rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

    MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
    MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
    list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
    for (i = 0, j = 0; i < act_sz; i++, j += 2)
        list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

    MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
    err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
    kfree(in);
    if (err)
        return err;

    return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
    int act_sz = roundup_pow_of_two(num / 2);
    __be32 *list;
    void *rqtc;
    int inlen;
    void *in;
    int i, j;
    int err;

    inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
    in = kzalloc(inlen, GFP_KERNEL);
    if (!in)
        return -ENOMEM;

    MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
    MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
    rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
    MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

    list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
    for (i = 0, j = 0; i < act_sz; i++, j += 2)
        list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

    MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
    err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
    kfree(in);
    if (err)
        return err;

    return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
    mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

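/* Create an indirectly-dispatching TIR that spreads received packets over
 * the RQ table using a symmetric Toeplitz hash of the outer IPv4 TCP
 * 4-tuple. The hardcoded array below is a fixed default Toeplitz key.
 */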
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
    (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
     MLX5_HASH_FIELD_SEL_L4_DPORT)
    static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
                           0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
                           0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
                           0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
                           0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
    void *rss_key;
    void *outer;
    void *tirc;
    void *in;
    int err;

    in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
    if (!in)
        return -ENOMEM;

    MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
    tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
    MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

    MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
    MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
    rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
    memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

    outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
    MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
    MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
    MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

    MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
    MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

    err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
    kfree(in);
    return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
    mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

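/* Add the pair of steering rules for one MAC/VLAN combination: an exact
 * DMAC match for unicast, and a match on the multicast bit of the DMAC for
 * multicast, both forwarding to the TIR. When tagged, the VLAN id is
 * matched as well.
 */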
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
                    u16 vid, bool tagged,
                    struct mlx5_flow_handle **ucast,
                    struct mlx5_flow_handle **mcast)
{
    struct mlx5_flow_destination dest = {};
    struct mlx5_flow_act flow_act = {};
    struct mlx5_flow_handle *rule;
    struct mlx5_flow_spec *spec;
    void *headers_c;
    void *headers_v;
    u8 *dmac_c;
    u8 *dmac_v;
    int err;

    spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    if (!spec)
        return -ENOMEM;

    spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
    headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
    headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
    dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
    dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
    eth_broadcast_addr(dmac_c);
    ether_addr_copy(dmac_v, mac);
    MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
    if (tagged) {
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
    }
    flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
    dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
    dest.tir_num = ndev->res.tirn;
    rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
    if (IS_ERR(rule)) {
        kvfree(spec);
        return PTR_ERR(rule);
    }

    *ucast = rule;

    memset(dmac_c, 0, ETH_ALEN);
    memset(dmac_v, 0, ETH_ALEN);
    dmac_c[0] = 1;
    dmac_v[0] = 1;
    rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
    kvfree(spec);
    if (IS_ERR(rule)) {
        err = PTR_ERR(rule);
        goto err_mcast;
    }

    *mcast = rule;
    return 0;

err_mcast:
    mlx5_del_flow_rules(*ucast);
    return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
                     struct mlx5_flow_handle *ucast,
                     struct mlx5_flow_handle *mcast)
{
    mlx5_del_flow_rules(ucast);
    mlx5_del_flow_rules(mcast);
}

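/* Hash table key layout: the VLAN id lives in bits 63..48 and the 48-bit
 * MAC address in the low bits. Untagged entries use the out-of-range
 * sentinel MLX5V_UNTAGGED (0x1000, one past the largest 12-bit VID) as the
 * VLAN value so they can never collide with a real tag.
 */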
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
    u64 val;

    if (!tagged)
        vlan = MLX5V_UNTAGGED;

    val = (u64)vlan << 48 |
          (u64)mac[0] << 40 |
          (u64)mac[1] << 32 |
          (u64)mac[2] << 24 |
          (u64)mac[3] << 16 |
          (u64)mac[4] << 8 |
          (u64)mac[5];

    return val;
}

static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
    struct macvlan_node *pos;
    u32 idx;

    idx = hash_64(value, 8); /* hash into one of MLX5V_MACVLAN_SIZE (1 << 8) buckets */
    hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
        if (pos->macvlan == value)
            return pos;
    }
    return NULL;
}

static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
    struct macvlan_node *ptr;
    u64 val;
    u32 idx;
    int err;

    val = search_val(mac, vid, tagged);
    if (mac_vlan_lookup(ndev, val))
        return -EEXIST;

    ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
    if (!ptr)
        return -ENOMEM;

    err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vid, tagged,
                       &ptr->ucast_rule, &ptr->mcast_rule);
    if (err)
        goto err_add;

    ptr->macvlan = val;
    idx = hash_64(val, 8);
    hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
    return 0;

err_add:
    kfree(ptr);
    return err;
}

1574 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1575 {
1576     struct macvlan_node *ptr;
1577 
1578     ptr = mac_vlan_lookup(ndev, search_val(mac, vid, tagged));
1579     if (!ptr)
1580         return;
1581 
1582     hlist_del(&ptr->hlist);
1583     mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
1584     kfree(ptr);
1585 }
1586 
1587 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1588 {
1589     struct macvlan_node *pos;
1590     struct hlist_node *n;
1591     int i;
1592 
1593     for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1594         hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1595             hlist_del(&pos->hlist);
1596             mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
1597             kfree(pos);
1598         }
1599     }
1600 }
1601 
1602 static int setup_steering(struct mlx5_vdpa_net *ndev)
1603 {
1604     struct mlx5_flow_table_attr ft_attr = {};
1605     struct mlx5_flow_namespace *ns;
1606     int err;
1607 
1608     ft_attr.max_fte = MAX_STEERING_ENT;
1609     ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1610 
1611     ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1612     if (!ns) {
1613         mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1614         return -EOPNOTSUPP;
1615     }
1616 
1617     ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1618     if (IS_ERR(ndev->rxft)) {
1619         mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1620         return PTR_ERR(ndev->rxft);
1621     }
1622 
1623     err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1624     if (err)
1625         goto err_add;
1626 
1627     return 0;
1628 
1629 err_add:
1630     mlx5_destroy_flow_table(ndev->rxft);
1631     return err;
1632 }
1633 
1634 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1635 {
1636     clear_mac_vlan_table(ndev);
1637     mlx5_destroy_flow_table(ndev->rxft);
1638 }
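
/* Steering sketch: a single auto-grouped RX flow table holds, for each
 * MAC/VLAN key, one unicast rule (exact DMAC match) and one multicast
 * rule (multicast bit set in dmac[0]); both forward matching packets to
 * the device TIR. setup_steering() seeds the table with an untagged rule
 * for the config MAC, and mac_vlan_add()/mac_vlan_del() maintain the
 * rest.
 */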
1639 
1640 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1641 {
1642     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1643     struct mlx5_control_vq *cvq = &mvdev->cvq;
1644     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1645     struct mlx5_core_dev *pfmdev;
1646     size_t read;
1647     u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1648 
1649     pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1650     switch (cmd) {
1651     case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1652         read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1653         if (read != ETH_ALEN)
1654             break;
1655 
1656         if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1657             status = VIRTIO_NET_OK;
1658             break;
1659         }
1660 
1661         if (is_zero_ether_addr(mac))
1662             break;
1663 
1664         if (!is_zero_ether_addr(ndev->config.mac)) {
1665             if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1666                 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1667                            ndev->config.mac);
1668                 break;
1669             }
1670         }
1671 
1672         if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1673             mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1674                        mac);
1675             break;
1676         }
1677 
1678         /* Back up the original MAC address so that it can be restored
1679          * if adding the forward rules fails.
1680          */
1681         memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1682 
1683         memcpy(ndev->config.mac, mac, ETH_ALEN);
1684 
1685         /* Recreate the flow table entry so that packets addressed to the
1686          * new MAC keep being forwarded to this device. */
1687         mac_vlan_del(ndev, mac_back, 0, false);
1688 
1689         if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1690             mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1691 
1692             /* This path is unlikely to be taken, but double check anyway */
1693             if (is_zero_ether_addr(mac_back)) {
1694                 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1695                 break;
1696             }
1697 
1698             /* Try to restore the original MAC address to the MPFS table,
1699              * and to restore the forward rule entry.
1700              */
1701             if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1702                 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1703                            ndev->config.mac);
1704             }
1705 
1706             if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1707                 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1708                            mac_back);
1709             }
1710 
1711             memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1712 
1713             if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1714                 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1715 
1716             break;
1717         }
1718 
1719         status = VIRTIO_NET_OK;
1720         break;
1721 
1722     default:
1723         break;
1724     }
1725 
1726     return status;
1727 }
1728 
1729 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1730 {
1731     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1732     int cur_qps = ndev->cur_num_vqs / 2;
1733     int err;
1734     int i;
1735 
1736     if (cur_qps > newqps) {
1737         err = modify_rqt(ndev, 2 * newqps);
1738         if (err)
1739             return err;
1740 
1741         for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1742             teardown_vq(ndev, &ndev->vqs[i]);
1743 
1744         ndev->cur_num_vqs = 2 * newqps;
1745     } else {
1746         ndev->cur_num_vqs = 2 * newqps;
1747         for (i = cur_qps * 2; i < 2 * newqps; i++) {
1748             err = setup_vq(ndev, &ndev->vqs[i]);
1749             if (err)
1750                 goto clean_added;
1751         }
1752         err = modify_rqt(ndev, 2 * newqps);
1753         if (err)
1754             goto clean_added;
1755     }
1756     return 0;
1757 
1758 clean_added:
1759     for (--i; i >= 2 * cur_qps; --i)
1760         teardown_vq(ndev, &ndev->vqs[i]);
1761 
1762     ndev->cur_num_vqs = 2 * cur_qps;
1763 
1764     return err;
1765 }
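
/* Virtqueues come in (rx, tx) pairs, hence the 2 * newqps arithmetic.
 * Note the ordering: when shrinking, the RQT is narrowed before the
 * excess queues are torn down; when growing, the new queues are fully
 * set up before the RQT is widened. Either way the RQT never points at
 * an uninitialized virtqueue.
 */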
1766 
1767 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1768 {
1769     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1770     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1771     struct mlx5_control_vq *cvq = &mvdev->cvq;
1772     struct virtio_net_ctrl_mq mq;
1773     size_t read;
1774     u16 newqps;
1775 
1776     switch (cmd) {
1777     case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1778         /* This mq feature check aligns with pre-existing userspace
1779          * implementation.
1780          *
1781          * Without it, an untrusted driver could fake a multiqueue config
1782          * request down to a non-mq device, which may cause the kernel to
1783          * panic due to uninitialized resources for extra vqs. Even with
1784          * a well-behaved guest driver, it is not expected to allow
1785          * changing the number of vqs on a non-mq device.
1786          */
1787         if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1788             break;
1789 
1790         read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1791         if (read != sizeof(mq))
1792             break;
1793 
1794         newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1795         if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1796             newqps > ndev->rqt_size)
1797             break;
1798 
1799         if (ndev->cur_num_vqs == 2 * newqps) {
1800             status = VIRTIO_NET_OK;
1801             break;
1802         }
1803 
1804         if (!change_num_qps(mvdev, newqps))
1805             status = VIRTIO_NET_OK;
1806 
1807         break;
1808     default:
1809         break;
1810     }
1811 
1812     return status;
1813 }
1814 
1815 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1816 {
1817     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1818     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1819     struct mlx5_control_vq *cvq = &mvdev->cvq;
1820     __virtio16 vlan;
1821     size_t read;
1822     u16 id;
1823 
1824     switch (cmd) {
1825     case VIRTIO_NET_CTRL_VLAN_ADD:
1826         read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1827         if (read != sizeof(vlan))
1828             break;
1829 
1830         id = mlx5vdpa16_to_cpu(mvdev, vlan);
1831         if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1832             break;
1833 
1834         status = VIRTIO_NET_OK;
1835         break;
1836     case VIRTIO_NET_CTRL_VLAN_DEL:
1837         read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1838         if (read != sizeof(vlan))
1839             break;
1840 
1841         id = mlx5vdpa16_to_cpu(mvdev, vlan);
1842         mac_vlan_del(ndev, ndev->config.mac, id, true);
1843         status = VIRTIO_NET_OK;
1844         break;
1845     default:
1846         break;
1847     }
1848 
1849     return status;
1850 }
1851 
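/* Control VQ commands are served from this work item in process context,
 * under the write side of reslock. One descriptor is handled per
 * invocation: after the status ack is pushed, the handler requeues
 * itself and returns, which bounds how long reslock is held while a busy
 * driver keeps kicking the CVQ.
 */
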
1852 static void mlx5_cvq_kick_handler(struct work_struct *work)
1853 {
1854     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1855     struct virtio_net_ctrl_hdr ctrl;
1856     struct mlx5_vdpa_wq_ent *wqent;
1857     struct mlx5_vdpa_dev *mvdev;
1858     struct mlx5_control_vq *cvq;
1859     struct mlx5_vdpa_net *ndev;
1860     size_t read, write;
1861     int err;
1862 
1863     wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1864     mvdev = wqent->mvdev;
1865     ndev = to_mlx5_vdpa_ndev(mvdev);
1866     cvq = &mvdev->cvq;
1867 
1868     down_write(&ndev->reslock);
1869 
1870     if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1871         goto out;
1872 
1873     if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1874         goto out;
1875 
1876     if (!cvq->ready)
1877         goto out;
1878 
1879     while (true) {
1880         err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1881                        GFP_ATOMIC);
1882         if (err <= 0)
1883             break;
1884 
1885         read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1886         if (read != sizeof(ctrl))
1887             break;
1888 
1889         cvq->received_desc++;
1890         switch (ctrl.class) {
1891         case VIRTIO_NET_CTRL_MAC:
1892             status = handle_ctrl_mac(mvdev, ctrl.cmd);
1893             break;
1894         case VIRTIO_NET_CTRL_MQ:
1895             status = handle_ctrl_mq(mvdev, ctrl.cmd);
1896             break;
1897         case VIRTIO_NET_CTRL_VLAN:
1898             status = handle_ctrl_vlan(mvdev, ctrl.cmd);
1899             break;
1900         default:
1901             break;
1902         }
1903 
1904         /* Make sure data is written before advancing index */
1905         smp_wmb();
1906 
1907         write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1908         vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1909         vringh_kiov_cleanup(&cvq->riov);
1910         vringh_kiov_cleanup(&cvq->wiov);
1911 
1912         if (vringh_need_notify_iotlb(&cvq->vring))
1913             vringh_notify(&cvq->vring);
1914 
1915         cvq->completed_desc++;
1916         queue_work(mvdev->wq, &wqent->work);
1917         break;
1918     }
1919 
1920 out:
1921     up_write(&ndev->reslock);
1922 }
1923 
1924 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1925 {
1926     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1927     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1928     struct mlx5_vdpa_virtqueue *mvq;
1929 
1930     if (!is_index_valid(mvdev, idx))
1931         return;
1932 
1933     if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1934         if (!mvdev->wq || !mvdev->cvq.ready)
1935             return;
1936 
1937         queue_work(mvdev->wq, &ndev->cvq_ent.work);
1938         return;
1939     }
1940 
1941     mvq = &ndev->vqs[idx];
1942     if (unlikely(!mvq->ready))
1943         return;
1944 
1945     iowrite16(idx, ndev->mvdev.res.kick_addr);
1946 }
1947 
1948 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1949                     u64 driver_area, u64 device_area)
1950 {
1951     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1952     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1953     struct mlx5_vdpa_virtqueue *mvq;
1954 
1955     if (!is_index_valid(mvdev, idx))
1956         return -EINVAL;
1957 
1958     if (is_ctrl_vq_idx(mvdev, idx)) {
1959         mvdev->cvq.desc_addr = desc_area;
1960         mvdev->cvq.device_addr = device_area;
1961         mvdev->cvq.driver_addr = driver_area;
1962         return 0;
1963     }
1964 
1965     mvq = &ndev->vqs[idx];
1966     mvq->desc_addr = desc_area;
1967     mvq->device_addr = device_area;
1968     mvq->driver_addr = driver_area;
1969     return 0;
1970 }
1971 
1972 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1973 {
1974     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1975     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1976     struct mlx5_vdpa_virtqueue *mvq;
1977 
1978     if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1979         return;
1980 
1981     mvq = &ndev->vqs[idx];
1982     mvq->num_ent = num;
1983 }
1984 
1985 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1986 {
1987     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1988     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1989 
1990     ndev->event_cbs[idx] = *cb;
1991     if (is_ctrl_vq_idx(mvdev, idx))
1992         mvdev->cvq.event_cb = *cb;
1993 }
1994 
1995 static void mlx5_cvq_notify(struct vringh *vring)
1996 {
1997     struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1998 
1999     if (!cvq->event_cb.callback)
2000         return;
2001 
2002     cvq->event_cb.callback(cvq->event_cb.private);
2003 }
2004 
2005 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2006 {
2007     struct mlx5_control_vq *cvq = &mvdev->cvq;
2008 
2009     cvq->ready = ready;
2010     if (!ready)
2011         return;
2012 
2013     cvq->vring.notify = mlx5_cvq_notify;
2014 }
2015 
2016 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2017 {
2018     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2019     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2020     struct mlx5_vdpa_virtqueue *mvq;
2021     int err;
2022 
2023     if (!mvdev->actual_features)
2024         return;
2025 
2026     if (!is_index_valid(mvdev, idx))
2027         return;
2028 
2029     if (is_ctrl_vq_idx(mvdev, idx)) {
2030         set_cvq_ready(mvdev, ready);
2031         return;
2032     }
2033 
2034     mvq = &ndev->vqs[idx];
2035     if (!ready) {
2036         suspend_vq(ndev, mvq);
2037     } else {
2038         err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2039         if (err) {
2040             mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2041             ready = false;
2042         }
2043     }
2044 
2046     mvq->ready = ready;
2047 }
2048 
2049 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2050 {
2051     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2052     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2053 
2054     if (!is_index_valid(mvdev, idx))
2055         return false;
2056 
2057     if (is_ctrl_vq_idx(mvdev, idx))
2058         return mvdev->cvq.ready;
2059 
2060     return ndev->vqs[idx].ready;
2061 }
2062 
2063 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2064                   const struct vdpa_vq_state *state)
2065 {
2066     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2067     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2068     struct mlx5_vdpa_virtqueue *mvq;
2069 
2070     if (!is_index_valid(mvdev, idx))
2071         return -EINVAL;
2072 
2073     if (is_ctrl_vq_idx(mvdev, idx)) {
2074         mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2075         return 0;
2076     }
2077 
2078     mvq = &ndev->vqs[idx];
2079     if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2080         mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2081         return -EINVAL;
2082     }
2083 
2084     mvq->used_idx = state->split.avail_index;
2085     mvq->avail_idx = state->split.avail_index;
2086     return 0;
2087 }
2088 
2089 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2090 {
2091     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2092     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2093     struct mlx5_vdpa_virtqueue *mvq;
2094     struct mlx5_virtq_attr attr;
2095     int err;
2096 
2097     if (!is_index_valid(mvdev, idx))
2098         return -EINVAL;
2099 
2100     if (is_ctrl_vq_idx(mvdev, idx)) {
2101         state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2102         return 0;
2103     }
2104 
2105     mvq = &ndev->vqs[idx];
2106     /* If the virtq object was destroyed, use the value saved at
2107      * the last minute of suspend_vq. This caters for userspace
2108      * that cares about emulating the index after vq is stopped.
2109      */
2110     if (!mvq->initialized) {
2111         /* Firmware returns a wrong value for the available index.
2112          * Since both values should be identical, we take the value of
2113          * used_idx which is reported correctly.
2114          */
2115         state->split.avail_index = mvq->used_idx;
2116         return 0;
2117     }
2118 
2119     err = query_virtqueue(ndev, mvq, &attr);
2120     if (err) {
2121         mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2122         return err;
2123     }
2124     state->split.avail_index = attr.used_index;
2125     return 0;
2126 }
2127 
2128 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2129 {
2130     return PAGE_SIZE;
2131 }
2132 
2133 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2134 {
2135     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2136 
2137     if (is_ctrl_vq_idx(mvdev, idx))
2138         return MLX5_VDPA_CVQ_GROUP;
2139 
2140     return MLX5_VDPA_DATAVQ_GROUP;
2141 }
2142 
2143 enum {
         MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
2144     MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
2145     MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
2146     MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
2147 };
2148 
2149 static u64 mlx_to_virtio_features(u16 dev_features)
2150 {
2151     u64 result = 0;
2152 
2153     if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
2154         result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2155     if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
2156         result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2157     if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
2158         result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2159     if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
2160         result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2161 
2162     return result;
2163 }
2164 
2165 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2166 {
2167     u64 mlx_vdpa_features = 0;
2168     u16 dev_features;
2169 
2170     dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2171     mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2172     if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2173         mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2174     mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2175     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2176     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2177     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2178     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2179     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2180     mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2181 
2182     return mlx_vdpa_features;
2183 }
2184 
2185 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2186 {
2187     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2188     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2189 
2190     print_features(mvdev, ndev->mvdev.mlx_features, false);
2191     return ndev->mvdev.mlx_features;
2192 }
2193 
2194 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2195 {
2196     /* Minimum features to expect */
2197     if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2198         return -EOPNOTSUPP;
2199 
2200     /* Double check the feature combination sent down by the driver.
2201      * Reject combinations that lack a required dependency feature.
2202      *
2203      * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2204      * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2205      * By failing the invalid features sent down by untrusted drivers,
2206      * we're assured the assumptions made by is_index_valid() and
2207      * is_ctrl_vq_idx() will not be compromised.
2208      */
2209     if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2210             BIT_ULL(VIRTIO_NET_F_MQ))
2211         return -EINVAL;
2212 
2213     return 0;
2214 }
2215 
2216 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2217 {
2218     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2219     int err;
2220     int i;
2221 
2222     for (i = 0; i < mvdev->max_vqs; i++) {
2223         err = setup_vq(ndev, &ndev->vqs[i]);
2224         if (err)
2225             goto err_vq;
2226     }
2227 
2228     return 0;
2229 
2230 err_vq:
2231     for (--i; i >= 0; i--)
2232         teardown_vq(ndev, &ndev->vqs[i]);
2233 
2234     return err;
2235 }
2236 
2237 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2238 {
2239     struct mlx5_vdpa_virtqueue *mvq;
2240     int i;
2241 
2242     for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2243         mvq = &ndev->vqs[i];
2244         if (!mvq->initialized)
2245             continue;
2246 
2247         teardown_vq(ndev, mvq);
2248     }
2249 }
2250 
2251 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2252 {
2253     if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2254         if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2255             /* MQ supported. CVQ index is right above the last data virtqueue's */
2256             mvdev->max_idx = mvdev->max_vqs;
2257         } else {
2258             /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2259              * the CVQ gets index 2.
2260              */
2261             mvdev->max_idx = 2;
2262         }
2263     } else {
2264         /* Two data virtqueues only: one for rx and one for tx */
2265         mvdev->max_idx = 1;
2266     }
2267 }
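
/* Index map example with max_vqs data virtqueues: when CTRL_VQ and MQ
 * are both negotiated, data VQs occupy indices 0..max_vqs - 1 and the
 * CVQ sits at index max_vqs, so max_idx == max_vqs. With CTRL_VQ but no
 * MQ, indices 0 and 1 carry data and the CVQ is index 2. Without
 * CTRL_VQ there are only the two data virtqueues, indices 0 and 1.
 */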
2268 
2269 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2270 {
2271     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2272     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2273     int err;
2274 
2275     print_features(mvdev, features, true);
2276 
2277     err = verify_driver_features(mvdev, features);
2278     if (err)
2279         return err;
2280 
2281     ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2282     if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2283         ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2284     else
2285         ndev->rqt_size = 1;
2286 
2287     ndev->cur_num_vqs = 2 * ndev->rqt_size;
2288 
2289     update_cvq_info(mvdev);
2290     return err;
2291 }
2292 
2293 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2294 {
2295     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2296     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2297 
2298     ndev->config_cb = *cb;
2299 }
2300 
2301 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2302 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2303 {
2304     return MLX5_VDPA_MAX_VQ_ENTRIES;
2305 }
2306 
2307 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2308 {
2309     return VIRTIO_ID_NET;
2310 }
2311 
2312 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2313 {
2314     return PCI_VENDOR_ID_MELLANOX;
2315 }
2316 
2317 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2318 {
2319     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2320     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2321 
2322     print_status(mvdev, ndev->mvdev.status, false);
2323     return ndev->mvdev.status;
2324 }
2325 
2326 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2327 {
2328     struct mlx5_vq_restore_info *ri = &mvq->ri;
2329     struct mlx5_virtq_attr attr = {};
2330     int err;
2331 
2332     if (mvq->initialized) {
2333         err = query_virtqueue(ndev, mvq, &attr);
2334         if (err)
2335             return err;
2336     }
2337 
2338     ri->avail_index = attr.available_index;
2339     ri->used_index = attr.used_index;
2340     ri->ready = mvq->ready;
2341     ri->num_ent = mvq->num_ent;
2342     ri->desc_addr = mvq->desc_addr;
2343     ri->device_addr = mvq->device_addr;
2344     ri->driver_addr = mvq->driver_addr;
2345     ri->restore = true;
2346     return 0;
2347 }
2348 
2349 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2350 {
2351     int i;
2352 
2353     for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2354         memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2355         save_channel_info(ndev, &ndev->vqs[i]);
2356     }
2357     return 0;
2358 }
2359 
2360 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2361 {
2362     int i;
2363 
2364     for (i = 0; i < ndev->mvdev.max_vqs; i++)
2365         memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2366 }
2367 
2368 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2369 {
2370     struct mlx5_vdpa_virtqueue *mvq;
2371     struct mlx5_vq_restore_info *ri;
2372     int i;
2373 
2374     mlx5_clear_vqs(ndev);
2375     init_mvqs(ndev);
2376     for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2377         mvq = &ndev->vqs[i];
2378         ri = &mvq->ri;
2379         if (!ri->restore)
2380             continue;
2381 
2382         mvq->avail_idx = ri->avail_index;
2383         mvq->used_idx = ri->used_index;
2384         mvq->ready = ri->ready;
2385         mvq->num_ent = ri->num_ent;
2386         mvq->desc_addr = ri->desc_addr;
2387         mvq->device_addr = ri->device_addr;
2388         mvq->driver_addr = ri->driver_addr;
2389     }
2390 }
2391 
2392 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2393 {
2394     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2395     int err;
2396 
2397     suspend_vqs(ndev);
2398     err = save_channels_info(ndev);
2399     if (err)
2400         goto err_mr;
2401 
2402     teardown_driver(ndev);
2403     mlx5_vdpa_destroy_mr(mvdev);
2404     err = mlx5_vdpa_create_mr(mvdev, iotlb);
2405     if (err)
2406         goto err_mr;
2407 
2408     if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2409         goto err_mr;
2410 
2411     restore_channels_info(ndev);
2412     err = setup_driver(mvdev);
2413     if (err)
2414         goto err_setup;
2415 
2416     return 0;
2417 
2418 err_setup:
2419     mlx5_vdpa_destroy_mr(mvdev);
2420 err_mr:
2421     return err;
2422 }
2423 
2424 /* reslock must be held for this function */
2425 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2426 {
2427     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2428     int err;
2429 
2430     WARN_ON(!rwsem_is_locked(&ndev->reslock));
2431 
2432     if (ndev->setup) {
2433         mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2434         err = 0;
2435         goto out;
2436     }
2437     err = setup_virtqueues(mvdev);
2438     if (err) {
2439         mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2440         goto out;
2441     }
2442 
2443     err = create_rqt(ndev);
2444     if (err) {
2445         mlx5_vdpa_warn(mvdev, "create_rqt\n");
2446         goto err_rqt;
2447     }
2448 
2449     err = create_tir(ndev);
2450     if (err) {
2451         mlx5_vdpa_warn(mvdev, "create_tir\n");
2452         goto err_tir;
2453     }
2454 
2455     err = setup_steering(ndev);
2456     if (err) {
2457         mlx5_vdpa_warn(mvdev, "setup_steering\n");
2458         goto err_fwd;
2459     }
2460     ndev->setup = true;
2461 
2462     return 0;
2463 
2464 err_fwd:
2465     destroy_tir(ndev);
2466 err_tir:
2467     destroy_rqt(ndev);
2468 err_rqt:
2469     teardown_virtqueues(ndev);
2470 out:
2471     return err;
2472 }
2473 
2474 /* reslock must be held for this function */
2475 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2476 {
2478     WARN_ON(!rwsem_is_locked(&ndev->reslock));
2479 
2480     if (!ndev->setup)
2481         return;
2482 
2483     teardown_steering(ndev);
2484     destroy_tir(ndev);
2485     destroy_rqt(ndev);
2486     teardown_virtqueues(ndev);
2487     ndev->setup = false;
2488 }
2489 
2490 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2491 {
2492     int i;
2493 
2494     for (i = 0; i < ndev->mvdev.max_vqs; i++)
2495         ndev->vqs[i].ready = false;
2496 
2497     ndev->mvdev.cvq.ready = false;
2498 }
2499 
2500 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2501 {
2502     struct mlx5_control_vq *cvq = &mvdev->cvq;
2503     int err = 0;
2504 
2505     if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2506         err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2507                     MLX5_CVQ_MAX_ENT, false,
2508                     (struct vring_desc *)(uintptr_t)cvq->desc_addr,
2509                     (struct vring_avail *)(uintptr_t)cvq->driver_addr,
2510                     (struct vring_used *)(uintptr_t)cvq->device_addr);
2511 
2512     return err;
2513 }
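
/* Unlike the data virtqueues, which are offloaded to hardware virtqueue
 * objects, the control VQ is emulated in software: vringh walks the
 * guest's descriptor, available and used rings at the addresses recorded
 * by mlx5_vdpa_set_vq_address(), translating through the CVQ's own iotlb.
 */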
2514 
2515 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2516 {
2517     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2518     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2519     int err;
2520 
2521     print_status(mvdev, status, true);
2522 
2523     down_write(&ndev->reslock);
2524 
2525     if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2526         if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2527             err = setup_cvq_vring(mvdev);
2528             if (err) {
2529                 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2530                 goto err_setup;
2531             }
2532             err = setup_driver(mvdev);
2533             if (err) {
2534                 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2535                 goto err_setup;
2536             }
2537         } else {
2538             mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2539             goto err_clear;
2540         }
2541     }
2542 
2543     ndev->mvdev.status = status;
2544     up_write(&ndev->reslock);
2545     return;
2546 
2547 err_setup:
2548     mlx5_vdpa_destroy_mr(&ndev->mvdev);
2549     ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2550 err_clear:
2551     up_write(&ndev->reslock);
2552 }
2553 
2554 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2555 {
2556     int i;
2557 
2558     /* By default all groups are mapped to ASID 0 */
2559     for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2560         mvdev->group2asid[i] = 0;
2561 }
2562 
2563 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2564 {
2565     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2566     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2567 
2568     print_status(mvdev, 0, true);
2569     mlx5_vdpa_info(mvdev, "performing device reset\n");
2570 
2571     down_write(&ndev->reslock);
2572     teardown_driver(ndev);
2573     clear_vqs_ready(ndev);
2574     mlx5_vdpa_destroy_mr(&ndev->mvdev);
2575     ndev->mvdev.status = 0;
2576     ndev->cur_num_vqs = 0;
2577     ndev->mvdev.cvq.received_desc = 0;
2578     ndev->mvdev.cvq.completed_desc = 0;
2579     memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2580     ndev->mvdev.actual_features = 0;
2581     init_group_to_asid_map(mvdev);
2582     ++mvdev->generation;
2583 
2584     if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2585         if (mlx5_vdpa_create_mr(mvdev, NULL))
2586             mlx5_vdpa_warn(mvdev, "create MR failed\n");
2587     }
2588     up_write(&ndev->reslock);
2589 
2590     return 0;
2591 }
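
/* The generation counter is bumped so userspace polling get_generation()
 * can tell that cached device state may be stale. When the firmware
 * supports umem_uid_0, a default memory key is recreated right away so
 * the device can be driven again before a new set_map() arrives.
 */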
2592 
2593 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2594 {
2595     return sizeof(struct virtio_net_config);
2596 }
2597 
2598 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2599                  unsigned int len)
2600 {
2601     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2602     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2603 
2604     if (offset + len <= sizeof(struct virtio_net_config))
2605         memcpy(buf, (u8 *)&ndev->config + offset, len);
2606 }
2607 
2608 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2609                  unsigned int len)
2610 {
2611     /* not supported */
2612 }
2613 
2614 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2615 {
2616     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2617 
2618     return mvdev->generation;
2619 }
2620 
2621 static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2622 {
2623     u64 start = 0ULL, last = 0ULL - 1;
2624     struct vhost_iotlb_map *map;
2625     int err = 0;
2626 
2627     spin_lock(&mvdev->cvq.iommu_lock);
2628     vhost_iotlb_reset(mvdev->cvq.iotlb);
2629 
2630     for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
2631          map = vhost_iotlb_itree_next(map, start, last)) {
2632         err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
2633                         map->last, map->addr, map->perm);
2634         if (err)
2635             goto out;
2636     }
2637 
2638 out:
2639     spin_unlock(&mvdev->cvq.iommu_lock);
2640     return err;
2641 }
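
/* The control VQ keeps a private shadow iotlb: every range of the
 * incoming translation is copied under iommu_lock so the vringh
 * accessors can resolve guest addresses in software. The data path takes
 * the other branch, set_map_data() below, which rebuilds the hardware MR
 * instead.
 */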
2642 
2643 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2644 {
2645     bool change_map;
2646     int err;
2647 
2648     err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2649     if (err) {
2650         mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2651         return err;
2652     }
2653 
2654     if (change_map)
2655         err = mlx5_vdpa_change_map(mvdev, iotlb);
2656 
2657     return err;
2658 }
2659 
2660 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2661                  struct vhost_iotlb *iotlb)
2662 {
2663     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2664     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2665     int err = -EINVAL;
2666 
2667     down_write(&ndev->reslock);
2668     if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
2669         err = set_map_data(mvdev, iotlb);
2670         if (err)
2671             goto out;
2672     }
2673 
2674     if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
2675         err = set_map_control(mvdev, iotlb);
2676 
2677 out:
2678     up_write(&ndev->reslock);
2679     return err;
2680 }
2681 
2682 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2683 {
2684     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2685     struct mlx5_core_dev *pfmdev;
2686     struct mlx5_vdpa_net *ndev;
2687 
2688     ndev = to_mlx5_vdpa_ndev(mvdev);
2689 
2690     free_resources(ndev);
2691     mlx5_vdpa_destroy_mr(mvdev);
2692     if (!is_zero_ether_addr(ndev->config.mac)) {
2693         pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2694         mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2695     }
2696     mlx5_vdpa_free_resources(&ndev->mvdev);
2697     kfree(ndev->event_cbs);
2698     kfree(ndev->vqs);
2699 }
2700 
2701 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2702 {
2703     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2704     struct vdpa_notification_area ret = {};
2705     struct mlx5_vdpa_net *ndev;
2706     phys_addr_t addr;
2707 
2708     if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2709         return ret;
2710 
2711     /* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
2712      * notification, to avoid the risk of mapping pages that contain the
2713      * BARs of more than one SF.
2714      */
2715     if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2716         return ret;
2717 
2718     ndev = to_mlx5_vdpa_ndev(mvdev);
2719     addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2720     ret.addr = addr;
2721     ret.size = PAGE_SIZE;
2722     return ret;
2723 }
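
/* Worked example of the check above: the minimal SF BAR spans
 * 2^(log_min_sf_size + 12) bytes. With log_min_sf_size == 1 that is 8K,
 * so on a 64K-page system (PAGE_SHIFT == 16) the test 1 + 12 < 16 holds
 * and direct notification is refused, since mapping a whole page could
 * expose a neighbouring SF's registers.
 */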
2724 
2725 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2726 {
2727     return -EOPNOTSUPP;
2728 }
2729 
2730 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2731 {
2732     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2733 
2734     return mvdev->actual_features;
2735 }
2736 
2737 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
2738                  u64 *received_desc, u64 *completed_desc)
2739 {
2740     u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
2741     u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
2742     void *cmd_hdr;
2743     void *ctx;
2744     int err;
2745 
2746     if (!counters_supported(&ndev->mvdev))
2747         return -EOPNOTSUPP;
2748 
2749     if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
2750         return -EAGAIN;
2751 
2752     cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
2753 
2754     MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
2755     MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
2756     MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
2757     MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
2758 
2759     err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
2760     if (err)
2761         return err;
2762 
2763     ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
2764     *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
2765     *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
2766     return 0;
2767 }
2768 
2769 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
2770                      struct sk_buff *msg,
2771                      struct netlink_ext_ack *extack)
2772 {
2773     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2774     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2775     struct mlx5_vdpa_virtqueue *mvq;
2776     struct mlx5_control_vq *cvq;
2777     u64 received_desc;
2778     u64 completed_desc;
2779     int err = 0;
2780 
2781     down_read(&ndev->reslock);
2782     if (!is_index_valid(mvdev, idx)) {
2783         NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
2784         err = -EINVAL;
2785         goto out_err;
2786     }
2787 
2788     if (idx == ctrl_vq_idx(mvdev)) {
2789         cvq = &mvdev->cvq;
2790         received_desc = cvq->received_desc;
2791         completed_desc = cvq->completed_desc;
2792         goto out;
2793     }
2794 
2795     mvq = &ndev->vqs[idx];
2796     err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
2797     if (err) {
2798         NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
2799         goto out_err;
2800     }
2801 
2802 out:
2803     err = -EMSGSIZE;
2804     if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
2805         goto out_err;
2806 
2807     if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
2808                   VDPA_ATTR_PAD))
2809         goto out_err;
2810 
2811     if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
2812         goto out_err;
2813 
2814     if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
2815                   VDPA_ATTR_PAD))
2816         goto out_err;
2817 
2818     err = 0;
2819 out_err:
2820     up_read(&ndev->reslock);
2821     return err;
2822 }
2823 
2824 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
2825 {
2826     struct mlx5_control_vq *cvq;
2827 
2828     if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2829         return;
2830 
2831     cvq = &mvdev->cvq;
2832     cvq->ready = false;
2833 }
2834 
2835 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
2836 {
2837     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2838     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2839     struct mlx5_vdpa_virtqueue *mvq;
2840     int i;
2841 
2842     down_write(&ndev->reslock);
2843     mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
2844     ndev->nb_registered = false;
2845     flush_workqueue(ndev->mvdev.wq);
2846     for (i = 0; i < ndev->cur_num_vqs; i++) {
2847         mvq = &ndev->vqs[i];
2848         suspend_vq(ndev, mvq);
2849     }
2850     mlx5_vdpa_cvq_suspend(mvdev);
2851     up_write(&ndev->reslock);
2852     return 0;
2853 }
2854 
2855 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
2856                    unsigned int asid)
2857 {
2858     struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2859 
2860     if (group >= MLX5_VDPA_NUMVQ_GROUPS)
2861         return -EINVAL;
2862 
2863     mvdev->group2asid[group] = asid;
2864     return 0;
2865 }
2866 
2867 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2868     .set_vq_address = mlx5_vdpa_set_vq_address,
2869     .set_vq_num = mlx5_vdpa_set_vq_num,
2870     .kick_vq = mlx5_vdpa_kick_vq,
2871     .set_vq_cb = mlx5_vdpa_set_vq_cb,
2872     .set_vq_ready = mlx5_vdpa_set_vq_ready,
2873     .get_vq_ready = mlx5_vdpa_get_vq_ready,
2874     .set_vq_state = mlx5_vdpa_set_vq_state,
2875     .get_vq_state = mlx5_vdpa_get_vq_state,
2876     .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
2877     .get_vq_notification = mlx5_get_vq_notification,
2878     .get_vq_irq = mlx5_get_vq_irq,
2879     .get_vq_align = mlx5_vdpa_get_vq_align,
2880     .get_vq_group = mlx5_vdpa_get_vq_group,
2881     .get_device_features = mlx5_vdpa_get_device_features,
2882     .set_driver_features = mlx5_vdpa_set_driver_features,
2883     .get_driver_features = mlx5_vdpa_get_driver_features,
2884     .set_config_cb = mlx5_vdpa_set_config_cb,
2885     .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2886     .get_device_id = mlx5_vdpa_get_device_id,
2887     .get_vendor_id = mlx5_vdpa_get_vendor_id,
2888     .get_status = mlx5_vdpa_get_status,
2889     .set_status = mlx5_vdpa_set_status,
2890     .reset = mlx5_vdpa_reset,
2891     .get_config_size = mlx5_vdpa_get_config_size,
2892     .get_config = mlx5_vdpa_get_config,
2893     .set_config = mlx5_vdpa_set_config,
2894     .get_generation = mlx5_vdpa_get_generation,
2895     .set_map = mlx5_vdpa_set_map,
2896     .set_group_asid = mlx5_set_group_asid,
2897     .free = mlx5_vdpa_free,
2898     .suspend = mlx5_vdpa_suspend,
2899 };
2900 
2901 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2902 {
2903     u16 hw_mtu;
2904     int err;
2905 
2906     err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2907     if (err)
2908         return err;
2909 
2910     *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2911     return 0;
2912 }
2913 
2914 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2915 {
2916     struct mlx5_vdpa_net_resources *res = &ndev->res;
2917     int err;
2918 
2919     if (res->valid) {
2920         mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2921         return -EEXIST;
2922     }
2923 
2924     err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2925     if (err)
2926         return err;
2927 
2928     err = create_tis(ndev);
2929     if (err)
2930         goto err_tis;
2931 
2932     res->valid = true;
2933 
2934     return 0;
2935 
2936 err_tis:
2937     mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2938     return err;
2939 }
2940 
2941 static void free_resources(struct mlx5_vdpa_net *ndev)
2942 {
2943     struct mlx5_vdpa_net_resources *res = &ndev->res;
2944 
2945     if (!res->valid)
2946         return;
2947 
2948     destroy_tis(ndev);
2949     mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2950     res->valid = false;
2951 }
2952 
2953 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2954 {
2955     struct mlx5_vdpa_virtqueue *mvq;
2956     int i;
2957 
2958     for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
2959         mvq = &ndev->vqs[i];
2960         memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2961         mvq->index = i;
2962         mvq->ndev = ndev;
2963         mvq->fwqp.fw = true;
2964         mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
2965     }
2972 }
2973 
2974 struct mlx5_vdpa_mgmtdev {
2975     struct vdpa_mgmt_dev mgtdev;
2976     struct mlx5_adev *madev;
2977     struct mlx5_vdpa_net *ndev;
2978 };
2979 
2980 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2981 {
2982     u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2983     u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2984     int err;
2985 
2986     MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2987     MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2988     MLX5_SET(query_vport_state_in, in, vport_number, vport);
2989     if (vport)
2990         MLX5_SET(query_vport_state_in, in, other_vport, 1);
2991 
2992     err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2993     if (err)
2994         return 0;
2995 
2996     return MLX5_GET(query_vport_state_out, out, state);
2997 }
2998 
2999 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
3000 {
3001     if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
3002         VPORT_STATE_UP)
3003         return true;
3004 
3005     return false;
3006 }
3007 
3008 static void update_carrier(struct work_struct *work)
3009 {
3010     struct mlx5_vdpa_wq_ent *wqent;
3011     struct mlx5_vdpa_dev *mvdev;
3012     struct mlx5_vdpa_net *ndev;
3013 
3014     wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
3015     mvdev = wqent->mvdev;
3016     ndev = to_mlx5_vdpa_ndev(mvdev);
3017     if (get_link_state(mvdev))
3018         ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3019     else
3020         ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3021 
3022     if (ndev->config_cb.callback)
3023         ndev->config_cb.callback(ndev->config_cb.private);
3024 
3025     kfree(wqent);
3026 }
3027 
3028 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
3029 {
3030     struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
3031     struct mlx5_eqe *eqe = param;
3032     int ret = NOTIFY_DONE;
3033     struct mlx5_vdpa_wq_ent *wqent;
3034 
3035     if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
3036         switch (eqe->sub_type) {
3037         case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
3038         case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
3039             down_read(&ndev->reslock);
3040             if (!ndev->nb_registered) {
3041                 up_read(&ndev->reslock);
3042                 return NOTIFY_DONE;
3043             }
3044             wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
3045             if (!wqent) {
3046                 up_read(&ndev->reslock);
3047                 return NOTIFY_DONE;
3048             }
3049 
3050             wqent->mvdev = &ndev->mvdev;
3051             INIT_WORK(&wqent->work, update_carrier);
3052             queue_work(ndev->mvdev.wq, &wqent->work);
3053             up_read(&ndev->reslock);
3054             ret = NOTIFY_OK;
3055             break;
3056         default:
3057             return NOTIFY_DONE;
3058         }
3059         return ret;
3060     }
3061     return ret;
3062 }
3063 
3064 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3065 {
3066     int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3067     void *in;
3068     int err;
3069 
3070     in = kvzalloc(inlen, GFP_KERNEL);
3071     if (!in)
3072         return -ENOMEM;
3073 
3074     MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3075     MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3076          mtu + MLX5V_ETH_HARD_MTU);
3077     MLX5_SET(modify_nic_vport_context_in, in, opcode,
3078          MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3079 
3080     err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3081 
3082     kvfree(in);
3083     return err;
3084 }
3085 
3086 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3087                  const struct vdpa_dev_set_config *add_config)
3088 {
3089     struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3090     struct virtio_net_config *config;
3091     struct mlx5_core_dev *pfmdev;
3092     struct mlx5_vdpa_dev *mvdev;
3093     struct mlx5_vdpa_net *ndev;
3094     struct mlx5_core_dev *mdev;
3095     u32 max_vqs;
3096     u16 mtu;
3097     int err;
3098 
3099     if (mgtdev->ndev)
3100         return -ENOSPC;
3101 
3102     mdev = mgtdev->madev->mdev;
3103     if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3104         MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3105         dev_warn(mdev->device, "missing support for split virtqueues\n");
3106         return -EOPNOTSUPP;
3107     }
3108 
3109     max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3110             1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3111     if (max_vqs < 2) {
3112         dev_warn(mdev->device,
3113              "%d virtqueues are supported. At least 2 are required\n",
3114              max_vqs);
3115         return -EAGAIN;
3116     }
3117 
3118     if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3119         if (add_config->net.max_vq_pairs > max_vqs / 2)
3120             return -EINVAL;
3121         max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3122     } else {
3123         max_vqs = 2;
3124     }
3125 
3126     ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3127                  MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3128     if (IS_ERR(ndev))
3129         return PTR_ERR(ndev);
3130 
3131     ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
3132     ndev->mvdev.max_vqs = max_vqs;
3133     mvdev = &ndev->mvdev;
3134     mvdev->mdev = mdev;
3135 
3136     ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3137     ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3138     if (!ndev->vqs || !ndev->event_cbs) {
3139         err = -ENOMEM;
3140         goto err_alloc;
3141     }
3142 
3143     init_mvqs(ndev);
3144     init_rwsem(&ndev->reslock);
3145     config = &ndev->config;
3146 
3147     if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3148         err = config_func_mtu(mdev, add_config->net.mtu);
3149         if (err)
3150             goto err_alloc;
3151     }
3152 
3153     err = query_mtu(mdev, &mtu);
3154     if (err)
3155         goto err_alloc;
3156 
3157     ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3158 
3159     if (get_link_state(mvdev))
3160         ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3161     else
3162         ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3163 
3164     if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3165         memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3166     } else {
3167         err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3168         if (err)
3169             goto err_alloc;
3170     }
3171 
3172     if (!is_zero_ether_addr(config->mac)) {
3173         pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3174         err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3175         if (err)
3176             goto err_alloc;
3177 
3178         ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
3179     }
3180 
3181     config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3182     mvdev->vdev.dma_dev = &mdev->pdev->dev;
3183     err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3184     if (err)
3185         goto err_mpfs;
3186 
3187     if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3188         err = mlx5_vdpa_create_mr(mvdev, NULL);
3189         if (err)
3190             goto err_res;
3191     }
3192 
3193     err = alloc_resources(ndev);
3194     if (err)
3195         goto err_mr;
3196 
3197     ndev->cvq_ent.mvdev = mvdev;
3198     INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3199     mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3200     if (!mvdev->wq) {
3201         err = -ENOMEM;
3202         goto err_res2;
3203     }
3204 
3205     ndev->nb.notifier_call = event_handler;
3206     mlx5_notifier_register(mdev, &ndev->nb);
3207     ndev->nb_registered = true;
3208     mvdev->vdev.mdev = &mgtdev->mgtdev;
3209     err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3210     if (err)
3211         goto err_reg;
3212 
3213     mgtdev->ndev = ndev;
3214     return 0;
3215 
3216 err_reg:
3217     destroy_workqueue(mvdev->wq);
3218 err_res2:
3219     free_resources(ndev);
3220 err_mr:
3221     mlx5_vdpa_destroy_mr(mvdev);
3222 err_res:
3223     mlx5_vdpa_free_resources(&ndev->mvdev);
3224 err_mpfs:
3225     if (!is_zero_ether_addr(config->mac))
3226         mlx5_mpfs_del_mac(pfmdev, config->mac);
3227 err_alloc:
3228     put_device(&mvdev->vdev.dev);
3229     return err;
3230 }
3231 
3232 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3233 {
3234     struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3235     struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3236     struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3237     struct workqueue_struct *wq;
3238 
3239     if (ndev->nb_registered) {
3240         mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
3241         ndev->nb_registered = false;
3242     }
3243     wq = mvdev->wq;
3244     mvdev->wq = NULL;
3245     destroy_workqueue(wq);
3246     _vdpa_unregister_device(dev);
3247     mgtdev->ndev = NULL;
3248 }
3249 
3250 static const struct vdpa_mgmtdev_ops mdev_ops = {
3251     .dev_add = mlx5_vdpa_dev_add,
3252     .dev_del = mlx5_vdpa_dev_del,
3253 };
3254 
3255 static struct virtio_device_id id_table[] = {
3256     { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3257     { 0 },
3258 };
3259 
3260 static int mlx5v_probe(struct auxiliary_device *adev,
3261                const struct auxiliary_device_id *id)
3263 {
3264     struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3265     struct mlx5_core_dev *mdev = madev->mdev;
3266     struct mlx5_vdpa_mgmtdev *mgtdev;
3267     int err;
3268 
3269     mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3270     if (!mgtdev)
3271         return -ENOMEM;
3272 
3273     mgtdev->mgtdev.ops = &mdev_ops;
3274     mgtdev->mgtdev.device = mdev->device;
3275     mgtdev->mgtdev.id_table = id_table;
3276     mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3277                       BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3278                       BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
3279     mgtdev->mgtdev.max_supported_vqs =
3280         MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3281     mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3282     mgtdev->madev = madev;
3283 
3284     err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3285     if (err)
3286         goto reg_err;
3287 
3288     auxiliary_set_drvdata(adev, mgtdev);
3289 
3290     return 0;
3291 
3292 reg_err:
3293     kfree(mgtdev);
3294     return err;
3295 }
3296 
3297 static void mlx5v_remove(struct auxiliary_device *adev)
3298 {
3299     struct mlx5_vdpa_mgmtdev *mgtdev;
3300 
3301     mgtdev = auxiliary_get_drvdata(adev);
3302     vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3303     kfree(mgtdev);
3304 }
3305 
3306 static const struct auxiliary_device_id mlx5v_id_table[] = {
3307     { .name = MLX5_ADEV_NAME ".vnet", },
3308     {},
3309 };
3310 
3311 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3312 
3313 static struct auxiliary_driver mlx5v_driver = {
3314     .name = "vnet",
3315     .probe = mlx5v_probe,
3316     .remove = mlx5v_remove,
3317     .id_table = mlx5v_id_table,
3318 };
3319 
3320 module_auxiliary_driver(mlx5v_driver);