0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 #include "mlx5_ib.h"
0034
/*
 * Bookkeeping entry for one posted GSI send work request.  Sends may be
 * fanned out over several tx QPs, so completions are buffered here and
 * reported to the user's CQ strictly in posting order (see
 * generate_completions()).
 */
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;	/* done callback = handle_single_completion */
	struct ib_wc wc;	/* saved completion, replayed in order */
	bool completed:1;	/* hardware completion has arrived */
};
0040
0041 static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
0042 {
0043 return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
0044 }
0045
0046
0047 static void generate_completions(struct mlx5_ib_qp *mqp)
0048 {
0049 struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
0050 struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
0051 struct mlx5_ib_gsi_wr *wr;
0052 u32 index;
0053
0054 for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
0055 index++) {
0056 wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
0057
0058 if (!wr->completed)
0059 break;
0060
0061 WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
0062 wr->completed = false;
0063 }
0064
0065 gsi->outstanding_ci = index;
0066 }
0067
0068 static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
0069 {
0070 struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
0071 struct mlx5_ib_gsi_wr *wr =
0072 container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
0073 struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
0074 u64 wr_id;
0075 unsigned long flags;
0076
0077 spin_lock_irqsave(&gsi->lock, flags);
0078 wr->completed = true;
0079 wr_id = wr->wc.wr_id;
0080 wr->wc = *wc;
0081 wr->wc.wr_id = wr_id;
0082 wr->wc.qp = &mqp->ibqp;
0083
0084 generate_completions(mqp);
0085 spin_unlock_irqrestore(&gsi->lock, flags);
0086 }
0087
/*
 * mlx5_ib_create_gsi() - create the emulated GSI (QP1) for a port.
 * @pd: protection domain used for the hardware GSI QP.
 * @mqp: driver QP whose embedded gsi state is initialized here.
 * @attr: user-requested init attributes; attr->cap is stashed in gsi->cap.
 *
 * When the device can set the DETH source QPN, sends are demultiplexed
 * over a set of UD tx QPs (one per P_Key on IB ports, or one per LAG
 * port); the tx QPs themselves are created lazily in setup_qp().  When
 * the capability is absent, num_qps stays 0 and the hardware GSI QP
 * sends directly.
 *
 * Return: 0 on success or a negative errno.
 */
int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
		       struct ib_qp_init_attr *attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *attr;
	const u8 port_num = attr->port_num;
	int num_qps = 0;
	int ret;

	if (mlx5_ib_deth_sqpn_cap(dev)) {
		if (MLX5_CAP_GEN(dev->mdev,
				 port_type) == MLX5_CAP_PORT_TYPE_IB)
			num_qps = pd->device->attrs.max_pkeys;
		else if (dev->lag_active)
			num_qps = dev->lag_ports;
	}

	gsi = &mqp->gsi;
	/* kcalloc(0, ...) yields a valid (ZERO_SIZE_PTR) pointer when num_qps == 0 */
	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps)
		return -ENOMEM;

	gsi->outstanding_wrs =
		kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
			GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	/* Only one GSI QP may exist per port. */
	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = attr->cap;
	gsi->port_num = port_num;

	gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		/* Sends go through the tx QPs, so the hardware GSI QP
		 * needs no send resources of its own.
		 */
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}

	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[attr->port_num - 1].gsi = gsi;
	return 0;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
	return ret;
}
0167
/*
 * mlx5_ib_destroy_gsi() - tear down the emulated GSI created by
 * mlx5_ib_create_gsi(): hardware GSI QP first, then any lazily created
 * tx QPs, then the send CQ and the bookkeeping arrays.
 *
 * Return: 0 on success, or the error from destroying the hardware GSI
 * QP, in which case nothing else is torn down.
 */
int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
	struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		return ret;
	}
	/* Free the port slot so a new GSI QP may be created. */
	dev->devr.ports[port_num - 1].gsi = NULL;
	gsi->rx_qp = NULL;

	/* tx QP slots may be sparse: setup_qp() skips invalid P_Keys. */
	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	return 0;
}
0198
0199 static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
0200 {
0201 struct ib_pd *pd = gsi->rx_qp->pd;
0202 struct ib_qp_init_attr init_attr = {
0203 .event_handler = gsi->rx_qp->event_handler,
0204 .qp_context = gsi->rx_qp->qp_context,
0205 .send_cq = gsi->cq,
0206 .recv_cq = gsi->rx_qp->recv_cq,
0207 .cap = {
0208 .max_send_wr = gsi->cap.max_send_wr,
0209 .max_send_sge = gsi->cap.max_send_sge,
0210 .max_inline_data = gsi->cap.max_inline_data,
0211 },
0212 .qp_type = IB_QPT_UD,
0213 .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
0214 };
0215
0216 return ib_create_qp(pd, &init_attr);
0217 }
0218
0219 static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
0220 u16 pkey_index)
0221 {
0222 struct mlx5_ib_dev *dev = to_mdev(qp->device);
0223 struct ib_qp_attr attr;
0224 int mask;
0225 int ret;
0226
0227 mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
0228 attr.qp_state = IB_QPS_INIT;
0229 attr.pkey_index = pkey_index;
0230 attr.qkey = IB_QP1_QKEY;
0231 attr.port_num = gsi->port_num;
0232 ret = ib_modify_qp(qp, &attr, mask);
0233 if (ret) {
0234 mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
0235 qp->qp_num, ret);
0236 return ret;
0237 }
0238
0239 attr.qp_state = IB_QPS_RTR;
0240 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
0241 if (ret) {
0242 mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
0243 qp->qp_num, ret);
0244 return ret;
0245 }
0246
0247 attr.qp_state = IB_QPS_RTS;
0248 attr.sq_psn = 0;
0249 ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
0250 if (ret) {
0251 mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
0252 qp->qp_num, ret);
0253 return ret;
0254 }
0255
0256 return 0;
0257 }
0258
0259 static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
0260 {
0261 struct ib_device *device = gsi->rx_qp->device;
0262 struct mlx5_ib_dev *dev = to_mdev(device);
0263 int pkey_index = qp_index;
0264 struct mlx5_ib_qp *mqp;
0265 struct ib_qp *qp;
0266 unsigned long flags;
0267 u16 pkey;
0268 int ret;
0269
0270 if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
0271 pkey_index = 0;
0272
0273 ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
0274 if (ret) {
0275 mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
0276 gsi->port_num, qp_index);
0277 return;
0278 }
0279
0280 if (!pkey) {
0281 mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
0282 gsi->port_num, qp_index);
0283 return;
0284 }
0285
0286 spin_lock_irqsave(&gsi->lock, flags);
0287 qp = gsi->tx_qps[qp_index];
0288 spin_unlock_irqrestore(&gsi->lock, flags);
0289 if (qp) {
0290 mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
0291 gsi->port_num, qp_index);
0292 return;
0293 }
0294
0295 qp = create_gsi_ud_qp(gsi);
0296 if (IS_ERR(qp)) {
0297 mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
0298 PTR_ERR(qp));
0299 return;
0300 }
0301
0302 mqp = to_mqp(qp);
0303 if (dev->lag_active)
0304 mqp->gsi_lag_port = qp_index + 1;
0305 ret = modify_to_rts(gsi, qp, pkey_index);
0306 if (ret)
0307 goto err_destroy_qp;
0308
0309 spin_lock_irqsave(&gsi->lock, flags);
0310 WARN_ON_ONCE(gsi->tx_qps[qp_index]);
0311 gsi->tx_qps[qp_index] = qp;
0312 spin_unlock_irqrestore(&gsi->lock, flags);
0313
0314 return;
0315
0316 err_destroy_qp:
0317 WARN_ON_ONCE(qp);
0318 }
0319
0320 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
0321 int attr_mask)
0322 {
0323 struct mlx5_ib_dev *dev = to_mdev(qp->device);
0324 struct mlx5_ib_qp *mqp = to_mqp(qp);
0325 struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
0326 u16 qp_index;
0327 int ret;
0328
0329 mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
0330
0331 ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
0332 if (ret) {
0333 mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
0334 return ret;
0335 }
0336
0337 if (to_mqp(gsi->rx_qp)->state != IB_QPS_RTS)
0338 return 0;
0339
0340 for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
0341 setup_qp(gsi, qp_index);
0342 return 0;
0343 }
0344
0345 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
0346 int qp_attr_mask,
0347 struct ib_qp_init_attr *qp_init_attr)
0348 {
0349 struct mlx5_ib_qp *mqp = to_mqp(qp);
0350 struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
0351 int ret;
0352
0353 ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
0354 qp_init_attr->cap = gsi->cap;
0355 return ret;
0356 }
0357
0358
/*
 * Reserve a slot in the outstanding-WR ring for a send about to be
 * posted (wc == NULL) or for a pre-cooked completion (wc != NULL, used
 * by the silent-drop path, where the slot is marked completed at once).
 * Hooks the WR's cqe so handle_single_completion() can find the slot.
 *
 * outstanding_pi/ci are free-running u32 counters, so
 * "pi == ci + max_send_wr" detects a full ring; unsigned wrap-around
 * keeps the comparison correct.  Caller must hold gsi->lock.
 *
 * Return: 0 on success, -ENOMEM when the ring is full.
 */
static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
				      struct ib_ud_wr *wr, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_gsi_wr *gsi_wr;

	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
		mlx5_ib_warn(dev, "no available GSI work request.\n");
		return -ENOMEM;
	}

	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
				       gsi->cap.max_send_wr];
	gsi->outstanding_pi++;

	if (!wc) {
		/* Real send: stash wr_id now; the rest of the wc is
		 * filled in by handle_single_completion().
		 */
		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
		gsi_wr->wc.pkey_index = wr->pkey_index;
		gsi_wr->wc.wr_id = wr->wr.wr_id;
	} else {
		gsi_wr->wc = *wc;
		gsi_wr->completed = true;
	}

	gsi_wr->cqe.done = &handle_single_completion;
	wr->wr.wr_cqe = &gsi_wr->cqe;

	return 0;
}
0389
0390
0391 static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
0392 {
0393 struct ib_wc wc = {
0394 { .wr_id = wr->wr.wr_id },
0395 .status = IB_WC_SUCCESS,
0396 .opcode = IB_WC_SEND,
0397 .qp = &mqp->ibqp,
0398 };
0399 int ret;
0400
0401 ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
0402 if (ret)
0403 return ret;
0404
0405 generate_completions(mqp);
0406
0407 return 0;
0408 }
0409
0410
0411 static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
0412 {
0413 struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
0414 struct mlx5_ib_ah *ah = to_mah(wr->ah);
0415 int qp_index = wr->pkey_index;
0416
0417 if (!gsi->num_qps)
0418 return gsi->rx_qp;
0419
0420 if (dev->lag_active && ah->xmit_port)
0421 qp_index = ah->xmit_port - 1;
0422
0423 if (qp_index >= gsi->num_qps)
0424 return NULL;
0425
0426 return gsi->tx_qps[qp_index];
0427 }
0428
/*
 * mlx5_ib_gsi_post_send() - post a chain of send WRs on the GSI.
 *
 * Each WR is copied, detached from the chain, and routed to its tx QP
 * under gsi->lock (reserving an outstanding-WR slot first so completions
 * can be reported in posting order).  WRs with no usable tx QP are
 * silently "completed" as successful.  On error, *bad_wr points at the
 * failing WR and a negative errno is returned.
 */
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
			  const struct ib_send_wr **bad_wr)
{
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	struct ib_qp *tx_qp;
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		/* Post a private single-WR copy so the caller's chain is
		 * never modified.
		 */
		struct ib_ud_wr cur_wr = *ud_wr(wr);

		cur_wr.wr.next = NULL;

		spin_lock_irqsave(&gsi->lock, flags);
		tx_qp = get_tx_qp(gsi, &cur_wr);
		if (!tx_qp) {
			ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
			if (ret)
				goto err;
			spin_unlock_irqrestore(&gsi->lock, flags);
			continue;
		}

		ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
		if (ret)
			goto err;

		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
		if (ret) {
			/* Undo the effect of adding the outstanding wr */
			gsi->outstanding_pi--;
			goto err;
		}
		spin_unlock_irqrestore(&gsi->lock, flags);
	}

	return 0;

err:
	spin_unlock_irqrestore(&gsi->lock, flags);
	*bad_wr = wr;
	return ret;
}
0473
0474 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
0475 const struct ib_recv_wr **bad_wr)
0476 {
0477 struct mlx5_ib_qp *mqp = to_mqp(qp);
0478 struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
0479
0480 return ib_post_recv(gsi->rx_qp, wr, bad_wr);
0481 }
0482
0483 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
0484 {
0485 u16 qp_index;
0486
0487 for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
0488 setup_qp(gsi, qp_index);
0489 }