#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"
#include "qp.h"

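/* Completion notification from the mlx5 core EQ path: forward it to the
 * consumer's completion handler registered on the ib_cq.
 */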
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device = &dev->ib_dev;
		event.event = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

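/* Return the CQE at index n if it is owned by software, i.e. it is valid
 * and its ownership bit matches the current pass over the CQ ring;
 * otherwise return NULL.
 */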
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		fallthrough;
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		fallthrough;
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode = IB_WC_RDMA_READ;
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode = IB_WC_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode = IB_WC_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE    = 2,
};

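/* Fill in a work completion for a receive-side CQE: consume the matching
 * SRQ or RQ WQE, decode the response opcode, and extract the transport
 * metadata (pkey, VLAN, GRH / RoCE network header type) according to the
 * port's link layer.
 */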
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq = NULL;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 roce_packet_type;
	bool vlan_present;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
			if (msrq)
				srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq)
				mlx5_core_res_put(&msrq->common);
		}
	} else {
		wq = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (get_cqe_opcode(cqe)) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->immediate;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
			       (cqe->hds_ip_ext & CQE_L4_OK))))
			wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->immediate;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
		break;
	}
	wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (is_qp1(qp->type)) {
		u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET) {
		wc->slid = be16_to_cpu(cqe->slid);
		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
		return;
	}

	wc->slid = 0;
	vlan_present = cqe->l4_l3_hdr_type & 0x1;
	roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
	if (vlan_present) {
		wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
		wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
		wc->wc_flags |= IB_WC_WITH_VLAN;
	} else {
		wc->sl = 0;
	}

	switch (roce_packet_type) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	mlx5_ib_warn(dev, "dump error cqe\n");
	mlx5_dump_err_cqe(dev->mdev, cqe);
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump) {
		mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
			     ib_wc_status_msg(wc->status));
		dump_cqe(dev, cqe);
	}
}

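/* Walk the send work-request list from the last polled entry up to the
 * just-completed WQE index and advance sq.last_poll past it.
 */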
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

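/* Generate software flush-error completions for the outstanding WQEs of one
 * work queue; used when the device is in an internal error state and the
 * hardware CQ can no longer be polled.
 */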
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
		    int *npolled, bool is_send)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = (is_send) ? &qp->sq : &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		unsigned int idx;

		idx = (is_send) ? wq->last_poll : wq->tail;
		idx &= (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		if (is_send)
			wq->last_poll = wq->w_list[idx].next;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Flush any WQEs still outstanding on the QPs attached to this CQ */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, true);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, false);
		if (*npolled >= num_entries)
			return;
	}
}

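/* Poll a single CQE from the hardware CQ, resolve the QP it belongs to, and
 * translate it into an ib_wc according to its opcode. Returns -EAGAIN when
 * no CQE is currently owned by software.
 */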
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = get_cqe_opcode(cqe64);
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (wc->status != IB_WC_WR_FLUSH_ERR &&
		    (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
			dev->umrc.state = MLX5_UMR_STATE_RECOVER;

		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR: {
		struct mlx5_sig_err_cqe *sig_err_cqe =
			(struct mlx5_sig_err_cqe *)cqe64;
		struct mlx5_core_sig_ctx *sig;

		xa_lock(&dev->sig_mrs);
		sig = xa_load(&dev->sig_mrs,
			      mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		get_sig_err_item(sig_err_cqe, &sig->err_item);
		sig->sig_err_exists = true;
		sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, sig->err_item.key,
			     sig->err_item.err_type,
			     sig->err_item.sig_err_offset,
			     sig->err_item.expected,
			     sig->err_item.actual);

		xa_unlock(&dev->sig_mrs);
		goto repoll;
	}
	}

	return 0;
}

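/* Drain software-generated completions queued on cq->wc_list; on a fatal
 * device error they are reported as flush errors.
 */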
static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
			struct ib_wc *wc, bool is_fatal_err)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_ib_wc *soft_wc, *next;
	int npolled = 0;

	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
		if (npolled >= num_entries)
			break;

		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
			    cq->mcq.cqn);

		if (unlikely(is_fatal_err)) {
			soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
			soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		}
		wc[npolled++] = soft_wc->wc;
		list_del(&soft_wc->list);
		kfree(soft_wc);
	}

	return npolled;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);
	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		/* On a device fatal error, return software completions only */
		if (unlikely(!list_empty(&cq->wc_list)))
			soft_polled = poll_soft_wc(cq, num_entries, wc, true);

		mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
				     wc + soft_polled, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc, false);

	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);
out:
	spin_unlock_irqrestore(&cq->lock, flags);

	return soft_polled + npolled;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uar->map;
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, irq_flags);
	if (cq->notify_flags != IB_CQ_NEXT_COMP)
		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
		ret = 1;
	spin_unlock_irqrestore(&cq->lock, irq_flags);

	mlx5_cq_arm(&cq->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page, to_mcq(ibcq)->mcq.cons_index);

	return ret;
}

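/* Allocate the fragmented buffer backing a kernel CQ and set up its fragment
 * control structure for nent CQEs of cqe_size bytes each.
 */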
static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_cq_buf *buf,
			     int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(dev->mdev,
				       nent * cqe_size,
				       frag_buf,
				       dev->mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

enum {
	MLX5_CQE_RES_FORMAT_HASH = 0,
	MLX5_CQE_RES_FORMAT_CSUM = 1,
	MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
};

static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
{
	switch (format) {
	case MLX5_IB_CQE_RES_FORMAT_HASH:
		return MLX5_CQE_RES_FORMAT_HASH;
	case MLX5_IB_CQE_RES_FORMAT_CSUM:
		return MLX5_CQE_RES_FORMAT_CSUM;
	case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
		if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
			return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}
}

static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct mlx5_ib_cq *cq, int entries, u32 **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd = {};
	unsigned long page_size;
	unsigned int page_offset_quantized;
	size_t ucmdlen;
	__be64 *pas;
	int ncont;
	void *cqc;
	int err;
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	ucmdlen = min(udata->inlen, sizeof(ucmd));
	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
		return -EINVAL;

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
			    MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX |
			    MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)))
		return -EINVAL;

	if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
	    ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem =
		ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			    entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	page_size = mlx5_umem_find_best_cq_quantized_pgoff(
		cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
		page_offset, 64, &page_offset_quantized);
	if (!page_size) {
		err = -EINVAL;
		goto err_umem;
	}

	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
	if (err)
		goto err_umem;

	ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
	mlx5_ib_dbg(
		dev,
		"addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
		ucmd.buf_addr, entries * ucmd.cqe_size,
		ib_umem_num_pages(cq->buf.umem), page_size, ncont);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
		*index = ucmd.uar_page_index;
	} else if (context->bfregi.lib_uar_dyn) {
		err = -EINVAL;
		goto err_cqb;
	} else {
		*index = context->bfregi.sys_pages[0];
	}

	if (ucmd.cqe_comp_en == 1) {
		int mini_cqe_format;

		if (!((*cqe_size == 128 &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
		      (*cqe_size == 64 &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
				     *cqe_size);
			goto err_cqb;
		}

		mini_cqe_format =
			mini_cqe_res_format_to_hw(dev,
						  ucmd.cqe_comp_res_format);
		if (mini_cqe_format < 0) {
			err = mini_cqe_format;
			mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
				    ucmd.cqe_comp_res_format, err);
			goto err_cqb;
		}

		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
		MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
	}

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
		if (*cqe_size != 128 ||
		    !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev,
				     "CQE padding is not supported for CQE size of %dB!\n",
				     *cqe_size);
			goto err_cqb;
		}

		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
	}

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)
		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS;

	MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
	return 0;

err_cqb:
	kvfree(*cqb);

err_db:
	mlx5_ib_db_unmap_user(context, &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	mlx5_ib_db_unmap_user(context, &cq->db);
	ib_umem_release(cq->buf.umem);
}

static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    u32 **cqb, int *index, int *inlen)
{
	__be64 *pas;
	void *cqc;
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_frag_buf(&cq->buf);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		 cq->buf.frag_buf.npages;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uar->index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

static void notify_soft_wc_handler(struct work_struct *work)
{
	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
					     notify_work);

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	int index;
	int inlen;
	u32 *cqb = NULL;
	void *cqc;
	int cqe_size;
	int eqn;
	int err;

	if (entries < 0 ||
	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
		return -EINVAL;

	if (check_cq_create_flags(attr->flags))
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return -EINVAL;

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (udata) {
		err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
				     &index, &inlen);
		if (err)
			return err;
	} else {
		cqe_size = cache_line_size() == 128 ? 128 : 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			return err;

		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
	}

	err = mlx5_vector2eqn(dev->mdev, vector, &eqn);
	if (err)
		goto err_cqb;

	cq->cqe_size = cqe_size;

	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
	MLX5_SET(cqc, cqc, uar_page, index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
		MLX5_SET(cqc, cqc, oi, 1);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	if (udata)
		cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
	else
		cq->mcq.comp = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	INIT_LIST_HEAD(&cq->wc_list);

	if (udata)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return 0;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (udata)
		destroy_cq_user(cq, udata);
	else
		destroy_cq_kernel(dev, cq);
	return err;
}

int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int ret;

	ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (ret)
		return ret;

	if (udata)
		destroy_cq_user(mcq, udata);
	else
		destroy_cq_kernel(dev, mcq);
	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

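/* Remove all CQEs belonging to the given QP/SRQ number from the CQ,
 * compacting the remaining entries. The caller must hold the CQ lock.
 */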
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -EOPNOTSUPP;

	if (cq_period > MLX5_MAX_CQ_PERIOD)
		return -EINVAL;

	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
					     cq_period, cq_count);
	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata,
		       int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	/* check multiplication overflow */
	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
		return -EINVAL;

	umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			   (size_t)ucmd.cqe_size * entries,
			   IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_frag_buf(cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

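/* Copy the software-owned CQEs from the old buffer into the resize buffer,
 * fixing up the ownership bit for the new ring, until the RESIZE_CQ CQE is
 * reached.
 */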
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
					     (i + 1) & cq->resize_buf->nent);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void *cqc;
	u32 *in;
	int err;
	int npas;
	__be64 *pas;
	unsigned int page_offset_quantized = 0;
	unsigned int page_shift;
	int inlen;
	int cqe_size;
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1 ||
	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
			     entries,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
		return -EINVAL;
	}

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		unsigned long page_size;

		err = resize_user(dev, cq, entries, udata, &cqe_size);
		if (err)
			goto ex;

		page_size = mlx5_umem_find_best_cq_quantized_pgoff(
			cq->resize_umem, cqc, log_page_size,
			MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
			&page_offset_quantized);
		if (!page_size) {
			err = -EINVAL;
			goto ex_resize;
		}
		npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
		page_shift = order_base_2(page_size);
	} else {
		struct mlx5_frag_buf *frag_buf;

		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (err)
			goto ex;
		frag_buf = &cq->resize_buf->frag_buf;
		npas = frag_buf->npages;
		page_shift = frag_buf->page_shift;
	}

	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
	if (udata)
		mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
				     0);
	else
		mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);

	MLX5_SET(modify_cq_in, in,
		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
		 MLX5_MODIFY_CQ_MASK_LOG_SIZE |
		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
		 MLX5_MODIFY_CQ_MASK_PG_SIZE);

	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);

	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));

	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	ib_umem_release(cq->resize_umem);
	if (!udata) {
		free_cq_buf(dev, cq->resize_buf);
		cq->resize_buf = NULL;
	}
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}

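/* Called from atomic context. Queue a software-generated work completion on
 * the CQ and, if the CQ is armed for the next completion or the completion
 * carries an error status, schedule the notification work to invoke the
 * completion handler.
 */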
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
{
	struct mlx5_ib_wc *soft_wc;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	unsigned long flags;

	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
	if (!soft_wc)
		return -ENOMEM;

	soft_wc->wc = *wc;
	spin_lock_irqsave(&cq->lock, flags);
	list_add_tail(&soft_wc->list, &cq->wc_list);
	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
	    wc->status != IB_WC_SUCCESS) {
		cq->notify_flags = 0;
		schedule_work(&cq->notify_work);
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	return 0;
}