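// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
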
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/llist.h>
#include <asm/barrier.h>
#include <net/tcp.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
	[SIW_QP_STATE_IDLE] = "IDLE",
	[SIW_QP_STATE_RTR] = "RTR",
	[SIW_QP_STATE_RTS] = "RTS",
	[SIW_QP_STATE_CLOSING] = "CLOSING",
	[SIW_QP_STATE_TERMINATE] = "TERMINATE",
	[SIW_QP_STATE_ERROR] = "ERROR"
};

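/*
 * Per-opcode iWARP (RDMAP/DDP/MPA) header templates and receive handlers,
 * indexed by RDMAP opcode. hdr_len and the pre-computed control words are
 * used when building outbound frames; rx_data dispatches inbound payload
 * processing per opcode.
 */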
struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
	{ /* RDMAP_RDMA_WRITE */
	  .hdr_len = sizeof(struct iwarp_rdma_write),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_WRITE),
	  .rx_data = siw_proc_write },
	{ /* RDMAP_RDMA_READ_REQ */
	  .hdr_len = sizeof(struct iwarp_rdma_rreq),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_REQ),
	  .rx_data = siw_proc_rreq },
	{ /* RDMAP_RDMA_READ_RESP */
	  .hdr_len = sizeof(struct iwarp_rdma_rresp),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_RESP),
	  .rx_data = siw_proc_rresp },
	{ /* RDMAP_SEND */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_TERMINATE */
	  .hdr_len = sizeof(struct iwarp_terminate),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_TERMINATE),
	  .rx_data = siw_proc_terminate }
};

void siw_qp_llp_data_ready(struct sock *sk)
{
	struct siw_qp *qp;

	read_lock(&sk->sk_callback_lock);

	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
		goto done;

	qp = sk_to_qp(sk);

	if (likely(!qp->rx_stream.rx_suspend &&
		   down_read_trylock(&qp->state_lock))) {
		read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };

		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
			/*
			 * Receive data directly from the socket callback
			 * context: tcp_read_sock() feeds available bytes
			 * into siw_tcp_rx_data().
			 */
			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);

		up_read(&qp->state_lock);
	} else {
		siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
			   qp->rx_stream.rx_suspend);
	}
done:
	read_unlock(&sk->sk_callback_lock);
}

void siw_qp_llp_close(struct siw_qp *qp)
{
	siw_dbg_qp(qp, "enter llp close, state = %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);

	down_write(&qp->state_lock);

	qp->rx_stream.rx_suspend = 1;
	qp->tx_ctx.tx_suspend = 1;
	qp->attrs.sk = NULL;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_RTS:
	case SIW_QP_STATE_RTR:
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_ERROR;
		break;
	/*
	 * SIW_QP_STATE_CLOSING:
	 *
	 * This is a forced close; move the QP to ERROR if the SQ
	 * already drained, and to IDLE otherwise.
	 */
	case SIW_QP_STATE_CLOSING:
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
			qp->attrs.state = SIW_QP_STATE_ERROR;
		else
			qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	default:
		siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
			   siw_qp_state_to_string[qp->attrs.state]);
		break;
	}
	siw_sq_flush(qp);
	siw_rq_flush(qp);

	/*
	 * Dereference the closing connection endpoint, if any.
	 */
	if (qp->cep) {
		siw_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);

	siw_dbg_qp(qp, "llp close exit: state %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);
}

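/*
 * Socket callback: the transport reports newly available send space.
 * Forward the event to the saved original callback and, if the socket
 * is writable again, kick SQ processing for the attached QP.
 */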
void siw_qp_llp_write_space(struct sock *sk)
{
	struct siw_cep *cep;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (cep) {
		cep->sk_write_space(sk);

		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
			(void)siw_sq_start(cep->qp);
	}

	read_unlock(&sk->sk_callback_lock);
}

static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
	if (irq_size) {
		irq_size = roundup_pow_of_two(irq_size);
		qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
		if (!qp->irq) {
			qp->attrs.irq_size = 0;
			return -ENOMEM;
		}
	}
	if (orq_size) {
		orq_size = roundup_pow_of_two(orq_size);
		qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
		if (!qp->orq) {
			qp->attrs.orq_size = 0;
			qp->attrs.irq_size = 0;
			vfree(qp->irq);
			return -ENOMEM;
		}
	}
	qp->attrs.irq_size = irq_size;
	qp->attrs.orq_size = orq_size;
	siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
	return 0;
}

static int siw_qp_enable_crc(struct siw_qp *qp)
{
	struct siw_rx_stream *c_rx = &qp->rx_stream;
	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
	int size;

	if (siw_crypto_shash == NULL)
		return -ENOENT;

	size = crypto_shash_descsize(siw_crypto_shash) +
	       sizeof(struct shash_desc);

	c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
		kfree(c_tx->mpa_crc_hd);
		kfree(c_rx->mpa_crc_hd);
		c_tx->mpa_crc_hd = NULL;
		c_rx->mpa_crc_hd = NULL;
		return -ENOMEM;
	}
	c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
	c_rx->mpa_crc_hd->tfm = siw_crypto_shash;

	return 0;
}

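/*
 * Send an initial, zero length RDMA WRITE or RDMA READ to the peer, as
 * negotiated with the MPAv2 peer-to-peer (RTR) setup. The work request
 * is built directly as the active TX WQE and does not consume Send
 * Queue space. Caller is expected to hold the QP state lock.
 */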
int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
{
	struct siw_wqe *wqe = tx_wqe(qp);
	unsigned long flags;
	int rv = 0;

	spin_lock_irqsave(&qp->sq_lock, flags);

	if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
		spin_unlock_irqrestore(&qp->sq_lock, flags);
		return -EIO;
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);

	wqe->wr_status = SIW_WR_QUEUED;
	wqe->sqe.flags = 0;
	wqe->sqe.num_sge = 1;
	wqe->sqe.sge[0].length = 0;
	wqe->sqe.sge[0].laddr = 0;
	wqe->sqe.sge[0].lkey = 0;
	/*
	 * Zero length operation: rkey/raddr below are placeholders,
	 * since a zero length transfer does not access peer memory.
	 */
	wqe->sqe.rkey = 1;
	wqe->sqe.raddr = 0;
	wqe->processed = 0;

	if (ctrl & MPA_V2_RDMA_WRITE_RTR)
		wqe->sqe.opcode = SIW_OP_WRITE;
	else if (ctrl & MPA_V2_RDMA_READ_RTR) {
		struct siw_sqe *rreq = NULL;

		wqe->sqe.opcode = SIW_OP_READ;

		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size)
			rreq = orq_get_free(qp);
		if (rreq) {
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else
			rv = -EIO;

		spin_unlock(&qp->orq_lock);
	} else
		rv = -EINVAL;

	if (rv)
		wqe->wr_status = SIW_WR_IDLE;

	spin_unlock_irqrestore(&qp->sq_lock, flags);

	if (!rv)
		rv = siw_sq_start(qp);

	return rv;
}

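/*
 * Map a local memory access error to the matching DDP (tagged buffer)
 * error code for TERMINATE reporting.
 */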
enum ddp_ecode siw_tagged_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return DDP_ECODE_T_INVALID_STAG;
	case E_BASE_BOUNDS:
		return DDP_ECODE_T_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return DDP_ECODE_T_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		/*
		 * DDP defines no ecode for insufficient access
		 * permissions; 'invalid STag' is the closest match.
		 */
		return DDP_ECODE_T_INVALID_STAG;
	default:
		WARN_ON(1);
		return DDP_ECODE_T_INVALID_STAG;
	}
}

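/*
 * Map a local memory access error to the matching RDMAP protection
 * error code for TERMINATE reporting.
 */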
enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return RDMAP_ECODE_INVALID_STAG;
	case E_BASE_BOUNDS:
		return RDMAP_ECODE_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return RDMAP_ECODE_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		return RDMAP_ECODE_ACCESS_RIGHTS;
	default:
		return RDMAP_ECODE_UNSPECIFIED;
	}
}

void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
			u8 ecode, int in_tx)
{
	if (!qp->term_info.valid) {
		memset(&qp->term_info, 0, sizeof(qp->term_info));
		qp->term_info.layer = layer;
		qp->term_info.etype = etype;
		qp->term_info.ecode = ecode;
		qp->term_info.in_tx = in_tx;
		qp->term_info.valid = 1;
	}
	siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
		   layer, etype, ecode, in_tx ? "yes" : "no");
}

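/*
 * Send a TERMINATE message, as defined by the iWARP specs (RDMAP/DDP/MPA).
 * Sending is best effort: the message can only go out if the QP is still
 * connected and no other transmit operation is currently in progress on
 * the stream.
 */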
void siw_send_terminate(struct siw_qp *qp)
{
	struct kvec iov[3];
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
	struct iwarp_terminate *term = NULL;
	union iwarp_hdr *err_hdr = NULL;
	struct socket *s = qp->attrs.sk;
	struct siw_rx_stream *srx = &qp->rx_stream;
	union iwarp_hdr *rx_hdr = &srx->hdr;
	u32 crc = 0;
	int num_frags, len_terminate, rv;

	if (!qp->term_info.valid)
		return;

	qp->term_info.valid = 0;

	if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
		siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
			   tx_type(tx_wqe(qp)));
		return;
	}
	if (!s && qp->cep)
		/* QP not yet in RTS. Take socket from connection endpoint. */
		s = qp->cep->sock;

	if (!s) {
		siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
		return;
	}

	term = kzalloc(sizeof(*term), GFP_KERNEL);
	if (!term)
		return;

	term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
	term->ddp_mo = 0;
	term->ddp_msn = cpu_to_be32(1);

	iov[0].iov_base = term;
	iov[0].iov_len = sizeof(*term);

	if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
	    ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
	     (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
		err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
		if (!err_hdr) {
			kfree(term);
			return;
		}
	}
	memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
	       sizeof(struct iwarp_ctrl));

	__rdmap_term_set_layer(term, qp->term_info.layer);
	__rdmap_term_set_etype(term, qp->term_info.etype);
	__rdmap_term_set_ecode(term, qp->term_info.ecode);

	switch (qp->term_info.layer) {
	case TERM_ERROR_LAYER_RDMAP:
		if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
			/* No additional DDP/RDMAP header to be included */
			break;

		if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
			/*
			 * Complete RDMAP frame gets attached, and the
			 * DDP segment length is valid.
			 */
			term->flag_m = 1;
			term->flag_d = 1;
			term->flag_r = 1;

			if (qp->term_info.in_tx) {
				struct iwarp_rdma_rreq *rreq;
				struct siw_wqe *wqe = tx_wqe(qp);

				/*
				 * Inbound RREQ error, detected while
				 * creating the RRESP: reconstruct the
				 * peer's RREQ from the current TX WQE.
				 */
				rreq = (struct iwarp_rdma_rreq *)err_hdr;

				memcpy(&rreq->ctrl,
				       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
				       sizeof(struct iwarp_ctrl));

				rreq->rsvd = 0;
				rreq->ddp_qn =
					htonl(RDMAP_UNTAGGED_QN_RDMA_READ);

				/* Provide the RREQ's MSN as kept aside */
				rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);

				rreq->ddp_mo = htonl(wqe->processed);
				rreq->sink_stag = htonl(wqe->sqe.rkey);
				rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
				rreq->read_size = htonl(wqe->sqe.sge[0].length);
				rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
				rreq->source_to =
					cpu_to_be64(wqe->sqe.sge[0].laddr);

				iov[1].iov_base = rreq;
				iov[1].iov_len = sizeof(*rreq);

				rx_hdr = (union iwarp_hdr *)rreq;
			} else {
				/*
				 * Take RDMAP/DDP information from the
				 * current (failed) inbound frame.
				 */
				iov[1].iov_base = rx_hdr;

				if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
				    RDMAP_RDMA_READ_REQ)
					iov[1].iov_len =
						sizeof(struct iwarp_rdma_rreq);
				else /* SEND type */
					iov[1].iov_len =
						sizeof(struct iwarp_send);
			}
		} else {
			/*
			 * Do not report DDP header information if the
			 * packet layout is unknown.
			 */
			if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
			    (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
				break;

			iov[1].iov_base = rx_hdr;

			/* Only a DDP frame gets attached */
			if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
				iov[1].iov_len =
					sizeof(struct iwarp_rdma_write);
			else
				iov[1].iov_len = sizeof(struct iwarp_send);

			term->flag_m = 1;
			term->flag_d = 1;
		}
		term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
		break;

	case TERM_ERROR_LAYER_DDP:
		/*
		 * DDP layer error: attach the inbound DDP control
		 * information, unless the peer's DDP version is wrong
		 * (version error), in which case the header layout
		 * cannot be trusted.
		 */
		if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
		    ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
			break;

		iov[1].iov_base = rx_hdr;

		if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
			iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
		else
			iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);

		term->flag_m = 1;
		term->flag_d = 1;
		break;

	default:
		break;
	}
	if (term->flag_m || term->flag_d || term->flag_r) {
		iov[2].iov_base = &crc;
		iov[2].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
		num_frags = 3;
	} else {
		iov[1].iov_base = &crc;
		iov[1].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + MPA_CRC_SIZE;
		num_frags = 2;
	}

	/* Adjust the attached header's DDP segment length, if valid */
	if (term->flag_m) {
		u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
		enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);

		real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
		rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
	}

	term->ctrl.mpa_len =
		cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
	if (qp->tx_ctx.mpa_crc_hd) {
		crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
		if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
					(u8 *)iov[0].iov_base,
					iov[0].iov_len))
			goto out;

		if (num_frags == 3) {
			if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
						(u8 *)iov[1].iov_base,
						iov[1].iov_len))
				goto out;
		}
		crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
	}

	rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
	siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
		   rv == len_terminate ? "success" : "failure",
		   __rdmap_term_layer(term), __rdmap_term_etype(term),
		   __rdmap_term_ecode(term), rv);
out:
	kfree(term);
	kfree(err_hdr);
}

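/*
 * Handle all QP attributes other than the QP state.
 */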
static void siw_qp_modify_nonstate(struct siw_qp *qp,
				   struct siw_qp_attrs *attrs,
				   enum siw_qp_attr_mask mask)
{
	if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
		if (attrs->flags & SIW_RDMA_BIND_ENABLED)
			qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;

		if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
			qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;

		if (attrs->flags & SIW_RDMA_READ_ENABLED)
			qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
	}
}

static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
				      struct siw_qp_attrs *attrs,
				      enum siw_qp_attr_mask mask)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_RTS:
		if (attrs->flags & SIW_MPA_CRC) {
			rv = siw_qp_enable_crc(qp);
			if (rv)
				break;
		}
		if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
			siw_dbg_qp(qp, "no socket\n");
			rv = -EINVAL;
			break;
		}
		if (!(mask & SIW_QP_ATTR_MPA)) {
			siw_dbg_qp(qp, "no MPA\n");
			rv = -EINVAL;
			break;
		}
		/*
		 * Initialize global iWARP TX state
		 */
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;

		/*
		 * Initialize global iWARP RX state
		 */
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;

		/*
		 * Init inbound/outbound READ queues, sized by the
		 * negotiated IRD/ORD values.
		 */
		rv = siw_qp_readq_init(qp, attrs->irq_size,
				       attrs->orq_size);
		if (rv)
			break;

		qp->attrs.sk = attrs->sk;
		qp->attrs.state = SIW_QP_STATE_RTS;

		siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
			   attrs->flags & SIW_MPA_CRC ? "y" : "n",
			   qp->attrs.orq_size, qp->attrs.irq_size);
		break;

	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		if (qp->cep) {
			siw_cep_put(qp->cep);
			qp->cep = NULL;
		}
		break;

	default:
		break;
	}
	return rv;
}

static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
				     struct siw_qp_attrs *attrs)
{
	int drop_conn = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_CLOSING:
		/*
		 * Verbs: move to IDLE if SQ and ORQ are empty, to
		 * ERROR otherwise. But the connection must be closed
		 * first, so CLOSING or ERROR is kept as a transient
		 * state; connection drop is scheduled and the socket
		 * state change upcall reports the final close.
		 */
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
			qp->attrs.state = SIW_QP_STATE_CLOSING;
		} else {
			qp->attrs.state = SIW_QP_STATE_ERROR;
			siw_sq_flush(qp);
		}
		siw_rq_flush(qp);

		drop_conn = 1;
		break;

	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_TERMINATE;

		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
				   RDMAP_ETYPE_CATASTROPHIC,
				   RDMAP_ECODE_UNSPECIFIED, 1);
		drop_conn = 1;
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * This is an emergency close: any in-progress transmit
		 * operation gets cancelled, which will likely result in
		 * a protocol failure if a TX operation is in transit.
		 * The non-empty IRQ case is left open here, although
		 * the peer asked for data transfer at a valid point in
		 * time.
		 */
		siw_sq_flush(qp);
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		drop_conn = 1;
		break;

	default:
		break;
	}
	return drop_conn;
}

static void siw_qp_nextstate_from_term(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	switch (attrs->state) {
	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);
		break;

	default:
		break;
	}
}

static int siw_qp_nextstate_from_close(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_IDLE:
		WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
		qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	case SIW_QP_STATE_CLOSING:
		/*
		 * The LLP may have already moved the QP to CLOSING
		 * due to a graceful peer close init.
		 */
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * QP was moved to CLOSING by an LLP event which did
		 * not yet finish the close.
		 */
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);

		siw_rq_flush(qp);
		break;

	default:
		siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
			   siw_qp_state_to_string[qp->attrs.state],
			   siw_qp_state_to_string[attrs->state]);

		rv = -ECONNABORTED;
	}
	return rv;
}

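/*
 * Change QP attributes, including the QP state. Caller must hold the
 * QP state lock.
 */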
int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
		  enum siw_qp_attr_mask mask)
{
	int drop_conn = 0, rv = 0;

	if (!mask)
		return 0;

	siw_dbg_qp(qp, "state: %s => %s\n",
		   siw_qp_state_to_string[qp->attrs.state],
		   siw_qp_state_to_string[attrs->state]);

	if (mask != SIW_QP_ATTR_STATE)
		siw_qp_modify_nonstate(qp, attrs, mask);

	if (!(mask & SIW_QP_ATTR_STATE))
		return 0;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_RTR:
		rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
		break;

	case SIW_QP_STATE_RTS:
		drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
		break;

	case SIW_QP_STATE_TERMINATE:
		siw_qp_nextstate_from_term(qp, attrs);
		break;

	case SIW_QP_STATE_CLOSING:
		siw_qp_nextstate_from_close(qp, attrs);
		break;
	default:
		break;
	}
	if (drop_conn)
		siw_qp_cm_drop(qp, 0);

	return rv;
}

void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
{
	rreq->id = sqe->id;
	rreq->opcode = sqe->opcode;
	rreq->sge[0].laddr = sqe->sge[0].laddr;
	rreq->sge[0].length = sqe->sge[0].length;
	rreq->sge[0].lkey = sqe->sge[0].lkey;
	rreq->sge[1].lkey = sqe->sge[1].lkey;
	rreq->flags = sqe->flags | SIW_WQE_VALID;
	rreq->num_sge = 1;
}

static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int rv = 1;

	sqe = sq_get_next(qp);
	if (!sqe)
		return 0;

	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* First copy the SQE to kernel private memory */
	memcpy(&wqe->sqe, sqe, sizeof(*sqe));

	if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
		rv = -EINVAL;
		goto out;
	}
	if (wqe->sqe.flags & SIW_WQE_INLINE) {
		if (wqe->sqe.opcode != SIW_OP_SEND &&
		    wqe->sqe.opcode != SIW_OP_WRITE) {
			rv = -EINVAL;
			goto out;
		}
		if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
		wqe->sqe.sge[0].lkey = 0;
		wqe->sqe.num_sge = 1;
	}
	if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
		/* A READ cannot be fenced */
		if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
			     wqe->sqe.opcode ==
				     SIW_OP_READ_LOCAL_INV)) {
			siw_dbg_qp(qp, "cannot fence read\n");
			rv = -EINVAL;
			goto out;
		}
		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);

	} else if (wqe->sqe.opcode == SIW_OP_READ ||
		   wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
		struct siw_sqe *rreq;

		if (unlikely(!qp->attrs.orq_size)) {
			/* We negotiated not to send READs */
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.num_sge = 1;

		spin_lock(&qp->orq_lock);

		rreq = orq_get_free(qp);
		if (rreq) {
			/*
			 * Make an immediate copy in the ORQ to be
			 * ready to process a loopback READ reply.
			 */
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);
	}

	/* Clear the SQE; it can be re-used by the application */
	smp_store_mb(sqe->flags, 0);
	qp->sq_get++;
out:
	if (unlikely(rv < 0)) {
		siw_dbg_qp(qp, "error %d\n", rv);
		wqe->wr_status = SIW_WR_IDLE;
	}
	return rv;
}

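/*
 * Must be called with the SQ locked. To avoid SQ starvation by a
 * constant stream of inbound READ requests, the IRQ is only served
 * for up to SIW_IRQ_MAXBURST_SQ_ACTIVE consecutive entries while the
 * SQ has pending work.
 */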
int siw_activate_tx(struct siw_qp *qp)
{
	struct siw_sqe *irqe;
	struct siw_wqe *wqe = tx_wqe(qp);

	if (!qp->attrs.irq_size)
		return siw_activate_tx_from_sq(qp);

	irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];

	if (!(irqe->flags & SIW_WQE_VALID))
		return siw_activate_tx_from_sq(qp);

	/*
	 * Avoid local WQE processing starvation in case of a
	 * constant inbound READ request stream.
	 */
	if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
		qp->irq_burst = 0;
		return siw_activate_tx_from_sq(qp);
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* start READ RESPONSE */
	wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
	wqe->sqe.flags = 0;
	if (irqe->num_sge) {
		wqe->sqe.num_sge = 1;
		wqe->sqe.sge[0].length = irqe->sge[0].length;
		wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
		wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
	} else {
		wqe->sqe.num_sge = 0;
	}

	/*
	 * Retain the original RREQ's message sequence number for
	 * potential error reporting cases.
	 */
	wqe->sqe.sge[1].length = irqe->sge[1].length;

	wqe->sqe.rkey = irqe->rkey;
	wqe->sqe.raddr = irqe->raddr;

	wqe->processed = 0;
	qp->irq_get++;

	/* mark current IRQ entry free */
	smp_store_mb(irqe->flags, 0);

	return 1;
}

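/*
 * Check if the current CQ state and the completion's flags call for
 * invoking the CQ completion handler. Must be called with the CQ lock
 * held.
 */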
static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
{
	u32 cq_notify;

	if (!cq->base_cq.comp_handler)
		return false;

	/* Read the notification state, which may be shared with the user */
	cq_notify = READ_ONCE(cq->notify->flags);

	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
	     (flags & SIW_WQE_SOLICITED))) {
		/*
		 * CQ notification is one-shot: since the current CQE
		 * causes a user notification, the CQ gets dis-armed
		 * and must be re-armed by the user for a further
		 * notification.
		 */
		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);

		return true;
	}
	return false;
}

int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
		     enum siw_wc_status status)
{
	struct siw_cq *cq = qp->scq;
	int rv = 0;

	if (cq) {
		u32 sqe_flags = sqe->flags;
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;

			cqe->id = sqe->id;
			cqe->opcode = sqe->opcode;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res))
				cqe->base_qp = &qp->base_qp;
			else
				cqe->qp_id = qp_id(qp);

			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
			/* recycle SQE */
			smp_store_mb(sqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, sqe_flags);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* No CQ attached: just recycle the SQE */
		smp_store_mb(sqe->flags, 0);
	}
	return rv;
}

int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
		     u32 inval_stag, enum siw_wc_status status)
{
	struct siw_cq *cq = qp->rcq;
	int rv = 0;

	if (cq) {
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;
			u8 cqe_flags = SIW_WQE_VALID;

			cqe->id = rqe->id;
			cqe->opcode = SIW_OP_RECEIVE;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res)) {
				cqe->base_qp = &qp->base_qp;
				if (inval_stag) {
					cqe_flags |= SIW_WQE_REM_INVAL;
					cqe->inval_stag = inval_stag;
				}
			} else {
				cqe->qp_id = qp_id(qp);
			}
			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, cqe_flags);
			/* recycle RQE */
			smp_store_mb(rqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* No CQ attached: just recycle the RQE */
		smp_store_mb(rqe->flags, 0);
	}
	return rv;
}

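/*
 * siw_sq_flush()
 *
 * Flush SQ and ORQ entries to the CQ. Must be called with the QP state
 * write lock held, so the SQ and ORQ locks are not taken here.
 */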
void siw_sq_flush(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int async_event = 0;

	/*
	 * Start with completing any work currently on the ORQ
	 */
	while (qp->attrs.orq_size) {
		sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->orq_get++;
	}
	/*
	 * Flush an in-progress WQE if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
			   tx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, tx_type(wqe));

		if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
		    ((tx_type(wqe) != SIW_OP_READ &&
		      tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
		     wqe->wr_status == SIW_WR_QUEUED))
			/*
			 * An in-progress READ request is already
			 * accounted for via the ORQ above
			 */
			siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
					 SIW_WC_WR_FLUSH_ERR);

		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Send Queue
	 */
	while (qp->attrs.sq_size) {
		sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		async_event = 1;
		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			/*
			 * Shall IB_EVENT_SQ_DRAINED be suppressed if
			 * work completion fails?
			 */
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->sq_get++;
	}
	if (async_event)
		siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
}

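/*
 * siw_rq_flush()
 *
 * Flush Receive Queue entries to the CQ. Also takes care of pending
 * active tagged and untagged WQEs. Must be called with the QP state
 * write lock held, so the RQ lock is not taken here.
 */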
void siw_rq_flush(struct siw_qp *qp)
{
	struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;

	/*
	 * Flush an in-progress untagged operation if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
			   rx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, rx_type(wqe));

		if (rx_type(wqe) == SIW_OP_RECEIVE) {
			siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
					 0, SIW_WC_WR_FLUSH_ERR);
		} else if (rx_type(wqe) != SIW_OP_READ &&
			   rx_type(wqe) != SIW_OP_READ_RESPONSE &&
			   rx_type(wqe) != SIW_OP_WRITE) {
			siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
		}
		wqe->wr_status = SIW_WR_IDLE;
	}
	wqe = &qp->rx_tagged.wqe_active;

	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_wqe_put_mem(wqe, rx_type(wqe));
		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Receive Queue
	 */
	while (qp->attrs.rq_size) {
		struct siw_rqe *rqe =
			&qp->recvq[qp->rq_get % qp->attrs.rq_size];

		if (!READ_ONCE(rqe->flags))
			break;

		if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(rqe->flags, 0);
		qp->rq_get++;
	}
}

int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
{
	int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
			  GFP_KERNEL);

	if (!rv) {
		kref_init(&qp->ref);
		qp->sdev = sdev;
		siw_dbg_qp(qp, "new QP\n");
	}
	return rv;
}

void siw_free_qp(struct kref *ref)
{
	struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
	struct siw_device *sdev = qp->sdev;
	unsigned long flags;

	if (qp->cep)
		siw_cep_put(qp->cep);

	found = xa_erase(&sdev->qp_xa, qp_id(qp));
	WARN_ON(found != qp);
	spin_lock_irqsave(&sdev->lock, flags);
	list_del(&qp->devq);
	spin_unlock_irqrestore(&sdev->lock, flags);

	vfree(qp->sendq);
	vfree(qp->recvq);
	vfree(qp->irq);
	vfree(qp->orq);

	siw_put_tx_cpu(qp->tx_cpu);

	atomic_dec(&sdev->num_qp);
}