0001 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
0002 
0003 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
0004 /* Copyright (c) 2008-2019, IBM Corporation */
0005 
0006 #include <linux/errno.h>
0007 #include <linux/types.h>
0008 #include <linux/net.h>
0009 #include <linux/scatterlist.h>
0010 #include <linux/llist.h>
0011 #include <asm/barrier.h>
0012 #include <net/tcp.h>
0013 
0014 #include "siw.h"
0015 #include "siw_verbs.h"
0016 #include "siw_mem.h"
0017 
0018 static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
0019     [SIW_QP_STATE_IDLE] = "IDLE",
0020     [SIW_QP_STATE_RTR] = "RTR",
0021     [SIW_QP_STATE_RTS] = "RTS",
0022     [SIW_QP_STATE_CLOSING] = "CLOSING",
0023     [SIW_QP_STATE_TERMINATE] = "TERMINATE",
0024     [SIW_QP_STATE_ERROR] = "ERROR"
0025 };
0026 
0027 /*
0028  * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
0029  * per-RDMAP message basis. Please keep the order of initializers. All MPA
0030  * lengths are initialized to the minimum packet size.
0031  */
0032 struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
0033     { /* RDMAP_RDMA_WRITE */
0034       .hdr_len = sizeof(struct iwarp_rdma_write),
0035       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
0036       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
0037                  cpu_to_be16(DDP_VERSION << 8) |
0038                  cpu_to_be16(RDMAP_VERSION << 6) |
0039                  cpu_to_be16(RDMAP_RDMA_WRITE),
0040       .rx_data = siw_proc_write },
0041     { /* RDMAP_RDMA_READ_REQ */
0042       .hdr_len = sizeof(struct iwarp_rdma_rreq),
0043       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
0044       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0045                  cpu_to_be16(RDMAP_VERSION << 6) |
0046                  cpu_to_be16(RDMAP_RDMA_READ_REQ),
0047       .rx_data = siw_proc_rreq },
0048     { /* RDMAP_RDMA_READ_RESP */
0049       .hdr_len = sizeof(struct iwarp_rdma_rresp),
0050       .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
0051       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
0052                  cpu_to_be16(DDP_VERSION << 8) |
0053                  cpu_to_be16(RDMAP_VERSION << 6) |
0054                  cpu_to_be16(RDMAP_RDMA_READ_RESP),
0055       .rx_data = siw_proc_rresp },
0056     { /* RDMAP_SEND */
0057       .hdr_len = sizeof(struct iwarp_send),
0058       .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
0059       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0060                  cpu_to_be16(RDMAP_VERSION << 6) |
0061                  cpu_to_be16(RDMAP_SEND),
0062       .rx_data = siw_proc_send },
0063     { /* RDMAP_SEND_INVAL */
0064       .hdr_len = sizeof(struct iwarp_send_inv),
0065       .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
0066       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0067                  cpu_to_be16(RDMAP_VERSION << 6) |
0068                  cpu_to_be16(RDMAP_SEND_INVAL),
0069       .rx_data = siw_proc_send },
0070     { /* RDMAP_SEND_SE */
0071       .hdr_len = sizeof(struct iwarp_send),
0072       .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
0073       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0074                  cpu_to_be16(RDMAP_VERSION << 6) |
0075                  cpu_to_be16(RDMAP_SEND_SE),
0076       .rx_data = siw_proc_send },
0077     { /* RDMAP_SEND_SE_INVAL */
0078       .hdr_len = sizeof(struct iwarp_send_inv),
0079       .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
0080       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0081                  cpu_to_be16(RDMAP_VERSION << 6) |
0082                  cpu_to_be16(RDMAP_SEND_SE_INVAL),
0083       .rx_data = siw_proc_send },
0084     { /* RDMAP_TERMINATE */
0085       .hdr_len = sizeof(struct iwarp_terminate),
0086       .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
0087       .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
0088                  cpu_to_be16(RDMAP_VERSION << 6) |
0089                  cpu_to_be16(RDMAP_TERMINATE),
0090       .rx_data = siw_proc_terminate }
0091 };
0092 
0093 void siw_qp_llp_data_ready(struct sock *sk)
0094 {
0095     struct siw_qp *qp;
0096 
0097     read_lock(&sk->sk_callback_lock);
0098 
0099     if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
0100         goto done;
0101 
0102     qp = sk_to_qp(sk);
0103 
0104     if (likely(!qp->rx_stream.rx_suspend &&
0105            down_read_trylock(&qp->state_lock))) {
0106         read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };
0107 
0108         if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
0109             /*
0110              * Implements the data receive operation during the
0111              * socket callback. TCP gracefully handles the case
0112              * where there is nothing to receive
0113              * (siw_tcp_rx_data() is then not called).
0114              */
0115             tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
0116 
0117         up_read(&qp->state_lock);
0118     } else {
0119         siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
0120                qp->rx_stream.rx_suspend);
0121     }
0122 done:
0123     read_unlock(&sk->sk_callback_lock);
0124 }
0125 
0126 void siw_qp_llp_close(struct siw_qp *qp)
0127 {
0128     siw_dbg_qp(qp, "enter llp close, state = %s\n",
0129            siw_qp_state_to_string[qp->attrs.state]);
0130 
0131     down_write(&qp->state_lock);
0132 
0133     qp->rx_stream.rx_suspend = 1;
0134     qp->tx_ctx.tx_suspend = 1;
0135     qp->attrs.sk = NULL;
0136 
0137     switch (qp->attrs.state) {
0138     case SIW_QP_STATE_RTS:
0139     case SIW_QP_STATE_RTR:
0140     case SIW_QP_STATE_IDLE:
0141     case SIW_QP_STATE_TERMINATE:
0142         qp->attrs.state = SIW_QP_STATE_ERROR;
0143         break;
0144     /*
0145      * SIW_QP_STATE_CLOSING:
0146      *
0147      * This is a forced close. Shall the QP be moved to
0148      * ERROR or IDLE?
0149      */
0150     case SIW_QP_STATE_CLOSING:
0151         if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
0152             qp->attrs.state = SIW_QP_STATE_ERROR;
0153         else
0154             qp->attrs.state = SIW_QP_STATE_IDLE;
0155         break;
0156 
0157     default:
0158         siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
0159                siw_qp_state_to_string[qp->attrs.state]);
0160         break;
0161     }
0162     siw_sq_flush(qp);
0163     siw_rq_flush(qp);
0164 
0165     /*
0166      * Dereference closing CEP
0167      */
0168     if (qp->cep) {
0169         siw_cep_put(qp->cep);
0170         qp->cep = NULL;
0171     }
0172 
0173     up_write(&qp->state_lock);
0174 
0175     siw_dbg_qp(qp, "llp close exit: state %s\n",
0176            siw_qp_state_to_string[qp->attrs.state]);
0177 }
0178 
0179 /*
0180  * Socket callback routine informing about newly available send space.
0181  * Schedules SQ work for processing pending SQ items.
0182  */
0183 void siw_qp_llp_write_space(struct sock *sk)
0184 {
0185     struct siw_cep *cep;
0186 
0187     read_lock(&sk->sk_callback_lock);
0188 
0189     cep = sk_to_cep(sk);
0190     if (cep) {
0191         cep->sk_write_space(sk);
0192 
0193         if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
0194             (void)siw_sq_start(cep->qp);
0195     }
0196 
0197     read_unlock(&sk->sk_callback_lock);
0198 }
0199 
0200 static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
0201 {
0202     if (irq_size) {
0203         irq_size = roundup_pow_of_two(irq_size);
0204         qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
0205         if (!qp->irq) {
0206             qp->attrs.irq_size = 0;
0207             return -ENOMEM;
0208         }
0209     }
0210     if (orq_size) {
0211         orq_size = roundup_pow_of_two(orq_size);
0212         qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
0213         if (!qp->orq) {
0214             qp->attrs.orq_size = 0;
0215             qp->attrs.irq_size = 0;
0216             vfree(qp->irq);
0217             return -ENOMEM;
0218         }
0219     }
0220     qp->attrs.irq_size = irq_size;
0221     qp->attrs.orq_size = orq_size;
0222     siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
0223     return 0;
0224 }
0225 
0226 static int siw_qp_enable_crc(struct siw_qp *qp)
0227 {
0228     struct siw_rx_stream *c_rx = &qp->rx_stream;
0229     struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
0230     int size;
0231 
0232     if (siw_crypto_shash == NULL)
0233         return -ENOENT;
0234 
0235     size = crypto_shash_descsize(siw_crypto_shash) +
0236         sizeof(struct shash_desc);
0237 
0238     c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
0239     c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
0240     if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
0241         kfree(c_tx->mpa_crc_hd);
0242         kfree(c_rx->mpa_crc_hd);
0243         c_tx->mpa_crc_hd = NULL;
0244         c_rx->mpa_crc_hd = NULL;
0245         return -ENOMEM;
0246     }
0247     c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
0248     c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
0249 
0250     return 0;
0251 }
0252 
0253 /*
0254  * Send a non-signalled READ or WRITE to the peer, as negotiated
0255  * with the MPAv2 P2P setup protocol. The work request is only created
0256  * as the current active WR and does not consume Send Queue space.
0257  *
0258  * Caller must hold QP state lock.
0259  */
0260 int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
0261 {
0262     struct siw_wqe *wqe = tx_wqe(qp);
0263     unsigned long flags;
0264     int rv = 0;
0265 
0266     spin_lock_irqsave(&qp->sq_lock, flags);
0267 
0268     if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
0269         spin_unlock_irqrestore(&qp->sq_lock, flags);
0270         return -EIO;
0271     }
0272     memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
0273 
0274     wqe->wr_status = SIW_WR_QUEUED;
0275     wqe->sqe.flags = 0;
0276     wqe->sqe.num_sge = 1;
0277     wqe->sqe.sge[0].length = 0;
0278     wqe->sqe.sge[0].laddr = 0;
0279     wqe->sqe.sge[0].lkey = 0;
0280     /*
0281      * While the STag must not be checked for an inbound zero-length
0282      * READ/WRITE, some HW may treat STag 0 specially.
0283      */
0284     wqe->sqe.rkey = 1;
0285     wqe->sqe.raddr = 0;
0286     wqe->processed = 0;
0287 
0288     if (ctrl & MPA_V2_RDMA_WRITE_RTR)
0289         wqe->sqe.opcode = SIW_OP_WRITE;
0290     else if (ctrl & MPA_V2_RDMA_READ_RTR) {
0291         struct siw_sqe *rreq = NULL;
0292 
0293         wqe->sqe.opcode = SIW_OP_READ;
0294 
0295         spin_lock(&qp->orq_lock);
0296 
0297         if (qp->attrs.orq_size)
0298             rreq = orq_get_free(qp);
0299         if (rreq) {
0300             siw_read_to_orq(rreq, &wqe->sqe);
0301             qp->orq_put++;
0302         } else
0303             rv = -EIO;
0304 
0305         spin_unlock(&qp->orq_lock);
0306     } else
0307         rv = -EINVAL;
0308 
0309     if (rv)
0310         wqe->wr_status = SIW_WR_IDLE;
0311 
0312     spin_unlock_irqrestore(&qp->sq_lock, flags);
0313 
0314     if (!rv)
0315         rv = siw_sq_start(qp);
0316 
0317     return rv;
0318 }
0319 
0320 /*
0321  * Map memory access error to DDP tagged error
0322  */
0323 enum ddp_ecode siw_tagged_error(enum siw_access_state state)
0324 {
0325     switch (state) {
0326     case E_STAG_INVALID:
0327         return DDP_ECODE_T_INVALID_STAG;
0328     case E_BASE_BOUNDS:
0329         return DDP_ECODE_T_BASE_BOUNDS;
0330     case E_PD_MISMATCH:
0331         return DDP_ECODE_T_STAG_NOT_ASSOC;
0332     case E_ACCESS_PERM:
0333         /*
0334          * RFC 5041 (DDP) lacks an ecode for insufficient access
0335          * permissions. 'Invalid STag' seems to be the closest
0336          * match, though.
0337          */
0338         return DDP_ECODE_T_INVALID_STAG;
0339     default:
0340         WARN_ON(1);
0341         return DDP_ECODE_T_INVALID_STAG;
0342     }
0343 }
0344 
0345 /*
0346  * Map memory access error to RDMAP protection error
0347  */
0348 enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
0349 {
0350     switch (state) {
0351     case E_STAG_INVALID:
0352         return RDMAP_ECODE_INVALID_STAG;
0353     case E_BASE_BOUNDS:
0354         return RDMAP_ECODE_BASE_BOUNDS;
0355     case E_PD_MISMATCH:
0356         return RDMAP_ECODE_STAG_NOT_ASSOC;
0357     case E_ACCESS_PERM:
0358         return RDMAP_ECODE_ACCESS_RIGHTS;
0359     default:
0360         return RDMAP_ECODE_UNSPECIFIED;
0361     }
0362 }
0363 
0364 void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
0365             u8 ecode, int in_tx)
0366 {
0367     if (!qp->term_info.valid) {
0368         memset(&qp->term_info, 0, sizeof(qp->term_info));
0369         qp->term_info.layer = layer;
0370         qp->term_info.etype = etype;
0371         qp->term_info.ecode = ecode;
0372         qp->term_info.in_tx = in_tx;
0373         qp->term_info.valid = 1;
0374     }
0375     siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
0376            layer, etype, ecode, in_tx ? "yes" : "no");
0377 }
0378 
0379 /*
0380  * Send a TERMINATE message, as defined in RFCs 5040/5041/5044/6581.
0381  * Sending TERMINATE messages is best effort - such messages
0382  * can only be sent if the QP is still connected and does
0383  * not have another outbound message in progress, i.e. the
0384  * TERMINATE message must not interfere with an incomplete current
0385  * transmit operation.
0386  */
0387 void siw_send_terminate(struct siw_qp *qp)
0388 {
0389     struct kvec iov[3];
0390     struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
0391     struct iwarp_terminate *term = NULL;
0392     union iwarp_hdr *err_hdr = NULL;
0393     struct socket *s = qp->attrs.sk;
0394     struct siw_rx_stream *srx = &qp->rx_stream;
0395     union iwarp_hdr *rx_hdr = &srx->hdr;
0396     u32 crc = 0;
0397     int num_frags, len_terminate, rv;
0398 
0399     if (!qp->term_info.valid)
0400         return;
0401 
0402     qp->term_info.valid = 0;
0403 
0404     if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
0405         siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
0406                tx_type(tx_wqe(qp)));
0407         return;
0408     }
0409     if (!s && qp->cep)
0410         /* QP not yet in RTS. Take socket from connection end point */
0411         s = qp->cep->sock;
0412 
0413     if (!s) {
0414         siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
0415         return;
0416     }
0417 
0418     term = kzalloc(sizeof(*term), GFP_KERNEL);
0419     if (!term)
0420         return;
0421 
0422     term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
0423     term->ddp_mo = 0;
0424     term->ddp_msn = cpu_to_be32(1);
0425 
0426     iov[0].iov_base = term;
0427     iov[0].iov_len = sizeof(*term);
0428 
0429     if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
0430         ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
0431          (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
0432         err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
0433         if (!err_hdr) {
0434             kfree(term);
0435             return;
0436         }
0437     }
0438     memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
0439            sizeof(struct iwarp_ctrl));
0440 
0441     __rdmap_term_set_layer(term, qp->term_info.layer);
0442     __rdmap_term_set_etype(term, qp->term_info.etype);
0443     __rdmap_term_set_ecode(term, qp->term_info.ecode);
0444 
0445     switch (qp->term_info.layer) {
0446     case TERM_ERROR_LAYER_RDMAP:
0447         if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
0448             /* No additional DDP/RDMAP header to be included */
0449             break;
0450 
0451         if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
0452             /*
0453              * The complete RDMAP frame will get attached, and
0454              * the DDP segment length is valid.
0455              */
0456             term->flag_m = 1;
0457             term->flag_d = 1;
0458             term->flag_r = 1;
0459 
0460             if (qp->term_info.in_tx) {
0461                 struct iwarp_rdma_rreq *rreq;
0462                 struct siw_wqe *wqe = tx_wqe(qp);
0463 
0464                 /* Inbound RREQ error, detected during
0465                  * RRESP creation. Take state from
0466                  * current TX work queue element to
0467                  * reconstruct peers RREQ.
0468                  * reconstruct the peer's RREQ.
0469                 rreq = (struct iwarp_rdma_rreq *)err_hdr;
0470 
0471                 memcpy(&rreq->ctrl,
0472                        &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
0473                        sizeof(struct iwarp_ctrl));
0474 
0475                 rreq->rsvd = 0;
0476                 rreq->ddp_qn =
0477                     htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
0478 
0479                 /* Provide RREQ's MSN as kept aside */
0480                 rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);
0481 
0482                 rreq->ddp_mo = htonl(wqe->processed);
0483                 rreq->sink_stag = htonl(wqe->sqe.rkey);
0484                 rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
0485                 rreq->read_size = htonl(wqe->sqe.sge[0].length);
0486                 rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
0487                 rreq->source_to =
0488                     cpu_to_be64(wqe->sqe.sge[0].laddr);
0489 
0490                 iov[1].iov_base = rreq;
0491                 iov[1].iov_len = sizeof(*rreq);
0492 
0493                 rx_hdr = (union iwarp_hdr *)rreq;
0494             } else {
0495                 /* Take RDMAP/DDP information from
0496                  * current (failed) inbound frame.
0497                  */
0498                 iov[1].iov_base = rx_hdr;
0499 
0500                 if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
0501                     RDMAP_RDMA_READ_REQ)
0502                     iov[1].iov_len =
0503                         sizeof(struct iwarp_rdma_rreq);
0504                 else /* SEND type */
0505                     iov[1].iov_len =
0506                         sizeof(struct iwarp_send);
0507             }
0508         } else {
0509             /* Do not report DDP hdr information if packet
0510              * layout is unknown
0511              */
0512             if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
0513                 (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
0514                 break;
0515 
0516             iov[1].iov_base = rx_hdr;
0517 
0518             /* Only DDP frame will get attached */
0519             if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
0520                 iov[1].iov_len =
0521                     sizeof(struct iwarp_rdma_write);
0522             else
0523                 iov[1].iov_len = sizeof(struct iwarp_send);
0524 
0525             term->flag_m = 1;
0526             term->flag_d = 1;
0527         }
0528         term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
0529         break;
0530 
0531     case TERM_ERROR_LAYER_DDP:
0532         /* Report an error encountered during DDP processing.
0533          * This can only happen as a result of inbound
0534          * DDP processing.
0535          */
0536 
0537         /* Do not report DDP hdr information if packet
0538          * layout is unknown
0539          */
0540         if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
0541              (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
0542             ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
0543              (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
0544             break;
0545 
0546         iov[1].iov_base = rx_hdr;
0547 
0548         if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
0549             iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
0550         else
0551             iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);
0552 
0553         term->flag_m = 1;
0554         term->flag_d = 1;
0555         break;
0556 
0557     default:
0558         break;
0559     }
0560     if (term->flag_m || term->flag_d || term->flag_r) {
0561         iov[2].iov_base = &crc;
0562         iov[2].iov_len = sizeof(crc);
0563         len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
0564         num_frags = 3;
0565     } else {
0566         iov[1].iov_base = &crc;
0567         iov[1].iov_len = sizeof(crc);
0568         len_terminate = sizeof(*term) + MPA_CRC_SIZE;
0569         num_frags = 2;
0570     }
0571 
0572     /* Adjust DDP Segment Length parameter, if valid */
0573     if (term->flag_m) {
0574         u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
0575         enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);
0576 
0577         real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
0578         rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
0579     }
0580 
0581     term->ctrl.mpa_len =
0582         cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
0583     if (qp->tx_ctx.mpa_crc_hd) {
0584         crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
0585         if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
0586                     (u8 *)iov[0].iov_base,
0587                     iov[0].iov_len))
0588             goto out;
0589 
0590         if (num_frags == 3) {
0591             if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
0592                         (u8 *)iov[1].iov_base,
0593                         iov[1].iov_len))
0594                 goto out;
0595         }
0596         crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
0597     }
0598 
0599     rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
0600     siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
0601            rv == len_terminate ? "success" : "failure",
0602            __rdmap_term_layer(term), __rdmap_term_etype(term),
0603            __rdmap_term_ecode(term), rv);
0604 out:
0605     kfree(term);
0606     kfree(err_hdr);
0607 }
0608 
0609 /*
0610  * Handle all attrs other than state
0611  */
0612 static void siw_qp_modify_nonstate(struct siw_qp *qp,
0613                    struct siw_qp_attrs *attrs,
0614                    enum siw_qp_attr_mask mask)
0615 {
0616     if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
0617         if (attrs->flags & SIW_RDMA_BIND_ENABLED)
0618             qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
0619         else
0620             qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
0621 
0622         if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
0623             qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
0624         else
0625             qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
0626 
0627         if (attrs->flags & SIW_RDMA_READ_ENABLED)
0628             qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
0629         else
0630             qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
0631     }
0632 }
0633 
0634 static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
0635                       struct siw_qp_attrs *attrs,
0636                       enum siw_qp_attr_mask mask)
0637 {
0638     int rv = 0;
0639 
0640     switch (attrs->state) {
0641     case SIW_QP_STATE_RTS:
0642         if (attrs->flags & SIW_MPA_CRC) {
0643             rv = siw_qp_enable_crc(qp);
0644             if (rv)
0645                 break;
0646         }
0647         if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
0648             siw_dbg_qp(qp, "no socket\n");
0649             rv = -EINVAL;
0650             break;
0651         }
0652         if (!(mask & SIW_QP_ATTR_MPA)) {
0653             siw_dbg_qp(qp, "no MPA\n");
0654             rv = -EINVAL;
0655             break;
0656         }
0657         /*
0658          * Initialize iWARP TX state
0659          */
0660         qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
0661         qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
0662         qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
0663 
0664         /*
0665          * Initialize iWARP RX state
0666          */
0667         qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
0668         qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
0669         qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
0670 
0671         /*
0672          * Initialize the IRQ and ORQ read queues; the caller
0673          * has already checked the limits.
0674          */
0675         rv = siw_qp_readq_init(qp, attrs->irq_size,
0676                        attrs->orq_size);
0677         if (rv)
0678             break;
0679 
0680         qp->attrs.sk = attrs->sk;
0681         qp->attrs.state = SIW_QP_STATE_RTS;
0682 
0683         siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
0684                attrs->flags & SIW_MPA_CRC ? "y" : "n",
0685                qp->attrs.orq_size, qp->attrs.irq_size);
0686         break;
0687 
0688     case SIW_QP_STATE_ERROR:
0689         siw_rq_flush(qp);
0690         qp->attrs.state = SIW_QP_STATE_ERROR;
0691         if (qp->cep) {
0692             siw_cep_put(qp->cep);
0693             qp->cep = NULL;
0694         }
0695         break;
0696 
0697     default:
0698         break;
0699     }
0700     return rv;
0701 }
0702 
0703 static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
0704                      struct siw_qp_attrs *attrs)
0705 {
0706     int drop_conn = 0;
0707 
0708     switch (attrs->state) {
0709     case SIW_QP_STATE_CLOSING:
0710         /*
0711          * Verbs: move to IDLE if SQ and ORQ are empty.
0712          * Move to ERROR otherwise. But first of all we must
0713          * close the connection. So we keep CLOSING or ERROR
0714          * as a transient state, schedule connection drop work
0715          * and wait for the socket state change upcall to
0716          * come back closed.
0717          */
0718         if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
0719             qp->attrs.state = SIW_QP_STATE_CLOSING;
0720         } else {
0721             qp->attrs.state = SIW_QP_STATE_ERROR;
0722             siw_sq_flush(qp);
0723         }
0724         siw_rq_flush(qp);
0725 
0726         drop_conn = 1;
0727         break;
0728 
0729     case SIW_QP_STATE_TERMINATE:
0730         qp->attrs.state = SIW_QP_STATE_TERMINATE;
0731 
0732         siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
0733                    RDMAP_ETYPE_CATASTROPHIC,
0734                    RDMAP_ECODE_UNSPECIFIED, 1);
0735         drop_conn = 1;
0736         break;
0737 
0738     case SIW_QP_STATE_ERROR:
0739         /*
0740          * This is an emergency close.
0741          *
0742          * Any in-progress transmit operation will get
0743          * cancelled.
0744          * This will likely result in a protocol failure
0745          * if a TX operation is in transit. The caller
0746          * could unconditionally wait to give the current
0747          * operation a chance to complete.
0748          * Especially, how to handle the non-empty IRQ case?
0749          * The peer was asking for data transfer at a valid
0750          * point in time.
0751          */
0752         siw_sq_flush(qp);
0753         siw_rq_flush(qp);
0754         qp->attrs.state = SIW_QP_STATE_ERROR;
0755         drop_conn = 1;
0756         break;
0757 
0758     default:
0759         break;
0760     }
0761     return drop_conn;
0762 }
0763 
0764 static void siw_qp_nextstate_from_term(struct siw_qp *qp,
0765                        struct siw_qp_attrs *attrs)
0766 {
0767     switch (attrs->state) {
0768     case SIW_QP_STATE_ERROR:
0769         siw_rq_flush(qp);
0770         qp->attrs.state = SIW_QP_STATE_ERROR;
0771 
0772         if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
0773             siw_sq_flush(qp);
0774         break;
0775 
0776     default:
0777         break;
0778     }
0779 }
0780 
0781 static int siw_qp_nextstate_from_close(struct siw_qp *qp,
0782                        struct siw_qp_attrs *attrs)
0783 {
0784     int rv = 0;
0785 
0786     switch (attrs->state) {
0787     case SIW_QP_STATE_IDLE:
0788         WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
0789         qp->attrs.state = SIW_QP_STATE_IDLE;
0790         break;
0791 
0792     case SIW_QP_STATE_CLOSING:
0793         /*
0794          * The LLP may have already moved the QP to CLOSING
0795          * due to a graceful peer close initiation.
0796          */
0797         break;
0798 
0799     case SIW_QP_STATE_ERROR:
0800         /*
0801          * QP was moved to CLOSING by LLP event
0802          * not yet seen by user.
0803          */
0804         qp->attrs.state = SIW_QP_STATE_ERROR;
0805 
0806         if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
0807             siw_sq_flush(qp);
0808 
0809         siw_rq_flush(qp);
0810         break;
0811 
0812     default:
0813         siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
0814                siw_qp_state_to_string[qp->attrs.state],
0815                siw_qp_state_to_string[attrs->state]);
0816 
0817         rv = -ECONNABORTED;
0818     }
0819     return rv;
0820 }
0821 
0822 /*
0823  * Caller must hold qp->state_lock
0824  */
0825 int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
0826           enum siw_qp_attr_mask mask)
0827 {
0828     int drop_conn = 0, rv = 0;
0829 
0830     if (!mask)
0831         return 0;
0832 
0833     siw_dbg_qp(qp, "state: %s => %s\n",
0834            siw_qp_state_to_string[qp->attrs.state],
0835            siw_qp_state_to_string[attrs->state]);
0836 
0837     if (mask != SIW_QP_ATTR_STATE)
0838         siw_qp_modify_nonstate(qp, attrs, mask);
0839 
0840     if (!(mask & SIW_QP_ATTR_STATE))
0841         return 0;
0842 
0843     switch (qp->attrs.state) {
0844     case SIW_QP_STATE_IDLE:
0845     case SIW_QP_STATE_RTR:
0846         rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
0847         break;
0848 
0849     case SIW_QP_STATE_RTS:
0850         drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
0851         break;
0852 
0853     case SIW_QP_STATE_TERMINATE:
0854         siw_qp_nextstate_from_term(qp, attrs);
0855         break;
0856 
0857     case SIW_QP_STATE_CLOSING:
0858         siw_qp_nextstate_from_close(qp, attrs);
0859         break;
0860     default:
0861         break;
0862     }
0863     if (drop_conn)
0864         siw_qp_cm_drop(qp, 0);
0865 
0866     return rv;
0867 }
0868 
0869 void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
0870 {
0871     rreq->id = sqe->id;
0872     rreq->opcode = sqe->opcode;
0873     rreq->sge[0].laddr = sqe->sge[0].laddr;
0874     rreq->sge[0].length = sqe->sge[0].length;
0875     rreq->sge[0].lkey = sqe->sge[0].lkey;
0876     rreq->sge[1].lkey = sqe->sge[1].lkey;
0877     rreq->flags = sqe->flags | SIW_WQE_VALID;
0878     rreq->num_sge = 1;
0879 }
0880 
0881 static int siw_activate_tx_from_sq(struct siw_qp *qp)
0882 {
0883     struct siw_sqe *sqe;
0884     struct siw_wqe *wqe = tx_wqe(qp);
0885     int rv = 1;
0886 
0887     sqe = sq_get_next(qp);
0888     if (!sqe)
0889         return 0;
0890 
0891     memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
0892     wqe->wr_status = SIW_WR_QUEUED;
0893 
0894     /* First copy SQE to kernel private memory */
0895     memcpy(&wqe->sqe, sqe, sizeof(*sqe));
0896 
0897     if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
0898         rv = -EINVAL;
0899         goto out;
0900     }
0901     if (wqe->sqe.flags & SIW_WQE_INLINE) {
0902         if (wqe->sqe.opcode != SIW_OP_SEND &&
0903             wqe->sqe.opcode != SIW_OP_WRITE) {
0904             rv = -EINVAL;
0905             goto out;
0906         }
0907         if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
0908             rv = -EINVAL;
0909             goto out;
0910         }
0911         wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
0912         wqe->sqe.sge[0].lkey = 0;
0913         wqe->sqe.num_sge = 1;
0914     }
0915     if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
0916         /* A READ cannot be fenced */
0917         if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
0918                  wqe->sqe.opcode ==
0919                      SIW_OP_READ_LOCAL_INV)) {
0920             siw_dbg_qp(qp, "cannot fence read\n");
0921             rv = -EINVAL;
0922             goto out;
0923         }
0924         spin_lock(&qp->orq_lock);
0925 
0926         if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
0927             qp->tx_ctx.orq_fence = 1;
0928             rv = 0;
0929         }
0930         spin_unlock(&qp->orq_lock);
0931 
0932     } else if (wqe->sqe.opcode == SIW_OP_READ ||
0933            wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
0934         struct siw_sqe *rreq;
0935 
0936         if (unlikely(!qp->attrs.orq_size)) {
0937             /* We negotiated not to send READ requests */
0938             rv = -EINVAL;
0939             goto out;
0940         }
0941         wqe->sqe.num_sge = 1;
0942 
0943         spin_lock(&qp->orq_lock);
0944 
0945         rreq = orq_get_free(qp);
0946         if (rreq) {
0947             /*
0948              * Make an immediate copy in ORQ to be ready
0949              * to process loopback READ reply
0950              */
0951             siw_read_to_orq(rreq, &wqe->sqe);
0952             qp->orq_put++;
0953         } else {
0954             qp->tx_ctx.orq_fence = 1;
0955             rv = 0;
0956         }
0957         spin_unlock(&qp->orq_lock);
0958     }
0959 
0960     /* Clear SQE, can be re-used by application */
0961     smp_store_mb(sqe->flags, 0);
0962     qp->sq_get++;
0963 out:
0964     if (unlikely(rv < 0)) {
0965         siw_dbg_qp(qp, "error %d\n", rv);
0966         wqe->wr_status = SIW_WR_IDLE;
0967     }
0968     return rv;
0969 }
0970 
0971 /*
0972  * Must be called with SQ locked.
0973  * To avoid complete SQ starvation by constant inbound READ requests,
0974  * the active IRQ is no longer served once qp->irq_burst reaches
0975  * SIW_IRQ_MAXBURST_SQ_ACTIVE while the SQ has pending work.
0976  */
0977 int siw_activate_tx(struct siw_qp *qp)
0978 {
0979     struct siw_sqe *irqe;
0980     struct siw_wqe *wqe = tx_wqe(qp);
0981 
0982     if (!qp->attrs.irq_size)
0983         return siw_activate_tx_from_sq(qp);
0984 
0985     irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
0986 
0987     if (!(irqe->flags & SIW_WQE_VALID))
0988         return siw_activate_tx_from_sq(qp);
0989 
0990     /*
0991      * Avoid local WQE processing starvation in case
0992      * of constant inbound READ request stream
0993      */
0994     if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
0995         qp->irq_burst = 0;
0996         return siw_activate_tx_from_sq(qp);
0997     }
0998     memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
0999     wqe->wr_status = SIW_WR_QUEUED;
1000 
1001     /* start READ RESPONSE */
1002     wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
1003     wqe->sqe.flags = 0;
1004     if (irqe->num_sge) {
1005         wqe->sqe.num_sge = 1;
1006         wqe->sqe.sge[0].length = irqe->sge[0].length;
1007         wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
1008         wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
1009     } else {
1010         wqe->sqe.num_sge = 0;
1011     }
1012 
1013     /* Retain original RREQ's message sequence number for
1014      * potential error reporting cases.
1015      */
1016     wqe->sqe.sge[1].length = irqe->sge[1].length;
1017 
1018     wqe->sqe.rkey = irqe->rkey;
1019     wqe->sqe.raddr = irqe->raddr;
1020 
1021     wqe->processed = 0;
1022     qp->irq_get++;
1023 
1024     /* mark current IRQ entry free */
1025     smp_store_mb(irqe->flags, 0);
1026 
1027     return 1;
1028 }
1029 
1030 /*
1031  * Check if current CQ state qualifies for calling CQ completion
1032  * handler. Must be called with CQ lock held.
1033  */
1034 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
1035 {
1036     u32 cq_notify;
1037 
1038     if (!cq->base_cq.comp_handler)
1039         return false;
1040 
1041     /* Read application shared notification state */
1042     cq_notify = READ_ONCE(cq->notify->flags);
1043 
1044     if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
1045         ((cq_notify & SIW_NOTIFY_SOLICITED) &&
1046          (flags & SIW_WQE_SOLICITED))) {
1047         /*
1048          * CQ notification is one-shot: Since the
1049          * current CQE causes user notification,
1050          * the CQ gets disarmed and must be re-armed
1051          * by the user for a new notification.
1052          */
1053         WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
1054 
1055         return true;
1056     }
1057     return false;
1058 }
1059 
1060 int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
1061              enum siw_wc_status status)
1062 {
1063     struct siw_cq *cq = qp->scq;
1064     int rv = 0;
1065 
1066     if (cq) {
1067         u32 sqe_flags = sqe->flags;
1068         struct siw_cqe *cqe;
1069         u32 idx;
1070         unsigned long flags;
1071 
1072         spin_lock_irqsave(&cq->lock, flags);
1073 
1074         idx = cq->cq_put % cq->num_cqe;
1075         cqe = &cq->queue[idx];
1076 
1077         if (!READ_ONCE(cqe->flags)) {
1078             bool notify;
1079 
1080             cqe->id = sqe->id;
1081             cqe->opcode = sqe->opcode;
1082             cqe->status = status;
1083             cqe->imm_data = 0;
1084             cqe->bytes = bytes;
1085 
1086             if (rdma_is_kernel_res(&cq->base_cq.res))
1087                 cqe->base_qp = &qp->base_qp;
1088             else
1089                 cqe->qp_id = qp_id(qp);
1090 
1091             /* mark CQE valid for application */
1092             WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
1093             /* recycle SQE */
1094             smp_store_mb(sqe->flags, 0);
1095 
1096             cq->cq_put++;
1097             notify = siw_cq_notify_now(cq, sqe_flags);
1098 
1099             spin_unlock_irqrestore(&cq->lock, flags);
1100 
1101             if (notify) {
1102                 siw_dbg_cq(cq, "Call completion handler\n");
1103                 cq->base_cq.comp_handler(&cq->base_cq,
1104                         cq->base_cq.cq_context);
1105             }
1106         } else {
1107             spin_unlock_irqrestore(&cq->lock, flags);
1108             rv = -ENOMEM;
1109             siw_cq_event(cq, IB_EVENT_CQ_ERR);
1110         }
1111     } else {
1112         /* recycle SQE */
1113         smp_store_mb(sqe->flags, 0);
1114     }
1115     return rv;
1116 }
1117 
1118 int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
1119              u32 inval_stag, enum siw_wc_status status)
1120 {
1121     struct siw_cq *cq = qp->rcq;
1122     int rv = 0;
1123 
1124     if (cq) {
1125         struct siw_cqe *cqe;
1126         u32 idx;
1127         unsigned long flags;
1128 
1129         spin_lock_irqsave(&cq->lock, flags);
1130 
1131         idx = cq->cq_put % cq->num_cqe;
1132         cqe = &cq->queue[idx];
1133 
1134         if (!READ_ONCE(cqe->flags)) {
1135             bool notify;
1136             u8 cqe_flags = SIW_WQE_VALID;
1137 
1138             cqe->id = rqe->id;
1139             cqe->opcode = SIW_OP_RECEIVE;
1140             cqe->status = status;
1141             cqe->imm_data = 0;
1142             cqe->bytes = bytes;
1143 
1144             if (rdma_is_kernel_res(&cq->base_cq.res)) {
1145                 cqe->base_qp = &qp->base_qp;
1146                 if (inval_stag) {
1147                     cqe_flags |= SIW_WQE_REM_INVAL;
1148                     cqe->inval_stag = inval_stag;
1149                 }
1150             } else {
1151                 cqe->qp_id = qp_id(qp);
1152             }
1153             /* mark CQE valid for application */
1154             WRITE_ONCE(cqe->flags, cqe_flags);
1155             /* recycle RQE */
1156             smp_store_mb(rqe->flags, 0);
1157 
1158             cq->cq_put++;
1159             notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);
1160 
1161             spin_unlock_irqrestore(&cq->lock, flags);
1162 
1163             if (notify) {
1164                 siw_dbg_cq(cq, "Call completion handler\n");
1165                 cq->base_cq.comp_handler(&cq->base_cq,
1166                         cq->base_cq.cq_context);
1167             }
1168         } else {
1169             spin_unlock_irqrestore(&cq->lock, flags);
1170             rv = -ENOMEM;
1171             siw_cq_event(cq, IB_EVENT_CQ_ERR);
1172         }
1173     } else {
1174         /* recycle RQE */
1175         smp_store_mb(rqe->flags, 0);
1176     }
1177     return rv;
1178 }
1179 
1180 /*
1181  * siw_sq_flush()
1182  *
1183  * Flush SQ and ORQ entries to CQ.
1184  *
1185  * Must be called with QP state write lock held.
1186  * Therefore, SQ and ORQ lock must not be taken.
1187  */
1188 void siw_sq_flush(struct siw_qp *qp)
1189 {
1190     struct siw_sqe *sqe;
1191     struct siw_wqe *wqe = tx_wqe(qp);
1192     int async_event = 0;
1193 
1194     /*
1195      * Start with completing any work currently on the ORQ
1196      */
1197     while (qp->attrs.orq_size) {
1198         sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
1199         if (!READ_ONCE(sqe->flags))
1200             break;
1201 
1202         if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1203             break;
1204 
1205         WRITE_ONCE(sqe->flags, 0);
1206         qp->orq_get++;
1207     }
1208     /*
1209      * Flush an in-progress WQE if present
1210      */
1211     if (wqe->wr_status != SIW_WR_IDLE) {
1212         siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
1213                tx_type(wqe), wqe->wr_status);
1214 
1215         siw_wqe_put_mem(wqe, tx_type(wqe));
1216 
1217         if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
1218             ((tx_type(wqe) != SIW_OP_READ &&
1219               tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
1220              wqe->wr_status == SIW_WR_QUEUED))
1221             /*
1222              * An in-progress Read Request is already in
1223              * the ORQ
1224              */
1225             siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
1226                      SIW_WC_WR_FLUSH_ERR);
1227 
1228         wqe->wr_status = SIW_WR_IDLE;
1229     }
1230     /*
1231      * Flush the Send Queue
1232      */
1233     while (qp->attrs.sq_size) {
1234         sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
1235         if (!READ_ONCE(sqe->flags))
1236             break;
1237 
1238         async_event = 1;
1239         if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1240             /*
1241              * Shall IB_EVENT_SQ_DRAINED be suppressed if work
1242              * completion fails?
1243              */
1244             break;
1245 
1246         WRITE_ONCE(sqe->flags, 0);
1247         qp->sq_get++;
1248     }
1249     if (async_event)
1250         siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
1251 }
1252 
1253 /*
1254  * siw_rq_flush()
1255  *
1256  * Flush recv queue entries to CQ. Also
1257  * takes care of pending active tagged and untagged
1258  * inbound transfers, which still hold references on
1259  * target memory.
1260  *
1261  * Must be called with QP state write lock held.
1262  * Therefore, RQ lock must not be taken.
1263  */
1264 void siw_rq_flush(struct siw_qp *qp)
1265 {
1266     struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;
1267 
1268     /*
1269      * Flush an in-progress untagged operation if present
1270      */
1271     if (wqe->wr_status != SIW_WR_IDLE) {
1272         siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
1273                rx_type(wqe), wqe->wr_status);
1274 
1275         siw_wqe_put_mem(wqe, rx_type(wqe));
1276 
1277         if (rx_type(wqe) == SIW_OP_RECEIVE) {
1278             siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
1279                      0, SIW_WC_WR_FLUSH_ERR);
1280         } else if (rx_type(wqe) != SIW_OP_READ &&
1281                rx_type(wqe) != SIW_OP_READ_RESPONSE &&
1282                rx_type(wqe) != SIW_OP_WRITE) {
1283             siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
1284         }
1285         wqe->wr_status = SIW_WR_IDLE;
1286     }
1287     wqe = &qp->rx_tagged.wqe_active;
1288 
1289     if (wqe->wr_status != SIW_WR_IDLE) {
1290         siw_wqe_put_mem(wqe, rx_type(wqe));
1291         wqe->wr_status = SIW_WR_IDLE;
1292     }
1293     /*
1294      * Flush the Receive Queue
1295      */
1296     while (qp->attrs.rq_size) {
1297         struct siw_rqe *rqe =
1298             &qp->recvq[qp->rq_get % qp->attrs.rq_size];
1299 
1300         if (!READ_ONCE(rqe->flags))
1301             break;
1302 
1303         if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1304             break;
1305 
1306         WRITE_ONCE(rqe->flags, 0);
1307         qp->rq_get++;
1308     }
1309 }
1310 
1311 int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
1312 {
1313     int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
1314               GFP_KERNEL);
1315 
1316     if (!rv) {
1317         kref_init(&qp->ref);
1318         qp->sdev = sdev;
1319         siw_dbg_qp(qp, "new QP\n");
1320     }
1321     return rv;
1322 }
1323 
1324 void siw_free_qp(struct kref *ref)
1325 {
1326     struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
1327     struct siw_device *sdev = qp->sdev;
1328     unsigned long flags;
1329 
1330     if (qp->cep)
1331         siw_cep_put(qp->cep);
1332 
1333     found = xa_erase(&sdev->qp_xa, qp_id(qp));
1334     WARN_ON(found != qp);
1335     spin_lock_irqsave(&sdev->lock, flags);
1336     list_del(&qp->devq);
1337     spin_unlock_irqrestore(&sdev->lock, flags);
1338 
1339     vfree(qp->sendq);
1340     vfree(qp->recvq);
1341     vfree(qp->irq);
1342     vfree(qp->orq);
1343 
1344     siw_put_tx_cpu(qp->tx_cpu);
1345 
1346     atomic_dec(&sdev->num_qp);
1347 }