0001 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
0002 
0003 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
0004 /* Copyright (c) 2008-2019, IBM Corporation */
0005 
0006 #include <linux/errno.h>
0007 #include <linux/types.h>
0008 #include <linux/net.h>
0009 #include <linux/scatterlist.h>
0010 #include <linux/highmem.h>
0011 
0012 #include <rdma/iw_cm.h>
0013 #include <rdma/ib_verbs.h>
0014 
0015 #include "siw.h"
0016 #include "siw_verbs.h"
0017 #include "siw_mem.h"
0018 
0019 /*
0020  * siw_rx_umem()
0021  *
0022  * Receive data of @len into target referenced by @dest_addr.
0023  *
0024  * @srx:    Receive Context
0025  * @umem:   siw representation of target memory
0026  * @dest_addr:  user virtual address
0027  * @len:    number of bytes to place
0028  */
0029 static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
0030                u64 dest_addr, int len)
0031 {
0032     int copied = 0;
0033 
0034     while (len) {
0035         struct page *p;
0036         int pg_off, bytes, rv;
0037         void *dest;
0038 
0039         p = siw_get_upage(umem, dest_addr);
0040         if (unlikely(!p)) {
0041             pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
0042                 __func__, qp_id(rx_qp(srx)),
0043                 (void *)(uintptr_t)dest_addr,
0044                 (void *)(uintptr_t)umem->fp_addr);
0045             /* siw internal error */
0046             srx->skb_copied += copied;
0047             srx->skb_new -= copied;
0048 
0049             return -EFAULT;
0050         }
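             /*
              * Copy page by page: pg_off is the byte offset into the
              * current target page, and 'bytes' never crosses the
              * page boundary.
              */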
0051         pg_off = dest_addr & ~PAGE_MASK;
0052         bytes = min(len, (int)PAGE_SIZE - pg_off);
0053 
0054         siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
0055 
0056         dest = kmap_atomic(p);
0057         rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
0058                    bytes);
0059 
0060         if (unlikely(rv)) {
0061             kunmap_atomic(dest);
0062             srx->skb_copied += copied;
0063             srx->skb_new -= copied;
0064 
0065             pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
0066                 qp_id(rx_qp(srx)), __func__, len, p, rv);
0067 
0068             return -EFAULT;
0069         }
0070         if (srx->mpa_crc_hd) {
0071             if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
0072                 crypto_shash_update(srx->mpa_crc_hd,
0073                     (u8 *)(dest + pg_off), bytes);
0074                 kunmap_atomic(dest);
0075             } else {
0076                 kunmap_atomic(dest);
0077                 /*
0078                  * Do CRC on original, not target buffer.
0079                  * Some user land applications may
0080                  * concurrently write the target buffer,
0081                  * which would yield a broken CRC.
0082                  * Walking the skb twice is very inefficient.
0083                  * Folding the CRC into skb_copy_bits()
0084                  * would be much better, but is currently
0085                  * not supported.
0086                  */
0087                 siw_crc_skb(srx, bytes);
0088             }
0089         } else {
0090             kunmap_atomic(dest);
0091         }
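             /*
              * Advance the skb read position and the target address;
              * any further page of this transfer starts at offset 0.
              */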
0092         srx->skb_offset += bytes;
0093         copied += bytes;
0094         len -= bytes;
0095         dest_addr += bytes;
0096         pg_off = 0;
0097     }
0098     srx->skb_copied += copied;
0099     srx->skb_new -= copied;
0100 
0101     return copied;
0102 }
0103 
0104 static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
0105 {
0106     int rv;
0107 
0108     siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
0109 
0110     rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
0111     if (unlikely(rv)) {
0112         pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
0113             qp_id(rx_qp(srx)), __func__, len, kva, rv);
0114 
0115         return rv;
0116     }
0117     if (srx->mpa_crc_hd)
0118         crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
0119 
0120     srx->skb_offset += len;
0121     srx->skb_copied += len;
0122     srx->skb_new -= len;
0123 
0124     return len;
0125 }
0126 
0127 static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
0128               struct siw_mem *mem, u64 addr, int len)
0129 {
0130     struct siw_pbl *pbl = mem->pbl;
0131     u64 offset = addr - mem->va;
0132     int copied = 0;
0133 
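         /*
          * Walk the physical buffer list: translate the tagged target
          * range piecewise into directly addressable buffer chunks and
          * let siw_rx_kva() do the skb copy (and CRC update, if armed).
          */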
0134     while (len) {
0135         int bytes;
0136         dma_addr_t buf_addr =
0137             siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
0138         if (!buf_addr)
0139             break;
0140 
0141         bytes = min(bytes, len);
0142         if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) ==
0143             bytes) {
0144             copied += bytes;
0145             offset += bytes;
0146             len -= bytes;
0147         } else {
0148             break;
0149         }
0150     }
0151     return copied;
0152 }
0153 
0154 /*
0155  * siw_rresp_check_ntoh()
0156  *
0157  * Check incoming RRESP fragment header against expected
0158  * header values and update expected values for potential next
0159  * fragment.
0160  *
0161  * NOTE: This function must be called only if a RRESP DDP segment
0162  *       starts but not for fragmented consecutive pieces of an
0163  *       already started DDP segment.
0164  */
0165 static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
0166                 struct siw_rx_fpdu *frx)
0167 {
0168     struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
0169     struct siw_wqe *wqe = &frx->wqe_active;
0170     enum ddp_ecode ecode;
0171 
0172     u32 sink_stag = be32_to_cpu(rresp->sink_stag);
0173     u64 sink_to = be64_to_cpu(rresp->sink_to);
0174 
0175     if (frx->first_ddp_seg) {
0176         srx->ddp_stag = wqe->sqe.sge[0].lkey;
0177         srx->ddp_to = wqe->sqe.sge[0].laddr;
0178         frx->pbl_idx = 0;
0179     }
0180     /* Below checks extend beyond the semantics of DDP, and
0181      * into RDMAP:
0182      * We check if the read response matches exactly the
0183      * read request which was sent to the remote peer to
0184      * trigger this read response. RFC5040/5041 do not
0185      * always have a proper error code for the detected
0186      * error cases. We choose 'base or bounds error' for
0187      * cases where the inbound STag is valid, but offset
0188      * or length do not match our response receive state.
0189      */
0190     if (unlikely(srx->ddp_stag != sink_stag)) {
0191         pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
0192             qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
0193         ecode = DDP_ECODE_T_INVALID_STAG;
0194         goto error;
0195     }
0196     if (unlikely(srx->ddp_to != sink_to)) {
0197         pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
0198             qp_id(rx_qp(srx)), (unsigned long long)sink_to,
0199             (unsigned long long)srx->ddp_to);
0200         ecode = DDP_ECODE_T_BASE_BOUNDS;
0201         goto error;
0202     }
0203     if (unlikely(!frx->more_ddp_segs &&
0204              (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
0205         pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
0206             qp_id(rx_qp(srx)),
0207             wqe->processed + srx->fpdu_part_rem, wqe->bytes);
0208         ecode = DDP_ECODE_T_BASE_BOUNDS;
0209         goto error;
0210     }
0211     return 0;
0212 error:
0213     siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
0214                DDP_ETYPE_TAGGED_BUF, ecode, 0);
0215     return -EINVAL;
0216 }
0217 
0218 /*
0219  * siw_write_check_ntoh()
0220  *
0221  * Check incoming WRITE fragment header against expected
0222  * header values and update expected values for potential next
0223  * fragment
0224  *
0225  * NOTE: This function must be called only if a WRITE DDP segment
0226  *       starts but not for fragmented consecutive pieces of an
0227  *       already started DDP segment.
0228  */
0229 static int siw_write_check_ntoh(struct siw_rx_stream *srx,
0230                 struct siw_rx_fpdu *frx)
0231 {
0232     struct iwarp_rdma_write *write = &srx->hdr.rwrite;
0233     enum ddp_ecode ecode;
0234 
0235     u32 sink_stag = be32_to_cpu(write->sink_stag);
0236     u64 sink_to = be64_to_cpu(write->sink_to);
0237 
0238     if (frx->first_ddp_seg) {
0239         srx->ddp_stag = sink_stag;
0240         srx->ddp_to = sink_to;
0241         frx->pbl_idx = 0;
0242     } else {
0243         if (unlikely(srx->ddp_stag != sink_stag)) {
0244             pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
0245                 qp_id(rx_qp(srx)), sink_stag,
0246                 srx->ddp_stag);
0247             ecode = DDP_ECODE_T_INVALID_STAG;
0248             goto error;
0249         }
0250         if (unlikely(srx->ddp_to != sink_to)) {
0251             pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
0252                 qp_id(rx_qp(srx)),
0253                 (unsigned long long)sink_to,
0254                 (unsigned long long)srx->ddp_to);
0255             ecode = DDP_ECODE_T_BASE_BOUNDS;
0256             goto error;
0257         }
0258     }
0259     return 0;
0260 error:
0261     siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
0262                DDP_ETYPE_TAGGED_BUF, ecode, 0);
0263     return -EINVAL;
0264 }
0265 
0266 /*
0267  * siw_send_check_ntoh()
0268  *
0269  * Check incoming SEND fragment header against expected
0270  * header values and update expected MSN if no next
0271  * fragment expected
0272  *
0273  * NOTE: This function must be called only if a SEND DDP segment
0274  *       starts but not for fragmented consecutive pieces of an
0275  *       already started DDP segment.
0276  */
0277 static int siw_send_check_ntoh(struct siw_rx_stream *srx,
0278                    struct siw_rx_fpdu *frx)
0279 {
0280     struct iwarp_send_inv *send = &srx->hdr.send_inv;
0281     struct siw_wqe *wqe = &frx->wqe_active;
0282     enum ddp_ecode ecode;
0283 
0284     u32 ddp_msn = be32_to_cpu(send->ddp_msn);
0285     u32 ddp_mo = be32_to_cpu(send->ddp_mo);
0286     u32 ddp_qn = be32_to_cpu(send->ddp_qn);
0287 
0288     if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
0289         pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
0290             qp_id(rx_qp(srx)), ddp_qn);
0291         ecode = DDP_ECODE_UT_INVALID_QN;
0292         goto error;
0293     }
0294     if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
0295         pr_warn("siw: [QP %u]: send msn: %u != %u\n",
0296             qp_id(rx_qp(srx)), ddp_msn,
0297             srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
0298         ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
0299         goto error;
0300     }
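         /*
          * The untagged message offset (MO) is the byte position within
          * the current SEND; with in-order delivery it must equal the
          * number of bytes already placed for this WQE.
          */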
0301     if (unlikely(ddp_mo != wqe->processed)) {
0302         pr_warn("siw: [QP %u], send mo: %u != %u\n",
0303             qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
0304         ecode = DDP_ECODE_UT_INVALID_MO;
0305         goto error;
0306     }
0307     if (frx->first_ddp_seg) {
0308         /* initialize user memory write position */
0309         frx->sge_idx = 0;
0310         frx->sge_off = 0;
0311         frx->pbl_idx = 0;
0312 
0313         /* only valid for SEND_INV and SEND_SE_INV operations */
0314         srx->inval_stag = be32_to_cpu(send->inval_stag);
0315     }
0316     if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
0317         siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
0318                wqe->bytes, wqe->processed, srx->fpdu_part_rem);
0319         wqe->wc_status = SIW_WC_LOC_LEN_ERR;
0320         ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
0321         goto error;
0322     }
0323     return 0;
0324 error:
0325     siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
0326                DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
0327     return -EINVAL;
0328 }
0329 
0330 static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
0331 {
0332     struct siw_rqe *rqe;
0333     struct siw_srq *srq;
0334     struct siw_wqe *wqe = NULL;
0335     bool srq_event = false;
0336     unsigned long flags;
0337 
0338     srq = qp->srq;
0339     if (srq) {
0340         spin_lock_irqsave(&srq->lock, flags);
0341         if (unlikely(!srq->num_rqe))
0342             goto out;
0343 
0344         rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
0345     } else {
0346         if (unlikely(!qp->recvq))
0347             goto out;
0348 
0349         rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
0350     }
0351     if (likely(rqe->flags == SIW_WQE_VALID)) {
0352         int num_sge = rqe->num_sge;
0353 
0354         if (likely(num_sge <= SIW_MAX_SGE)) {
0355             int i = 0;
0356 
0357             wqe = rx_wqe(&qp->rx_untagged);
0358             rx_type(wqe) = SIW_OP_RECEIVE;
0359             wqe->wr_status = SIW_WR_INPROGRESS;
0360             wqe->bytes = 0;
0361             wqe->processed = 0;
0362 
0363             wqe->rqe.id = rqe->id;
0364             wqe->rqe.num_sge = num_sge;
0365 
0366             while (i < num_sge) {
0367                 wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
0368                 wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
0369                 wqe->rqe.sge[i].length = rqe->sge[i].length;
0370                 wqe->bytes += wqe->rqe.sge[i].length;
0371                 wqe->mem[i] = NULL;
0372                 i++;
0373             }
0374             /* can be re-used by appl */
0375             smp_store_mb(rqe->flags, 0);
0376         } else {
0377             siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
0378             if (srq)
0379                 spin_unlock_irqrestore(&srq->lock, flags);
0380             return NULL;
0381         }
0382         if (!srq) {
0383             qp->rq_get++;
0384         } else {
0385             if (srq->armed) {
0386                 /* Test SRQ limit */
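                          /*
                           * If the RQE sitting 'srq->limit' slots ahead
                           * of the consumer index is not posted, only
                           * 'srq->limit' or fewer receives are left:
                           * report IB_EVENT_SRQ_LIMIT_REACHED once and
                           * disarm.
                           */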
0387                 u32 off = (srq->rq_get + srq->limit) %
0388                       srq->num_rqe;
0389                 struct siw_rqe *rqe2 = &srq->recvq[off];
0390 
0391                 if (!(rqe2->flags & SIW_WQE_VALID)) {
0392                     srq->armed = false;
0393                     srq_event = true;
0394                 }
0395             }
0396             srq->rq_get++;
0397         }
0398     }
0399 out:
0400     if (srq) {
0401         spin_unlock_irqrestore(&srq->lock, flags);
0402         if (srq_event)
0403             siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
0404     }
0405     return wqe;
0406 }
0407 
0408 /*
0409  * siw_proc_send:
0410  *
0411  * Process one incoming SEND and place data into memory referenced by
0412  * receive wqe.
0413  *
0414  * Function supports partially received sends (suspending/resuming
0415  * current receive wqe processing)
0416  *
0417  * return value:
0418  *  0:       reached the end of a DDP segment
0419  *  -EAGAIN: to be called again to finish the DDP segment
0420  */
0421 int siw_proc_send(struct siw_qp *qp)
0422 {
0423     struct siw_rx_stream *srx = &qp->rx_stream;
0424     struct siw_rx_fpdu *frx = &qp->rx_untagged;
0425     struct siw_wqe *wqe;
0426     u32 data_bytes; /* all data bytes available */
0427     u32 rcvd_bytes; /* sum of data bytes rcvd */
0428     int rv = 0;
0429 
0430     if (frx->first_ddp_seg) {
0431         wqe = siw_rqe_get(qp);
0432         if (unlikely(!wqe)) {
0433             siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0434                        DDP_ETYPE_UNTAGGED_BUF,
0435                        DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
0436             return -ENOENT;
0437         }
0438     } else {
0439         wqe = rx_wqe(frx);
0440     }
0441     if (srx->state == SIW_GET_DATA_START) {
0442         rv = siw_send_check_ntoh(srx, frx);
0443         if (unlikely(rv)) {
0444             siw_qp_event(qp, IB_EVENT_QP_FATAL);
0445             return rv;
0446         }
0447         if (!srx->fpdu_part_rem) /* zero length SEND */
0448             return 0;
0449     }
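         /*
          * Consume no more than what the current DDP segment still
          * carries (fpdu_part_rem) and what the current skb actually
          * provides (skb_new).
          */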
0450     data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
0451     rcvd_bytes = 0;
0452 
0453     /* A zero length SEND will skip the loop below */
0454     while (data_bytes) {
0455         struct ib_pd *pd;
0456         struct siw_mem **mem, *mem_p;
0457         struct siw_sge *sge;
0458         u32 sge_bytes; /* data bytes avail for SGE */
0459 
0460         sge = &wqe->rqe.sge[frx->sge_idx];
0461 
0462         if (!sge->length) {
0463             /* just skip empty sge's */
0464             frx->sge_idx++;
0465             frx->sge_off = 0;
0466             frx->pbl_idx = 0;
0467             continue;
0468         }
0469         sge_bytes = min(data_bytes, sge->length - frx->sge_off);
0470         mem = &wqe->mem[frx->sge_idx];
0471 
0472         /*
0473          * check with QP's PD if no SRQ present, SRQ's PD otherwise
0474          */
0475         pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
0476 
0477         rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
0478                    frx->sge_off, sge_bytes);
0479         if (unlikely(rv)) {
0480             siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0481                        DDP_ETYPE_CATASTROPHIC,
0482                        DDP_ECODE_CATASTROPHIC, 0);
0483 
0484             siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
0485             break;
0486         }
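             /*
              * Place the data according to the memory behind the SGE:
              * plain kernel address, user memory (mapped pages), or
              * physical buffer list.
              */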
0487         mem_p = *mem;
0488         if (mem_p->mem_obj == NULL)
0489             rv = siw_rx_kva(srx,
0490                 (void *)(uintptr_t)(sge->laddr + frx->sge_off),
0491                 sge_bytes);
0492         else if (!mem_p->is_pbl)
0493             rv = siw_rx_umem(srx, mem_p->umem,
0494                      sge->laddr + frx->sge_off, sge_bytes);
0495         else
0496             rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
0497                     sge->laddr + frx->sge_off, sge_bytes);
0498 
0499         if (unlikely(rv != sge_bytes)) {
0500             wqe->processed += rcvd_bytes;
0501 
0502             siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0503                        DDP_ETYPE_CATASTROPHIC,
0504                        DDP_ECODE_CATASTROPHIC, 0);
0505             return -EINVAL;
0506         }
0507         frx->sge_off += rv;
0508 
0509         if (frx->sge_off == sge->length) {
0510             frx->sge_idx++;
0511             frx->sge_off = 0;
0512             frx->pbl_idx = 0;
0513         }
0514         data_bytes -= rv;
0515         rcvd_bytes += rv;
0516 
0517         srx->fpdu_part_rem -= rv;
0518         srx->fpdu_part_rcvd += rv;
0519     }
0520     wqe->processed += rcvd_bytes;
0521 
0522     if (!srx->fpdu_part_rem)
0523         return 0;
0524 
0525     return (rv < 0) ? rv : -EAGAIN;
0526 }
0527 
0528 /*
0529  * siw_proc_write:
0530  *
0531  * Place incoming WRITE after referencing and checking target buffer
0532  *
0533  * Function supports partially received WRITEs (suspending/resuming
0534  * current receive processing)
0535  *
0536  * return value:
0537  *  0:       reached the end of a DDP segment
0538  *  -EAGAIN: to be called again to finish the DDP segment
0539  */
0540 int siw_proc_write(struct siw_qp *qp)
0541 {
0542     struct siw_rx_stream *srx = &qp->rx_stream;
0543     struct siw_rx_fpdu *frx = &qp->rx_tagged;
0544     struct siw_mem *mem;
0545     int bytes, rv;
0546 
0547     if (srx->state == SIW_GET_DATA_START) {
0548         if (!srx->fpdu_part_rem) /* zero length WRITE */
0549             return 0;
0550 
0551         rv = siw_write_check_ntoh(srx, frx);
0552         if (unlikely(rv)) {
0553             siw_qp_event(qp, IB_EVENT_QP_FATAL);
0554             return rv;
0555         }
0556     }
0557     bytes = min(srx->fpdu_part_rem, srx->skb_new);
0558 
0559     if (frx->first_ddp_seg) {
0560         struct siw_wqe *wqe = rx_wqe(frx);
0561 
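             /*
              * Resolve the target memory from the STag announced in
              * the DDP header: the STag index (stag >> 8) selects the
              * memory object; the full STag, including its key byte,
              * is verified against mem->stag further below.
              */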
0562         rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
0563         if (unlikely(!rx_mem(frx))) {
0564             siw_dbg_qp(qp,
0565                    "sink stag not found/invalid, stag 0x%08x\n",
0566                    srx->ddp_stag);
0567 
0568             siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0569                        DDP_ETYPE_TAGGED_BUF,
0570                        DDP_ECODE_T_INVALID_STAG, 0);
0571             return -EINVAL;
0572         }
0573         wqe->rqe.num_sge = 1;
0574         rx_type(wqe) = SIW_OP_WRITE;
0575         wqe->wr_status = SIW_WR_INPROGRESS;
0576     }
0577     mem = rx_mem(frx);
0578 
0579     /*
0580      * Check if application re-registered memory with different
0581      * key field of STag.
0582      */
0583     if (unlikely(mem->stag != srx->ddp_stag)) {
0584         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0585                    DDP_ETYPE_TAGGED_BUF,
0586                    DDP_ECODE_T_INVALID_STAG, 0);
0587         return -EINVAL;
0588     }
0589     rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
0590                IB_ACCESS_REMOTE_WRITE, bytes);
0591     if (unlikely(rv)) {
0592         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0593                    DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
0594                    0);
0595 
0596         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
0597 
0598         return -EINVAL;
0599     }
0600 
0601     if (mem->mem_obj == NULL)
0602         rv = siw_rx_kva(srx,
0603             (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
0604             bytes);
0605     else if (!mem->is_pbl)
0606         rv = siw_rx_umem(srx, mem->umem,
0607                  srx->ddp_to + srx->fpdu_part_rcvd, bytes);
0608     else
0609         rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
0610                 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
0611 
0612     if (unlikely(rv != bytes)) {
0613         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0614                    DDP_ETYPE_CATASTROPHIC,
0615                    DDP_ECODE_CATASTROPHIC, 0);
0616         return -EINVAL;
0617     }
0618     srx->fpdu_part_rem -= rv;
0619     srx->fpdu_part_rcvd += rv;
0620 
0621     if (!srx->fpdu_part_rem) {
0622         srx->ddp_to += srx->fpdu_part_rcvd;
0623         return 0;
0624     }
0625     return -EAGAIN;
0626 }
0627 
0628 /*
0629  * Inbound RREQs cannot carry user data.
0630  */
0631 int siw_proc_rreq(struct siw_qp *qp)
0632 {
0633     struct siw_rx_stream *srx = &qp->rx_stream;
0634 
0635     if (!srx->fpdu_part_rem)
0636         return 0;
0637 
0638     pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
0639         be16_to_cpu(srx->hdr.ctrl.mpa_len));
0640 
0641     return -EPROTO;
0642 }
0643 
0644 /*
0645  * siw_init_rresp:
0646  *
0647  * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
0648  * Put it at the tail of the IRQ, if there is another WQE currently in
0649  * transmit processing. If not, make it the current WQE to be processed
0650  * and schedule transmit processing.
0651  *
0652  * Can be called from softirq context and from process
0653  * context (RREAD socket loopback case!)
0654  *
0655  * return value:
0656  *  0:      success,
0657  *      failure code otherwise
0658  */
0659 
0660 static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
0661 {
0662     struct siw_wqe *tx_work = tx_wqe(qp);
0663     struct siw_sqe *resp;
0664 
0665     uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
0666          laddr = be64_to_cpu(srx->hdr.rreq.source_to);
0667     uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
0668          lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
0669          rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
0670          msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
0671 
0672     int run_sq = 1, rv = 0;
0673     unsigned long flags;
0674 
0675     if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
0676         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0677                    DDP_ETYPE_UNTAGGED_BUF,
0678                    DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
0679         return -EPROTO;
0680     }
0681     spin_lock_irqsave(&qp->sq_lock, flags);
0682 
0683     if (unlikely(!qp->attrs.irq_size)) {
0684         run_sq = 0;
0685         goto error_irq;
0686     }
0687     if (tx_work->wr_status == SIW_WR_IDLE) {
0688         /*
0689          * immediately schedule READ response w/o
0690          * consuming IRQ entry: IRQ must be empty.
0691          */
0692         tx_work->processed = 0;
0693         tx_work->mem[0] = NULL;
0694         tx_work->wr_status = SIW_WR_QUEUED;
0695         resp = &tx_work->sqe;
0696     } else {
0697         resp = irq_alloc_free(qp);
0698         run_sq = 0;
0699     }
0700     if (likely(resp)) {
0701         resp->opcode = SIW_OP_READ_RESPONSE;
0702 
0703         resp->sge[0].length = length;
0704         resp->sge[0].laddr = laddr;
0705         resp->sge[0].lkey = lkey;
0706 
0707         /* Keep aside message sequence number for potential
0708          * error reporting during Read Response generation.
0709          */
0710         resp->sge[1].length = msn;
0711 
0712         resp->raddr = raddr;
0713         resp->rkey = rkey;
0714         resp->num_sge = length ? 1 : 0;
0715 
0716         /* RRESP now valid as current TX wqe or placed into IRQ */
0717         smp_store_mb(resp->flags, SIW_WQE_VALID);
0718     } else {
0719 error_irq:
0720         pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
0721             qp_id(qp), qp->attrs.irq_size);
0722 
0723         siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
0724                    RDMAP_ETYPE_REMOTE_OPERATION,
0725                    RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
0726         rv = -EPROTO;
0727     }
0728 
0729     spin_unlock_irqrestore(&qp->sq_lock, flags);
0730 
0731     if (run_sq)
0732         rv = siw_sq_start(qp);
0733 
0734     return rv;
0735 }
0736 
0737 /*
0738  * Only called at start of Read.Response processing.
0739  * Transfer pending Read from tip of ORQ into current rx wqe,
0740  * but keep ORQ entry valid until Read.Response processing done.
0741  * No Queue locking needed.
0742  */
0743 static int siw_orqe_start_rx(struct siw_qp *qp)
0744 {
0745     struct siw_sqe *orqe;
0746     struct siw_wqe *wqe = NULL;
0747 
0748     if (unlikely(!qp->attrs.orq_size))
0749         return -EPROTO;
0750 
0751     /* make sure ORQ indices are current */
0752     smp_mb();
0753 
0754     orqe = orq_get_current(qp);
0755     if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
0756         /* RRESP is a TAGGED RDMAP operation */
0757         wqe = rx_wqe(&qp->rx_tagged);
0758         wqe->sqe.id = orqe->id;
0759         wqe->sqe.opcode = orqe->opcode;
0760         wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
0761         wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
0762         wqe->sqe.sge[0].length = orqe->sge[0].length;
0763         wqe->sqe.flags = orqe->flags;
0764         wqe->sqe.num_sge = 1;
0765         wqe->bytes = orqe->sge[0].length;
0766         wqe->processed = 0;
0767         wqe->mem[0] = NULL;
0768         /* make sure WQE is completely written before valid */
0769         smp_wmb();
0770         wqe->wr_status = SIW_WR_INPROGRESS;
0771 
0772         return 0;
0773     }
0774     return -EPROTO;
0775 }
0776 
0777 /*
0778  * siw_proc_rresp:
0779  *
0780  * Place incoming RRESP data into memory referenced by RREQ WQE
0781  * which is at the tip of the ORQ
0782  *
0783  * Function supports partially received RRESP's (suspending/resuming
0784  * current receive processing)
0785  */
0786 int siw_proc_rresp(struct siw_qp *qp)
0787 {
0788     struct siw_rx_stream *srx = &qp->rx_stream;
0789     struct siw_rx_fpdu *frx = &qp->rx_tagged;
0790     struct siw_wqe *wqe = rx_wqe(frx);
0791     struct siw_mem **mem, *mem_p;
0792     struct siw_sge *sge;
0793     int bytes, rv;
0794 
0795     if (frx->first_ddp_seg) {
0796         if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
0797             pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
0798                 qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
0799             rv = -EPROTO;
0800             goto error_term;
0801         }
0802         /*
0803          * fetch pending RREQ from orq
0804          */
0805         rv = siw_orqe_start_rx(qp);
0806         if (rv) {
0807             pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
0808                 qp_id(qp), qp->attrs.orq_size);
0809             goto error_term;
0810         }
0811         rv = siw_rresp_check_ntoh(srx, frx);
0812         if (unlikely(rv)) {
0813             siw_qp_event(qp, IB_EVENT_QP_FATAL);
0814             return rv;
0815         }
0816     } else {
0817         if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
0818             pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
0819                 qp_id(qp), wqe->wr_status);
0820             rv = -EPROTO;
0821             goto error_term;
0822         }
0823     }
0824     if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
0825         return 0;
0826 
0827     sge = wqe->sqe.sge; /* there is only one */
0828     mem = &wqe->mem[0];
0829 
0830     if (!(*mem)) {
0831         /*
0832          * check target memory; this resolves the memory object on the first fragment
0833          */
0834         rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
0835                    wqe->bytes);
0836         if (unlikely(rv)) {
0837             siw_dbg_qp(qp, "target mem check: %d\n", rv);
0838             wqe->wc_status = SIW_WC_LOC_PROT_ERR;
0839 
0840             siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
0841                        DDP_ETYPE_TAGGED_BUF,
0842                        siw_tagged_error(-rv), 0);
0843 
0844             siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
0845 
0846             return -EINVAL;
0847         }
0848     }
0849     mem_p = *mem;
0850 
0851     bytes = min(srx->fpdu_part_rem, srx->skb_new);
0852 
0853     if (mem_p->mem_obj == NULL)
0854         rv = siw_rx_kva(srx,
0855             (void *)(uintptr_t)(sge->laddr + wqe->processed),
0856             bytes);
0857     else if (!mem_p->is_pbl)
0858         rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
0859                  bytes);
0860     else
0861         rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
0862                 sge->laddr + wqe->processed, bytes);
0863     if (rv != bytes) {
0864         wqe->wc_status = SIW_WC_GENERAL_ERR;
0865         rv = -EINVAL;
0866         goto error_term;
0867     }
0868     srx->fpdu_part_rem -= rv;
0869     srx->fpdu_part_rcvd += rv;
0870     wqe->processed += rv;
0871 
0872     if (!srx->fpdu_part_rem) {
0873         srx->ddp_to += srx->fpdu_part_rcvd;
0874         return 0;
0875     }
0876     return -EAGAIN;
0877 
0878 error_term:
0879     siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
0880                DDP_ECODE_CATASTROPHIC, 0);
0881     return rv;
0882 }
0883 
0884 int siw_proc_terminate(struct siw_qp *qp)
0885 {
0886     struct siw_rx_stream *srx = &qp->rx_stream;
0887     struct sk_buff *skb = srx->skb;
0888     struct iwarp_terminate *term = &srx->hdr.terminate;
0889     union iwarp_hdr term_info;
0890     u8 *infop = (u8 *)&term_info;
0891     enum rdma_opcode op;
0892     u16 to_copy = sizeof(struct iwarp_ctrl);
0893 
0894     pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
0895         __rdmap_term_layer(term), __rdmap_term_etype(term),
0896         __rdmap_term_ecode(term));
0897 
0898     if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
0899         be32_to_cpu(term->ddp_msn) !=
0900             qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
0901         be32_to_cpu(term->ddp_mo) != 0) {
0902         pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
0903             be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
0904             be32_to_cpu(term->ddp_mo));
0905         return -ECONNRESET;
0906     }
0907     /*
0908      * Receive remaining pieces of TERM if indicated
0909      */
0910     if (!term->flag_m)
0911         return -ECONNRESET;
0912 
0913     /* Do not bother to reassemble a network-fragmented
0914      * TERM message
0915      */
0916     if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
0917         return -ECONNRESET;
0918 
0919     memset(infop, 0, sizeof(term_info));
0920 
0921     skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
0922 
0923     op = __rdmap_get_opcode(&term_info.ctrl);
0924     if (op >= RDMAP_TERMINATE)
0925         goto out;
0926 
0927     infop += to_copy;
0928     srx->skb_offset += to_copy;
0929     srx->skb_new -= to_copy;
0930     srx->skb_copied += to_copy;
0931     srx->fpdu_part_rcvd += to_copy;
0932     srx->fpdu_part_rem -= to_copy;
0933 
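         /*
          * Length of the remaining DDP/RDMAP header the TERM message
          * reports on (control part already copied above).
          */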
0934     to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
0935 
0936     /* Again, no network-fragmented TERMs */
0937     if (to_copy + MPA_CRC_SIZE > srx->skb_new)
0938         return -ECONNRESET;
0939 
0940     skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
0941 
0942     if (term->flag_r) {
0943         siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
0944                op, be16_to_cpu(term_info.ctrl.mpa_len),
0945                term->flag_m ? "valid" : "invalid");
0946     } else if (term->flag_d) {
0947         siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
0948                op, be16_to_cpu(term_info.ctrl.mpa_len),
0949                term->flag_m ? "valid" : "invalid");
0950     }
0951 out:
0952     srx->skb_new -= to_copy;
0953     srx->skb_offset += to_copy;
0954     srx->skb_copied += to_copy;
0955     srx->fpdu_part_rcvd += to_copy;
0956     srx->fpdu_part_rem -= to_copy;
0957 
0958     return -ECONNRESET;
0959 }
0960 
0961 static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
0962 {
0963     struct sk_buff *skb = srx->skb;
0964     u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
0965     __wsum crc_in, crc_own = 0;
0966 
0967     siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
0968            srx->fpdu_part_rem, srx->skb_new, srx->pad);
0969 
0970     if (srx->skb_new < srx->fpdu_part_rem)
0971         return -EAGAIN;
0972 
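         /*
          * 'tbuf' starts 'pad' bytes before the trailer's CRC field,
          * so MPA padding and CRC are fetched in one skb_copy_bits();
          * the pad bytes still contribute to the CRC computed below.
          */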
0973     skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
0974 
0975     if (srx->mpa_crc_hd && srx->pad)
0976         crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
0977 
0978     srx->skb_new -= srx->fpdu_part_rem;
0979     srx->skb_offset += srx->fpdu_part_rem;
0980     srx->skb_copied += srx->fpdu_part_rem;
0981 
0982     if (!srx->mpa_crc_hd)
0983         return 0;
0984 
0985     /*
0986      * CRC32 is computed, transmitted and received directly in NBO,
0987      * so there's never a reason to convert byte order.
0988      */
0989     crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
0990     crc_in = (__force __wsum)srx->trailer.crc;
0991 
0992     if (unlikely(crc_in != crc_own)) {
0993         pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
0994             crc_in, crc_own, qp->rx_stream.rdmap_op);
0995 
0996         siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
0997                    LLP_ETYPE_MPA,
0998                    LLP_ECODE_RECEIVED_CRC, 0);
0999         return -EINVAL;
1000     }
1001     return 0;
1002 }
1003 
1004 #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
1005 
1006 static int siw_get_hdr(struct siw_rx_stream *srx)
1007 {
1008     struct sk_buff *skb = srx->skb;
1009     struct siw_qp *qp = rx_qp(srx);
1010     struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1011     struct siw_rx_fpdu *frx;
1012     u8 opcode;
1013     int bytes;
1014 
1015     if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1016         /*
1017          * copy a minimum sized (tagged) DDP frame control part
1018          */
1019         bytes = min_t(int, srx->skb_new,
1020                   MIN_DDP_HDR - srx->fpdu_part_rcvd);
1021 
1022         skb_copy_bits(skb, srx->skb_offset,
1023                   (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1024 
1025         srx->fpdu_part_rcvd += bytes;
1026 
1027         srx->skb_new -= bytes;
1028         srx->skb_offset += bytes;
1029         srx->skb_copied += bytes;
1030 
1031         if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1032             return -EAGAIN;
1033 
1034         if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1035             enum ddp_etype etype;
1036             enum ddp_ecode ecode;
1037 
1038             pr_warn("siw: received ddp version unsupported %d\n",
1039                 __ddp_get_version(c_hdr));
1040 
1041             if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1042                 etype = DDP_ETYPE_TAGGED_BUF;
1043                 ecode = DDP_ECODE_T_VERSION;
1044             } else {
1045                 etype = DDP_ETYPE_UNTAGGED_BUF;
1046                 ecode = DDP_ECODE_UT_VERSION;
1047             }
1048             siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1049                        etype, ecode, 0);
1050             return -EINVAL;
1051         }
1052         if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1053             pr_warn("siw: received rdmap version unsupported %d\n",
1054                 __rdmap_get_version(c_hdr));
1055 
1056             siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1057                        RDMAP_ETYPE_REMOTE_OPERATION,
1058                        RDMAP_ECODE_VERSION, 0);
1059             return -EINVAL;
1060         }
1061         opcode = __rdmap_get_opcode(c_hdr);
1062 
1063         if (opcode > RDMAP_TERMINATE) {
1064             pr_warn("siw: received unknown packet type %u\n",
1065                 opcode);
1066 
1067             siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1068                        RDMAP_ETYPE_REMOTE_OPERATION,
1069                        RDMAP_ECODE_OPCODE, 0);
1070             return -EINVAL;
1071         }
1072         siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1073     } else {
1074         opcode = __rdmap_get_opcode(c_hdr);
1075     }
1076     set_rx_fpdu_context(qp, opcode);
1077     frx = qp->rx_fpdu;
1078 
1079     /*
1080      * Figure out len of current hdr: variable length of
1081      * iwarp hdr may force us to copy hdr information in
1082      * two steps. Only tagged DDP messages are already
1083      * completely received.
1084      */
1085     if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1086         bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
1087 
1088         if (srx->skb_new < bytes)
1089             return -EAGAIN;
1090 
1091         skb_copy_bits(skb, srx->skb_offset,
1092                   (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1093 
1094         srx->fpdu_part_rcvd += bytes;
1095 
1096         srx->skb_new -= bytes;
1097         srx->skb_offset += bytes;
1098         srx->skb_copied += bytes;
1099     }
1100 
1101     /*
1102      * DDP/RDMAP header receive completed. Check if the current
1103      * DDP segment starts a new RDMAP message or continues a previously
1104      * started RDMAP message.
1105      *
1106      * Alternating reception of DDP segments (or FPDUs) from incomplete
1107      * tagged and untagged RDMAP messages is supported, as long as
1108      * the current tagged or untagged message gets eventually completed
1109      * w/o intersection from another message of the same type
1110      * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1111      * but not by a READ RESPONSE etc.
1112      */
1113     if (srx->mpa_crc_hd) {
1114         /*
1115          * Restart CRC computation
1116          */
1117         crypto_shash_init(srx->mpa_crc_hd);
1118         crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1119                     srx->fpdu_part_rcvd);
1120     }
1121     if (frx->more_ddp_segs) {
1122         frx->first_ddp_seg = 0;
1123         if (frx->prev_rdmap_op != opcode) {
1124             pr_warn("siw: packet intersection: %u : %u\n",
1125                 frx->prev_rdmap_op, opcode);
1126             /*
1127              * The last inbound RDMA operation of same type
1128              * (tagged or untagged) is left unfinished.
1129              * To complete it in error, make it the current
1130              * operation again, even with the header already
1131              * overwritten. For error handling, only the opcode
1132              * and current rx context are relevant.
1133              */
1134             set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1135             __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1136             return -EPROTO;
1137         }
1138     } else {
1139         frx->prev_rdmap_op = opcode;
1140         frx->first_ddp_seg = 1;
1141     }
1142     frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1143 
1144     return 0;
1145 }
1146 
1147 static int siw_check_tx_fence(struct siw_qp *qp)
1148 {
1149     struct siw_wqe *tx_waiting = tx_wqe(qp);
1150     struct siw_sqe *rreq;
1151     int resume_tx = 0, rv = 0;
1152     unsigned long flags;
1153 
1154     spin_lock_irqsave(&qp->orq_lock, flags);
1155 
1156     /* free current orq entry */
1157     rreq = orq_get_current(qp);
1158     WRITE_ONCE(rreq->flags, 0);
1159 
1160     qp->orq_get++;
1161 
1162     if (qp->tx_ctx.orq_fence) {
1163         if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1164             pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1165                 qp_id(qp), tx_waiting->wr_status);
1166             rv = -EPROTO;
1167             goto out;
1168         }
1169         /* resume SQ processing, if possible */
1170         if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1171             tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1172 
1173             /* SQ processing was stopped because of a full ORQ */
1174             rreq = orq_get_free(qp);
1175             if (unlikely(!rreq)) {
1176                 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1177                 rv = -EPROTO;
1178                 goto out;
1179             }
1180             siw_read_to_orq(rreq, &tx_waiting->sqe);
1181 
1182             qp->orq_put++;
1183             qp->tx_ctx.orq_fence = 0;
1184             resume_tx = 1;
1185 
1186         } else if (siw_orq_empty(qp)) {
1187             /*
1188              * SQ processing was stopped by fenced work request.
1189              * Resume since all previous Reads are now completed.
1190              */
1191             qp->tx_ctx.orq_fence = 0;
1192             resume_tx = 1;
1193         }
1194     }
1195 out:
1196     spin_unlock_irqrestore(&qp->orq_lock, flags);
1197 
1198     if (resume_tx)
1199         rv = siw_sq_start(qp);
1200 
1201     return rv;
1202 }
1203 
1204 /*
1205  * siw_rdmap_complete()
1206  *
1207  * Complete processing of an RDMA message after receiving all
1208  * DDP segments, or abort processing after encountering an error case.
1209  *
1210  *   o SENDs + RRESPs need completion,
1211  *   o RREQs need READ RESPONSE initialization,
1212  *   o WRITEs need memory dereferencing.
1213  *
1214  * TODO: Failed WRITEs need local error to be surfaced.
1215  */
1216 static int siw_rdmap_complete(struct siw_qp *qp, int error)
1217 {
1218     struct siw_rx_stream *srx = &qp->rx_stream;
1219     struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1220     enum siw_wc_status wc_status = wqe->wc_status;
1221     u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1222     int rv = 0;
1223 
1224     switch (opcode) {
1225     case RDMAP_SEND_SE:
1226     case RDMAP_SEND_SE_INVAL:
1227         wqe->rqe.flags |= SIW_WQE_SOLICITED;
1228         fallthrough;
1229 
1230     case RDMAP_SEND:
1231     case RDMAP_SEND_INVAL:
1232         if (wqe->wr_status == SIW_WR_IDLE)
1233             break;
1234 
1235         srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1236 
1237         if (error != 0 && wc_status == SIW_WC_SUCCESS)
1238             wc_status = SIW_WC_GENERAL_ERR;
1239         /*
1240          * Handle STag invalidation request
1241          */
1242         if (wc_status == SIW_WC_SUCCESS &&
1243             (opcode == RDMAP_SEND_INVAL ||
1244              opcode == RDMAP_SEND_SE_INVAL)) {
1245             rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1246             if (rv) {
1247                 siw_init_terminate(
1248                     qp, TERM_ERROR_LAYER_RDMAP,
1249                     rv == -EACCES ?
1250                         RDMAP_ETYPE_REMOTE_PROTECTION :
1251                         RDMAP_ETYPE_REMOTE_OPERATION,
1252                     RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1253 
1254                 wc_status = SIW_WC_REM_INV_REQ_ERR;
1255             }
1256             rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1257                           rv ? 0 : srx->inval_stag,
1258                           wc_status);
1259         } else {
1260             rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1261                           0, wc_status);
1262         }
1263         siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1264         break;
1265 
1266     case RDMAP_RDMA_READ_RESP:
1267         if (wqe->wr_status == SIW_WR_IDLE)
1268             break;
1269 
1270         if (error != 0) {
1271             if ((srx->state == SIW_GET_HDR &&
1272                  qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1273                 /* possible RREQ in ORQ left untouched */
1274                 break;
1275 
1276             if (wc_status == SIW_WC_SUCCESS)
1277                 wc_status = SIW_WC_GENERAL_ERR;
1278         } else if (rdma_is_kernel_res(&qp->base_qp.res) &&
1279                rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1280             /*
1281              * Handle any STag invalidation request
1282              */
1283             rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1284             if (rv) {
1285                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1286                            RDMAP_ETYPE_CATASTROPHIC,
1287                            RDMAP_ECODE_UNSPECIFIED, 0);
1288 
1289                 if (wc_status == SIW_WC_SUCCESS) {
1290                     wc_status = SIW_WC_GENERAL_ERR;
1291                     error = rv;
1292                 }
1293             }
1294         }
1295         /*
1296      * All errors make the wqe signalled.
1297          */
1298         if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1299             rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1300                           wc_status);
1301         siw_wqe_put_mem(wqe, SIW_OP_READ);
1302 
1303         if (!error) {
1304             rv = siw_check_tx_fence(qp);
1305         } else {
1306             /* Disable current ORQ element */
1307             if (qp->attrs.orq_size)
1308                 WRITE_ONCE(orq_get_current(qp)->flags, 0);
1309         }
1310         break;
1311 
1312     case RDMAP_RDMA_READ_REQ:
1313         if (!error) {
1314             rv = siw_init_rresp(qp, srx);
1315             srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1316         }
1317         break;
1318 
1319     case RDMAP_RDMA_WRITE:
1320         if (wqe->wr_status == SIW_WR_IDLE)
1321             break;
1322 
1323         /*
1324      * Free the reference to the memory object if one is
1325      * attached to the receive context (inbound WRITE).
1326      * While a zero-length WRITE is allowed,
1327      * no memory reference gets created for it.
1328          */
1329         if (rx_mem(&qp->rx_tagged)) {
1330             siw_mem_put(rx_mem(&qp->rx_tagged));
1331             rx_mem(&qp->rx_tagged) = NULL;
1332         }
1333         break;
1334 
1335     default:
1336         break;
1337     }
1338     wqe->wr_status = SIW_WR_IDLE;
1339 
1340     return rv;
1341 }
1342 
1343 /*
1344  * siw_tcp_rx_data()
1345  *
1346  * Main routine to consume inbound TCP payload
1347  *
1348  * @rd_desc:    read descriptor
1349  * @skb:    socket buffer
1350  * @off:    offset in skb
1351  * @len:    skb->len - offset : payload in skb
1352  */
1353 int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1354             unsigned int off, size_t len)
1355 {
1356     struct siw_qp *qp = rd_desc->arg.data;
1357     struct siw_rx_stream *srx = &qp->rx_stream;
1358     int rv;
1359 
1360     srx->skb = skb;
1361     srx->skb_new = skb->len - off;
1362     srx->skb_offset = off;
1363     srx->skb_copied = 0;
1364 
1365     siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1366 
1367     while (srx->skb_new) {
1368         int run_completion = 1;
1369 
1370         if (unlikely(srx->rx_suspend)) {
1371             /* Do not process any more data */
1372             srx->skb_copied += srx->skb_new;
1373             break;
1374         }
1375         switch (srx->state) {
1376         case SIW_GET_HDR:
1377             rv = siw_get_hdr(srx);
1378             if (!rv) {
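                          /*
                           * Header complete. The 16-bit MPA length covers
                           * the DDP segment but not the length field
                           * itself, nor pad or CRC:
                           *
                           *  [MPA len (2)|DDP/RDMAP hdr + payload|pad|CRC(4)]
                           *
                           * so what is still to come for this FPDU is the
                           * payload, padded to a 4-byte boundary ahead of
                           * the CRC trailer.
                           */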
1379                 srx->fpdu_part_rem =
1380                     be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1381                     srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1382 
1383                 if (srx->fpdu_part_rem)
1384                     srx->pad = -srx->fpdu_part_rem & 0x3;
1385                 else
1386                     srx->pad = 0;
1387 
1388                 srx->state = SIW_GET_DATA_START;
1389                 srx->fpdu_part_rcvd = 0;
1390             }
1391             break;
1392 
1393         case SIW_GET_DATA_MORE:
1394             /*
1395              * Another data fragment of the same DDP segment.
1396              * Setting first_ddp_seg = 0 avoids repeating
1397              * initializations that shall occur only once per
1398              * DDP segment.
1399              */
1400             qp->rx_fpdu->first_ddp_seg = 0;
1401             fallthrough;
1402 
1403         case SIW_GET_DATA_START:
1404             /*
1405              * Headers will be checked by the opcode-specific
1406              * data receive function below.
1407              */
1408             rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1409             if (!rv) {
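                          /*
                           * Payload of this DDP segment fully consumed:
                           * only the pad bytes (if any) and the 4-byte CRC
                           * remain to be read for this FPDU.
                           */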
1410                 int mpa_len =
1411                     be16_to_cpu(srx->hdr.ctrl.mpa_len)
1412                     + MPA_HDR_SIZE;
1413 
1414                 srx->fpdu_part_rem = (-mpa_len & 0x3)
1415                               + MPA_CRC_SIZE;
1416                 srx->fpdu_part_rcvd = 0;
1417                 srx->state = SIW_GET_TRAILER;
1418             } else {
1419                 if (unlikely(rv == -ECONNRESET))
1420                     run_completion = 0;
1421                 else
1422                     srx->state = SIW_GET_DATA_MORE;
1423             }
1424             break;
1425 
1426         case SIW_GET_TRAILER:
1427             /*
1428              * read CRC + any padding
1429              */
1430             rv = siw_get_trailer(qp, srx);
1431             if (likely(!rv)) {
1432                 /*
1433                  * FPDU completed.
1434                  * complete RDMAP message if last fragment
1435                  */
1436                 srx->state = SIW_GET_HDR;
1437                 srx->fpdu_part_rcvd = 0;
1438 
1439                 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1440                       DDP_FLAG_LAST))
1441                     /* more frags */
1442                     break;
1443 
1444                 rv = siw_rdmap_complete(qp, 0);
1445                 run_completion = 0;
1446             }
1447             break;
1448 
1449         default:
1450             pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1451             rv = -EPROTO;
1452             run_completion = 0;
1453         }
1454         if (unlikely(rv != 0 && rv != -EAGAIN)) {
1455             if ((srx->state > SIW_GET_HDR ||
1456                  qp->rx_fpdu->more_ddp_segs) && run_completion)
1457                 siw_rdmap_complete(qp, rv);
1458 
1459             siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1460                    srx->state);
1461 
1462             siw_qp_cm_drop(qp, 1);
1463 
1464             break;
1465         }
1466         if (rv) {
1467             siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1468                    srx->state, srx->fpdu_part_rem);
1469             break;
1470         }
1471     }
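         /*
          * The return value tells the socket layer (tcp_read_sock()
          * read actor) how many bytes of the skb were consumed.
          */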
1472     return srx->skb_copied;
1473 }