Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
0002 /*
0003  * Copyright(c) 2015 - 2018 Intel Corporation.
0004  */
0005 
0006 #include "hfi.h"
0007 #include "verbs_txreq.h"
0008 #include "qp.h"
0009 
0010 /* cut down ridiculously long IB macro names */
0011 #define OP(x) UC_OP(x)
0012 
0013 /**
0014  * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
0015  * @qp: a pointer to the QP
0016  * @ps: the current packet state; ps->s_txreq is allocated here and is left NULL unless a packet was built
0017  *
0018  * Assume s_lock is held.
0019  *
0020  * Return 1 if a packet was built, or if a WQE was completed without one (flush or local operation, ps->s_txreq is NULL); otherwise clear RVT_S_BUSY and return 0.
0021  */
0022 int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
0023 {
0024     struct hfi1_qp_priv *priv = qp->priv;
0025     struct ib_other_headers *ohdr;
0026     struct rvt_swqe *wqe;
0027     u32 hwords;  /* header length in 32-bit words; grows with RETH/immediate data */
0028     u32 bth0 = 0;  /* flag bits OR'ed with the opcode when the header is built */
0029     u32 len;  /* bytes of the request carried by this packet */
0030     u32 pmtu = qp->pmtu;
0031     int middle = 0;  /* set only for a full-PMTU packet sent from a MIDDLE state */
0032     /* Allocate the tx request up front; every exit that does not build a packet releases it. */
0033     ps->s_txreq = get_txreq(ps->dev, qp);
0034     if (!ps->s_txreq)
0035         goto bail_no_tx;
0036 
0037     if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
0038         if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
0039             goto bail;
0040         /* We are in the error state, flush the work request. */
0041         if (qp->s_last == READ_ONCE(qp->s_head))
0042             goto bail;
0043         /* If DMAs are in progress, we can't flush immediately. */
0044         if (iowait_sdma_pending(&priv->s_iowait)) {
0045             qp->s_flags |= RVT_S_WAIT_DMA;
0046             goto bail;
0047         }
0048         clear_ahg(qp);
0049         wqe = rvt_get_swqe_ptr(qp, qp->s_last);
0050         rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
0051         goto done_free_tx;
0052     }
0053     /* Select the base header size and the location of the other-headers (ohdr) inside the txreq. */
0054     if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
0055         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
0056         hwords = 5;
0057         if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
0058             ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
0059         else
0060             ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
0061     } else {
0062         /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
0063         hwords = 7;
0064         if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
0065             (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
0066             ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
0067         else
0068             ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
0069     }
0070 
0071     /* Get the next send request. */
0072     wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
0073     qp->s_wqe = NULL;  /* set again below only when this packet is the last one of the WQE */
0074     switch (qp->s_state) {
0075     default:  /* any state other than a FIRST/MIDDLE in progress: start on the next WQE */
0076         if (!(ib_rvt_state_ops[qp->state] &
0077             RVT_PROCESS_NEXT_SEND_OK))
0078             goto bail;
0079         /* Check if send work queue is empty. */
0080         if (qp->s_cur == READ_ONCE(qp->s_head)) {
0081             clear_ahg(qp);
0082             goto bail;
0083         }
0084         /*
0085          * Local operations are processed immediately
0086          * after all prior requests have completed.
0087          */
0088         if (wqe->wr.opcode == IB_WR_REG_MR ||
0089             wqe->wr.opcode == IB_WR_LOCAL_INV) {
0090             int local_ops = 0;
0091             int err = 0;
0092 
0093             if (qp->s_last != qp->s_cur)
0094                 goto bail;
0095             if (++qp->s_cur == qp->s_size)
0096                 qp->s_cur = 0;
0097             if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
0098                 err = rvt_invalidate_rkey(
0099                     qp, wqe->wr.ex.invalidate_rkey);
0100                 local_ops = 1;
0101             }
0102             rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
0103                             : IB_WC_SUCCESS);
0104             if (local_ops)
0105                 atomic_dec(&qp->local_ops_pending);
0106             goto done_free_tx;
0107         }
0108         /*
0109          * Start a new request.
0110          */
0111         qp->s_psn = wqe->psn;
0112         qp->s_sge.sge = wqe->sg_list[0];
0113         qp->s_sge.sg_list = wqe->sg_list + 1;
0114         qp->s_sge.num_sge = wqe->wr.num_sge;
0115         qp->s_sge.total_len = wqe->length;
0116         len = wqe->length;
0117         qp->s_len = len;
0118         switch (wqe->wr.opcode) {
0119         case IB_WR_SEND:
0120         case IB_WR_SEND_WITH_IMM:
0121             if (len > pmtu) {
0122                 qp->s_state = OP(SEND_FIRST);
0123                 len = pmtu;  /* send one PMTU now; s_cur is not advanced until the last packet */
0124                 break;
0125             }
0126             if (wqe->wr.opcode == IB_WR_SEND) {
0127                 qp->s_state = OP(SEND_ONLY);
0128             } else {
0129                 qp->s_state =
0130                     OP(SEND_ONLY_WITH_IMMEDIATE);
0131                 /* Immediate data comes after the BTH */
0132                 ohdr->u.imm_data = wqe->wr.ex.imm_data;
0133                 hwords += 1;
0134             }
0135             if (wqe->wr.send_flags & IB_SEND_SOLICITED)
0136                 bth0 |= IB_BTH_SOLICITED;
0137             qp->s_wqe = wqe;
0138             if (++qp->s_cur >= qp->s_size)
0139                 qp->s_cur = 0;
0140             break;
0141 
0142         case IB_WR_RDMA_WRITE:
0143         case IB_WR_RDMA_WRITE_WITH_IMM:
0144             ohdr->u.rc.reth.vaddr =
0145                 cpu_to_be64(wqe->rdma_wr.remote_addr);
0146             ohdr->u.rc.reth.rkey =
0147                 cpu_to_be32(wqe->rdma_wr.rkey);
0148             ohdr->u.rc.reth.length = cpu_to_be32(len);
0149             hwords += sizeof(struct ib_reth) / 4;  /* account for the RETH just filled in */
0150             if (len > pmtu) {
0151                 qp->s_state = OP(RDMA_WRITE_FIRST);
0152                 len = pmtu;  /* send one PMTU now; s_cur is not advanced until the last packet */
0153                 break;
0154             }
0155             if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
0156                 qp->s_state = OP(RDMA_WRITE_ONLY);
0157             } else {
0158                 qp->s_state =
0159                     OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
0160                 /* Immediate data comes after the RETH */
0161                 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
0162                 hwords += 1;
0163                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
0164                     bth0 |= IB_BTH_SOLICITED;
0165             }
0166             qp->s_wqe = wqe;
0167             if (++qp->s_cur >= qp->s_size)
0168                 qp->s_cur = 0;
0169             break;
0170 
0171         default:  /* any other work request opcode is not built by this function */
0172             goto bail;
0173         }
0174         break;
0175 
0176     case OP(SEND_FIRST):
0177         qp->s_state = OP(SEND_MIDDLE);
0178         fallthrough;
0179     case OP(SEND_MIDDLE):
0180         len = qp->s_len;  /* bytes of the current WQE still to be sent */
0181         if (len > pmtu) {
0182             len = pmtu;
0183             middle = HFI1_CAP_IS_KSET(SDMA_AHG);
0184             break;
0185         }
0186         if (wqe->wr.opcode == IB_WR_SEND) {
0187             qp->s_state = OP(SEND_LAST);
0188         } else {
0189             qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
0190             /* Immediate data comes after the BTH */
0191             ohdr->u.imm_data = wqe->wr.ex.imm_data;
0192             hwords += 1;
0193         }
0194         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
0195             bth0 |= IB_BTH_SOLICITED;
0196         qp->s_wqe = wqe;
0197         if (++qp->s_cur >= qp->s_size)
0198             qp->s_cur = 0;
0199         break;
0200 
0201     case OP(RDMA_WRITE_FIRST):
0202         qp->s_state = OP(RDMA_WRITE_MIDDLE);
0203         fallthrough;
0204     case OP(RDMA_WRITE_MIDDLE):
0205         len = qp->s_len;  /* bytes of the current WQE still to be sent */
0206         if (len > pmtu) {
0207             len = pmtu;
0208             middle = HFI1_CAP_IS_KSET(SDMA_AHG);
0209             break;
0210         }
0211         if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
0212             qp->s_state = OP(RDMA_WRITE_LAST);
0213         } else {
0214             qp->s_state =
0215                 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
0216             /* Immediate data comes after the BTH */
0217             ohdr->u.imm_data = wqe->wr.ex.imm_data;
0218             hwords += 1;
0219             if (wqe->wr.send_flags & IB_SEND_SOLICITED)
0220                 bth0 |= IB_BTH_SOLICITED;
0221         }
0222         qp->s_wqe = wqe;
0223         if (++qp->s_cur >= qp->s_size)
0224             qp->s_cur = 0;
0225         break;
0226     }
0227     qp->s_len -= len;  /* what remains of the request after this packet */
0228     ps->s_txreq->hdr_dwords = hwords;
0229     ps->s_txreq->sde = priv->s_sde;
0230     ps->s_txreq->ss = &qp->s_sge;
0231     ps->s_txreq->s_cur_size = len;
0232     hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),  /* opcode goes in bits 31:24 */
0233                  qp->remote_qpn, mask_psn(qp->s_psn++),
0234                  middle, ps);
0235     return 1;
0236 
0237 done_free_tx:  /* a WQE was completed without a packet: release the txreq, still return 1 */
0238     hfi1_put_txreq(ps->s_txreq);
0239     ps->s_txreq = NULL;
0240     return 1;
0241 
0242 bail:  /* nothing to send: release the txreq and fall into the common exit */
0243     hfi1_put_txreq(ps->s_txreq);
0244 
0245 bail_no_tx:  /* also reached directly when no txreq could be allocated */
0246     ps->s_txreq = NULL;
0247     qp->s_flags &= ~RVT_S_BUSY;
0248     return 0;
0249 }
0250 
0251 /**
0252  * hfi1_uc_rcv - handle an incoming UC packet
0253  * @packet: the packet structure
0254  *
0255  * This is called from qp_rcv() to process an incoming UC packet
0256  * for the given QP.
0257  * Called at interrupt level.
0258  */
0259 void hfi1_uc_rcv(struct hfi1_packet *packet)
0260 {
0261     struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
0262     void *data = packet->payload;
0263     u32 tlen = packet->tlen;
0264     struct rvt_qp *qp = packet->qp;
0265     struct ib_other_headers *ohdr = packet->ohdr;
0266     u32 opcode = packet->opcode;
0267     u32 hdrsize = packet->hlen;
0268     u32 psn;
0269     u32 pad = packet->pad;
0270     struct ib_wc wc;
0271     u32 pmtu = qp->pmtu;
0272     struct ib_reth *reth;
0273     int ret;
0274     u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
0275 
0276     if (hfi1_ruc_check_hdr(ibp, packet))
0277         return;
0278 
0279     process_ecn(qp, packet);
0280 
0281     psn = ib_bth_get_psn(ohdr);
0282     /* Compare the PSN verses the expected PSN. */
0283     if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
0284         /*
0285          * Handle a sequence error.
0286          * Silently drop any current message.
0287          */
0288         qp->r_psn = psn;
0289 inv:
0290         if (qp->r_state == OP(SEND_FIRST) ||
0291             qp->r_state == OP(SEND_MIDDLE)) {
0292             set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
0293             qp->r_sge.num_sge = 0;
0294         } else {
0295             rvt_put_ss(&qp->r_sge);
0296         }
0297         qp->r_state = OP(SEND_LAST);
0298         switch (opcode) {
0299         case OP(SEND_FIRST):
0300         case OP(SEND_ONLY):
0301         case OP(SEND_ONLY_WITH_IMMEDIATE):
0302             goto send_first;
0303 
0304         case OP(RDMA_WRITE_FIRST):
0305         case OP(RDMA_WRITE_ONLY):
0306         case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
0307             goto rdma_first;
0308 
0309         default:
0310             goto drop;
0311         }
0312     }
0313 
0314     /* Check for opcode sequence errors. */
0315     switch (qp->r_state) {
0316     case OP(SEND_FIRST):
0317     case OP(SEND_MIDDLE):
0318         if (opcode == OP(SEND_MIDDLE) ||
0319             opcode == OP(SEND_LAST) ||
0320             opcode == OP(SEND_LAST_WITH_IMMEDIATE))
0321             break;
0322         goto inv;
0323 
0324     case OP(RDMA_WRITE_FIRST):
0325     case OP(RDMA_WRITE_MIDDLE):
0326         if (opcode == OP(RDMA_WRITE_MIDDLE) ||
0327             opcode == OP(RDMA_WRITE_LAST) ||
0328             opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
0329             break;
0330         goto inv;
0331 
0332     default:
0333         if (opcode == OP(SEND_FIRST) ||
0334             opcode == OP(SEND_ONLY) ||
0335             opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
0336             opcode == OP(RDMA_WRITE_FIRST) ||
0337             opcode == OP(RDMA_WRITE_ONLY) ||
0338             opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
0339             break;
0340         goto inv;
0341     }
0342 
0343     if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
0344         rvt_comm_est(qp);
0345 
0346     /* OK, process the packet. */
0347     switch (opcode) {
0348     case OP(SEND_FIRST):
0349     case OP(SEND_ONLY):
0350     case OP(SEND_ONLY_WITH_IMMEDIATE):
0351 send_first:
0352         if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
0353             qp->r_sge = qp->s_rdma_read_sge;
0354         } else {
0355             ret = rvt_get_rwqe(qp, false);
0356             if (ret < 0)
0357                 goto op_err;
0358             if (!ret)
0359                 goto drop;
0360             /*
0361              * qp->s_rdma_read_sge will be the owner
0362              * of the mr references.
0363              */
0364             qp->s_rdma_read_sge = qp->r_sge;
0365         }
0366         qp->r_rcv_len = 0;
0367         if (opcode == OP(SEND_ONLY))
0368             goto no_immediate_data;
0369         else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
0370             goto send_last_imm;
0371         fallthrough;
0372     case OP(SEND_MIDDLE):
0373         /* Check for invalid length PMTU or posted rwqe len. */
0374         /*
0375          * There will be no padding for 9B packet but 16B packets
0376          * will come in with some padding since we always add
0377          * CRC and LT bytes which will need to be flit aligned
0378          */
0379         if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
0380             goto rewind;
0381         qp->r_rcv_len += pmtu;
0382         if (unlikely(qp->r_rcv_len > qp->r_len))
0383             goto rewind;
0384         rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
0385         break;
0386 
0387     case OP(SEND_LAST_WITH_IMMEDIATE):
0388 send_last_imm:
0389         wc.ex.imm_data = ohdr->u.imm_data;
0390         wc.wc_flags = IB_WC_WITH_IMM;
0391         goto send_last;
0392     case OP(SEND_LAST):
0393 no_immediate_data:
0394         wc.ex.imm_data = 0;
0395         wc.wc_flags = 0;
0396 send_last:
0397         /* Check for invalid length. */
0398         /* LAST len should be >= 1 */
0399         if (unlikely(tlen < (hdrsize + extra_bytes)))
0400             goto rewind;
0401         /* Don't count the CRC. */
0402         tlen -= (hdrsize + extra_bytes);
0403         wc.byte_len = tlen + qp->r_rcv_len;
0404         if (unlikely(wc.byte_len > qp->r_len))
0405             goto rewind;
0406         wc.opcode = IB_WC_RECV;
0407         rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
0408         rvt_put_ss(&qp->s_rdma_read_sge);
0409 last_imm:
0410         wc.wr_id = qp->r_wr_id;
0411         wc.status = IB_WC_SUCCESS;
0412         wc.qp = &qp->ibqp;
0413         wc.src_qp = qp->remote_qpn;
0414         wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
0415         /*
0416          * It seems that IB mandates the presence of an SL in a
0417          * work completion only for the UD transport (see section
0418          * 11.4.2 of IBTA Vol. 1).
0419          *
0420          * However, the way the SL is chosen below is consistent
0421          * with the way that IB/qib works and is trying avoid
0422          * introducing incompatibilities.
0423          *
0424          * See also OPA Vol. 1, section 9.7.6, and table 9-17.
0425          */
0426         wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
0427         /* zero fields that are N/A */
0428         wc.vendor_err = 0;
0429         wc.pkey_index = 0;
0430         wc.dlid_path_bits = 0;
0431         wc.port_num = 0;
0432         /* Signal completion event if the solicited bit is set. */
0433         rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
0434         break;
0435 
0436     case OP(RDMA_WRITE_FIRST):
0437     case OP(RDMA_WRITE_ONLY):
0438     case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
0439 rdma_first:
0440         if (unlikely(!(qp->qp_access_flags &
0441                    IB_ACCESS_REMOTE_WRITE))) {
0442             goto drop;
0443         }
0444         reth = &ohdr->u.rc.reth;
0445         qp->r_len = be32_to_cpu(reth->length);
0446         qp->r_rcv_len = 0;
0447         qp->r_sge.sg_list = NULL;
0448         if (qp->r_len != 0) {
0449             u32 rkey = be32_to_cpu(reth->rkey);
0450             u64 vaddr = be64_to_cpu(reth->vaddr);
0451             int ok;
0452 
0453             /* Check rkey */
0454             ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
0455                      vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
0456             if (unlikely(!ok))
0457                 goto drop;
0458             qp->r_sge.num_sge = 1;
0459         } else {
0460             qp->r_sge.num_sge = 0;
0461             qp->r_sge.sge.mr = NULL;
0462             qp->r_sge.sge.vaddr = NULL;
0463             qp->r_sge.sge.length = 0;
0464             qp->r_sge.sge.sge_length = 0;
0465         }
0466         if (opcode == OP(RDMA_WRITE_ONLY)) {
0467             goto rdma_last;
0468         } else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
0469             wc.ex.imm_data = ohdr->u.rc.imm_data;
0470             goto rdma_last_imm;
0471         }
0472         fallthrough;
0473     case OP(RDMA_WRITE_MIDDLE):
0474         /* Check for invalid length PMTU or posted rwqe len. */
0475         if (unlikely(tlen != (hdrsize + pmtu + 4)))
0476             goto drop;
0477         qp->r_rcv_len += pmtu;
0478         if (unlikely(qp->r_rcv_len > qp->r_len))
0479             goto drop;
0480         rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
0481         break;
0482 
0483     case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
0484         wc.ex.imm_data = ohdr->u.imm_data;
0485 rdma_last_imm:
0486         wc.wc_flags = IB_WC_WITH_IMM;
0487 
0488         /* Check for invalid length. */
0489         /* LAST len should be >= 1 */
0490         if (unlikely(tlen < (hdrsize + pad + 4)))
0491             goto drop;
0492         /* Don't count the CRC. */
0493         tlen -= (hdrsize + extra_bytes);
0494         if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
0495             goto drop;
0496         if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
0497             rvt_put_ss(&qp->s_rdma_read_sge);
0498         } else {
0499             ret = rvt_get_rwqe(qp, true);
0500             if (ret < 0)
0501                 goto op_err;
0502             if (!ret)
0503                 goto drop;
0504         }
0505         wc.byte_len = qp->r_len;
0506         wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
0507         rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
0508         rvt_put_ss(&qp->r_sge);
0509         goto last_imm;
0510 
0511     case OP(RDMA_WRITE_LAST):
0512 rdma_last:
0513         /* Check for invalid length. */
0514         /* LAST len should be >= 1 */
0515         if (unlikely(tlen < (hdrsize + pad + 4)))
0516             goto drop;
0517         /* Don't count the CRC. */
0518         tlen -= (hdrsize + extra_bytes);
0519         if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
0520             goto drop;
0521         rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
0522         rvt_put_ss(&qp->r_sge);
0523         break;
0524 
0525     default:
0526         /* Drop packet for unknown opcodes. */
0527         goto drop;
0528     }
0529     qp->r_psn++;
0530     qp->r_state = opcode;
0531     return;
0532 
0533 rewind:
0534     set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
0535     qp->r_sge.num_sge = 0;
0536 drop:
0537     ibp->rvp.n_pkt_drops++;
0538     return;
0539 
0540 op_err:
0541     rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
0542 }